Package net.yacy.document.parser.xml

Source Code of net.yacy.document.parser.xml.opensearchdescriptionReader$Item

// opensearchdescriptionReader.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 11.03.2008 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2011-04-12 07:02:36 +0200 (Di, 12. Apr 2011) $
// $LastChangedRevision: 7649 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

package net.yacy.document.parser.xml;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.ByteBuffer;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;


public class opensearchdescriptionReader extends DefaultHandler {
   
    // statics for item generation and automatic categorization
    static int guidcount = 0;
    //private static final String recordTag = "OpenSearchDescription";
    private static final String[] tagsDef = new String[]{
        "ShortName",
        "LongName",
        "Image",
        "Language",
        "OutputEncoding",
        "InputEncoding",
        "AdultContent",
        "Description",
        "Url",
        "Developer",
        "Query",
        "Tags",
        "Contact",
        "Attribution",
        "SyndicationRight"
        };
    /*
    <?xml version="1.0" encoding="UTF-8"?>
    <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
      <ShortName>YaCy/#[clientname]#</ShortName>
      <LongName>YaCy.net - #[SearchPageGreeting]#</LongName>
      <Image type="image/gif">http://#[thisaddress]#/env/grafics/yacy.gif</Image>
      <Language>en-us</Language>
      <OutputEncoding>UTF-8</OutputEncoding>
      <InputEncoding>UTF-8</InputEncoding>
      <AdultContent>true</AdultContent>
      <Description>YaCy is an open-source GPL-licensed software that can be used for stand-alone search engine installations or as a client for a multi-user P2P-based web indexing cluster. This is the access to peer '#[clientname]#'.</Description>
      <Url type="application/rss+xml" method="GET" template="http://#[thisaddress]#/yacysearch.rss?query={searchTerms}&amp;Enter=Search" />
      <Developer>See http://developer.berlios.de/projects/yacy/</Developer>
      <Query role="example" searchTerms="yacy" />
      <Tags>YaCy P2P Web Search</Tags>
      <Contact>See http://#[thisaddress]#/ViewProfile.html?hash=localhash</Contact>
      <Attribution>YaCy Software &amp;copy; 2004-2007 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
      <SyndicationRight>open</SyndicationRight>
    </OpenSearchDescription>
    */
   
    private static final HashSet<String> tags = new HashSet<String>();
    static {
        for (int i = 0; i < tagsDef.length; i++) {
            tags.add(tagsDef[i]);
        }
    }
   
    // class variables
    private Item channel;
    private final StringBuilder buffer;
    private boolean parsingChannel;
    private final String imageURL;
    private final ArrayList<String> itemsGUID; // a list of GUIDs, so the items can be retrieved by a specific order
    private final HashMap<String, Item> items; // a guid:Item map
   
   
    public opensearchdescriptionReader() {
        itemsGUID = new ArrayList<String>();
        items = new HashMap<String, Item>();
        buffer = new StringBuilder();
        channel = null;
        parsingChannel = false;
        imageURL = null;
    }
   
    public opensearchdescriptionReader(final String path) {
        this();
        try {
            final SAXParserFactory factory = SAXParserFactory.newInstance();
            final SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(path, this);
        } catch (final Exception e) {
            Log.logException(e);
        }
    }
   
    public opensearchdescriptionReader(final InputStream stream) {
        this();
        try {
            final SAXParserFactory factory = SAXParserFactory.newInstance();
            final SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(stream, this);
        } catch (final Exception e) {
            Log.logException(e);
        }
    }
   
    public static opensearchdescriptionReader parse(final byte[] a) {

        // check integrity of array
        if ((a == null) || (a.length == 0)) {
            Log.logWarning("opensearchdescriptionReader", "response=null");
            return null;
        }
        if (a.length < 100) {
            Log.logWarning("opensearchdescriptionReader", "response=" + UTF8.String(a));
            return null;
        }
        if (!ByteBuffer.equals(a, UTF8.getBytes("<?xml"))) {
            Log.logWarning("opensearchdescriptionReader", "response does not contain valid xml");
            return null;
        }
        final String end = UTF8.String(a, a.length - 10, 10);
        if (end.indexOf("rss") < 0) {
            Log.logWarning("opensearchdescriptionReader", "response incomplete");
            return null;
        }
       
        // make input stream
        final ByteArrayInputStream bais = new ByteArrayInputStream(a);
       
        // parse stream
        opensearchdescriptionReader reader = null;
        try {
            reader = new opensearchdescriptionReader(bais);
        } catch (final Exception e) {
            Log.logWarning("opensearchdescriptionReader", "parse exception: " + e);
            return null;
        }
        try { bais.close(); } catch (final IOException e) {}
        return reader;
    }

    @Override
    public void startElement(final String uri, final String name, final String tag, final Attributes atts) throws SAXException {
        if ("channel".equals(tag)) {
            channel = new Item();
            parsingChannel = true;
        }
    }

    @Override
    public void endElement(final String uri, final String name, final String tag) {
        if (tag == null) return;
        if ("channel".equals(tag)) {
            parsingChannel = false;
        } else if (parsingChannel) {
            final String value = buffer.toString().trim();
            buffer.setLength(0);
            if (tags.contains(tag)) channel.setValue(tag, value);
        }
    }

    @Override
    public void characters(final char ch[], final int start, final int length) {
        if (parsingChannel) {
            buffer.append(ch, start, length);
        }
    }

    public Item getChannel() {
        return channel;
    }

    public Item getItem(final int i) {
        // retrieve item by order number
        return getItem(itemsGUID.get(i));
    }

    public Item getItem(final String guid) {
        // retrieve item by guid
        return items.get(guid);
    }

    public int items() {
        return items.size();
    }
   
    public String getImage() {
        return this.imageURL;
    }
   
    public static class Item {
       
        private final HashMap<String, String> map;

        public Item() {
            this.map = new HashMap<String, String>();
            this.map.put("guid", Long.toHexString(System.currentTimeMillis()) + ":" + guidcount++);
        }
       
        public void setValue(final String name, final String value) {
            map.put(name, value);
        }
    }
}
TOP

Related Classes of net.yacy.document.parser.xml.opensearchdescriptionReader$Item

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.