Package net.yacy.document

Examples of net.yacy.document.Document


        String infoString = ii.info.toString();
        images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));
       
        if (title == null || title.length() == 0) title = MultiProtocolURI.unescape(location.getFileName());
       
        return new Document[]{new Document(
             location,
             mimeType,
             "UTF-8",
             this,
             languages,
View Full Code Here


                    if (bookmark == null) {
                        // try to get the bookmark from the LURL database
                        final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash));
                        if (urlentry != null) try {
                            final URIMetadataRow.Components metadata = urlentry.metadata();
                            final Document document = Document.mergeDocuments(metadata.url(), null, sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Long.MAX_VALUE));
                            prop.put("mode_edit", "0"); // create mode
                            prop.put("mode_url", metadata.url().toNormalform(false, true));
                            prop.putHTML("mode_title", metadata.dc_title());
                            prop.putHTML("mode_description", (document == null) ? metadata.dc_title(): document.dc_title());
                            prop.putHTML("mode_author", metadata.dc_creator());
                            prop.putHTML("mode_tags", (document == null) ? metadata.dc_subject() : document.dc_subject(','));
                            prop.putHTML("mode_path","");
                            prop.put("mode_public", "0");
                            prop.put("mode_feed", "0"); //TODO: check if it IS a feed
                        } catch (final IOException e) {Log.logException(e);} catch (final Parser.Failure e) {Log.logException(e);}
                    } else {
View Full Code Here

          Log.logException(e);
        }
          } else if (isAutoTagger || isMetadata || isURLdb || isCrawlStart) {
            try {
                  final YMarkMetadata meta = new YMarkMetadata(new DigestURI(post.get(ROOT).substring(2)), sb.indexSegments);
              final Document document = meta.loadDocument(sb.loader);
              final TreeMap<String, YMarkTag> tags = sb.tables.bookmarks.getTags(bmk_user);
              if(isAutoTagger)  {
                prop.put("folders_"+count+"_foldername","<small><b>meta-"+YMarkMetadata.METADATA.KEYWORDS.name().toLowerCase()+":</b> " + meta.loadMetadata().get(YMarkMetadata.METADATA.KEYWORDS) + "</small>");
                putProp(count, "meta");
                count++;
View Full Code Here

          url=YMarkTables.PROTOCOLS.HTTP.protocol(url);
      }

          try {
        final YMarkMetadata meta = new YMarkMetadata(new DigestURI(url), sb.indexSegments);
        final Document document = meta.loadDocument(sb.loader);
        final EnumMap<YMarkMetadata.METADATA, String> metadata = meta.loadMetadata();

        prop.putXML("title", metadata.get(YMarkMetadata.METADATA.TITLE));
        prop.putXML("desc", metadata.get(YMarkMetadata.METADATA.DESCRIPTION));
        prop.put("keywords", putTags(document.dc_subject(','), "keywords"));
        prop.put("autotags", putTags(YMarkAutoTagger.autoTag(document, 5, sb.tables.bookmarks.getTags(bmk_user)), "autotags"));

        final YMarkCrawlStart crawlStart = new YMarkCrawlStart(sb.tables, url);
          int count = 0;
        if(!crawlStart.isEmpty()) {
View Full Code Here

                prop.put("viewMode_html", 1);
                prop.put("viewMode_html_url", url.toNormalform(false, true));
            }
        } else if (viewMode.equals("parsed") || viewMode.equals("sentences"|| viewMode.equals("words") || viewMode.equals("links")) {
            // parsing the resource content
            Document document = null;
            try {
                document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                if (document == null) {
                    prop.put("error", "5");
                    prop.put("error_errorText", "Unknown error");
                    prop.put("viewMode", VIEW_MODE_NO_TEXT);
                    return prop;
                }
            } catch (final Parser.Failure e) {
                prop.put("error", "5");
                prop.putHTML("error_errorText", e.getMessage());
                prop.put("viewMode", VIEW_MODE_NO_TEXT);
                return prop;
            }

            if (viewMode.equals("parsed")) {
                final String content = UTF8.String(document.getTextBytes());
                // content = wikiCode.replaceHTML(content); // added by Marc Nause
                prop.put("viewMode", VIEW_MODE_AS_PARSED_TEXT);
                prop.put("viewMode_title", document.dc_title());
                prop.put("viewMode_creator", document.dc_creator());
                prop.put("viewMode_subject", document.dc_subject(','));
                prop.put("viewMode_description", document.dc_description());
                prop.put("viewMode_publisher", document.dc_publisher());
                prop.put("viewMode_format", document.dc_format());
                prop.put("viewMode_identifier", document.dc_identifier());
                prop.put("viewMode_source", url.toString());
                prop.put("viewMode_lat", document.lat());
                prop.put("viewMode_lon", document.lon());
                prop.put("viewMode_parsedText", markup(wordArray, content).replaceAll("\n", "<br />").replaceAll("\t", "&nbsp;&nbsp;&nbsp;&nbsp;"));

            } else if (viewMode.equals("sentences")) {
                prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES);
                final Collection<StringBuilder> sentences = document.getSentences(pre);

                boolean dark = true;
                int i = 0;
                String sentence;
                if (sentences != null) {

                    // Search word highlighting
                    for (final StringBuilder s: sentences) {
                        sentence = s.toString();
                        if (sentence.trim().length() > 0) {
                            prop.put("viewMode_sentences_" + i + "_nr", i + 1);
                            prop.put("viewMode_sentences_" + i + "_text", markup(wordArray, sentence));
                            prop.put("viewMode_sentences_" + i + "_dark", dark ? "1" : "0");
                            dark = !dark;
                            i++;
                        }
                    }
                }
                prop.put("viewMode_sentences", i);

            } else if (viewMode.equals("words")) {
                prop.put("viewMode", VIEW_MODE_AS_PARSED_WORDS);
                final Collection<StringBuilder> sentences = document.getSentences(pre);

                boolean dark = true;
                int i = 0;
                String sentence;
                StringBuilder token;
                if (sentences != null) {

                    // Search word highlighting
                    for (final StringBuilder s: sentences) {
                        sentence = s.toString();
                        Enumeration<StringBuilder> tokens = null;
                        tokens = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(sentence)), LibraryProvider.dymLib);
                        while (tokens.hasMoreElements()) {
                            token = tokens.nextElement();
                            if (token.length() > 0) {
                                prop.put("viewMode_words_" + i + "_nr", i + 1);
                                prop.put("viewMode_words_" + i + "_word", token);
                                prop.put("viewMode_words_" + i + "_dark", dark ? "1" : "0");
                                dark = !dark;
                                i++;
                            }
                        }
                    }
                }
                prop.put("viewMode_words", i);

            } else if (viewMode.equals("links")) {
                prop.put("viewMode", VIEW_MODE_AS_LINKLIST);
                boolean dark = true;
                int i = 0;
                i += putMediaInfo(prop, wordArray, i, document.getVideolinks(), "video", (i % 2 == 0), document.getAnchors());
                i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0), document.getAnchors());
                dark = (i % 2 == 0);

                final Map<MultiProtocolURI, ImageEntry> ts = document.getImages();
                final Iterator<ImageEntry> tsi = ts.values().iterator();
                ImageEntry entry;
                while (tsi.hasNext()) {
                    entry = tsi.next();
                    prop.put("viewMode_links_" + i + "_nr", i);
                    prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
                    prop.put("viewMode_links_" + i + "_type", "image");
                    prop.put("viewMode_links_" + i + "_text", (entry.alt().isEmpty()) ? "&nbsp;" : markup(wordArray, entry.alt()));
                    prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(false, true));
                    prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(false, true)));
                    if (entry.width() > 0 && entry.height() > 0) {
                        prop.put("viewMode_links_" + i + "_rel", entry.width() + "x" + entry.height() + " Pixel");
                    } else {
                        prop.put("viewMode_links_" + i + "_rel", "");
                    }
                    prop.put("viewMode_links_" + i + "_name", "");
                    dark = !dark;
                    i++;
                }
                i += putMediaInfo(prop, wordArray, i, document.getApplinks(), "app", (i % 2 == 0), document.getAnchors());
                i += putMediaInfo(prop, wordArray, i, document.getHyperlinks(), "link", (i % 2 == 0), document.getAnchors());
                prop.put("viewMode_links", i);

            }
            if (document != null) document.close();
        }
        prop.put("error", "0");
        prop.put("error_url", url.toNormalform(false, true));
        prop.put("error_hash", urlHash);
        prop.put("error_wordCount", wordCount);
View Full Code Here

                    if (bookmark == null) {
                        // try to get the bookmark from the LURL database
                        final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash));
                        if (urlentry != null) try {
                            final URIMetadataRow.Components metadata = urlentry.metadata();
                            final Document document = Document.mergeDocuments(metadata.url(), null, sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE));
                            prop.put("mode_edit", "0"); // create mode
                            prop.put("mode_url", metadata.url().toNormalform(false, true));
                            prop.putHTML("mode_title", metadata.dc_title());
                            prop.putHTML("mode_description", (document == null) ? metadata.dc_title(): document.dc_title());
                            prop.putHTML("mode_author", metadata.dc_creator());
                            prop.putHTML("mode_tags", (document == null) ? metadata.dc_subject() : document.dc_subject(','));
                            prop.putHTML("mode_path","");
                            prop.put("mode_public", "0");
                            prop.put("mode_feed", "0"); //TODO: check if it IS a feed
                        } catch (final IOException e) {Log.logException(e);} catch (final Parser.Failure e) {Log.logException(e);}
                    } else {
View Full Code Here

            null,
            false);
    }
   
    public void writeXML(OutputStreamWriter os) throws IOException {
        Document doc = document();
        if (doc != null) {
            doc.writeXML(os, this.getDate());
        }
    }
View Full Code Here

        final String infoString = ii.info.toString();
        images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));

        if (title == null || title.length() == 0) title = MultiProtocolURI.unescape(location.getFileName());

        return new Document[]{new Document(
             location,
             mimeType,
             "UTF-8",
             this,
             languages,
View Full Code Here

   
    public Document document() {
        HashSet<String> languages = new HashSet<String>();
        languages.add(getLanguage());
       
        return new Document(
            getIdentifier(true),
            "text/html",
            "UTF-8",
            this,
            languages,
View Full Code Here

            final BObject nameo = info.get("name");
            if (nameo != null) title = UTF8.String(nameo.getString());
        }
        if (title == null || title.length() == 0) title = MultiProtocolURI.unescape(location.getFileName());
        try {
            return new Document[]{new Document(
                    location,
                    mimeType,
                    charset,
                    this,
                    null,
View Full Code Here

TOP

Related Classes of net.yacy.document.Document

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.