Package net.yacy.kelondro.data.meta

Examples of net.yacy.kelondro.data.meta.URIMetadataRow.metadata()


        if (urlhash == null) return 0;
        // determine the url string
        final URIMetadataRow entry = urlMetadata().load(urlhash);
        if (entry == null) return 0;
        final URIMetadataRow.Components metadata = entry.metadata();
        if (metadata == null || metadata.url() == null) return 0;

        try {
            // parse the resource
            final Document document = Document.mergeDocuments(metadata.url(), null, loader.loadDocuments(loader.request(metadata.url(), true, false), cacheStrategy, 10000, Long.MAX_VALUE));
View Full Code Here


                        // "+entry.getUrlHash());
                        final URIMetadataRow ue = Segment.this.urlMetadata.load(entry.urlhash());
                        if (ue == null) {
                            urlHashs.put(entry.urlhash());
                        } else {
                            url = ue.metadata().url();
                            if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
                                urlHashs.put(entry.urlhash());
                            }
                        }
                    }
View Full Code Here

          metadata.put(METADATA.SNIPPET, String.valueOf(urlEntry.snippet()));
          metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount()));
          metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype()));
          metadata.put(METADATA.LANGUAGE, UTF8.String(urlEntry.language()));

          final URIMetadataRow.Components meta = urlEntry.metadata();
          if (meta != null) {
            metadata.put(METADATA.TITLE, meta.dc_title());
            metadata.put(METADATA.CREATOR, meta.dc_creator());
            metadata.put(METADATA.KEYWORDS, meta.dc_subject());
            metadata.put(METADATA.PUBLISHER, meta.dc_publisher());
View Full Code Here

    public DigestURI getURL(final Segments.Process process, final byte[] urlhash) {
        if (urlhash == null) return null;
        if (urlhash.length == 0) return null;
        final URIMetadataRow le = this.indexSegments.urlMetadata(process).load(urlhash);
        if (le != null) {
            final Components metadata = le.metadata();
            if (metadata == null) return null;
            return metadata.url();
        }
        return this.crawlQueues.getURL(urlhash);
    }
View Full Code Here

                    final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.getBookmark(urlHash);
                    if (bookmark == null) {
                        // try to get the bookmark from the LURL database
                        final URIMetadataRow urlentry = sb.indexSegments.urlMetadata(Segments.Process.PUBLIC).load(ASCII.getBytes(urlHash));
                        if (urlentry != null) try {
                            final URIMetadataRow.Components metadata = urlentry.metadata();
                            final Document document = Document.mergeDocuments(metadata.url(), null, sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Long.MAX_VALUE));
                            prop.put("mode_edit", "0"); // create mode
                            prop.put("mode_url", metadata.url().toNormalform(false, true));
                            prop.putHTML("mode_title", metadata.dc_title());
                            prop.putHTML("mode_description", (document == null) ? metadata.dc_title(): document.dc_title());
View Full Code Here

        if (urlhash == null || urlhash.length() == 0) return prop;

        final URIMetadataRow entry = segment.urlMetadata().load(urlhash.getBytes());
        if (entry == null) return prop;

        final URIMetadataRow.Components metadata = entry.metadata();
        if (metadata.url() == null) {
            return prop;
        }
        final URIMetadataRow le = (entry.referrerHash() == null || entry.referrerHash().length != Word.commonHashLength) ? null : segment.urlMetadata().load(entry.referrerHash());
View Full Code Here

        prop.put("geo_long", metadata.lon());

        prop.put("yacy_urlhash", metadata.url().hash());
        prop.putXML("yacy_loaddate", entry.loaddate().toString());
        prop.putXML("yacy_referrer_hash", (le == null) ? "" : ASCII.String(le.hash()));
        prop.putXML("yacy_referrer_url", (le == null) ? "" : le.metadata().url().toNormalform(false, true));
        prop.put("yacy_size", entry.size());
        prop.put("yacy_words",entry.wordCount());

        // return rewrite properties
        return prop;
View Full Code Here

            if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (entry null) from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "3600");
            return prop;
        }

        final URIMetadataRow.Components metadata = entry.metadata();
        if (metadata.url() == null) {
            if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (url null) for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
            prop.put("delay", "3600");
            return prop;
        }
View Full Code Here

        if (post.containsKey("urlhashdelete")) {
            final URIMetadataRow entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
            if (entry == null) {
                prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
            } else {
                urlstring = entry.metadata().url().toNormalform(false, true);
                prop.put("urlstring", "");
                sb.urlRemove(segment, urlhash.getBytes());
                prop.putHTML("result", "Removed URL " + urlstring);
            }
            prop.put("lurlexport", 0);
View Full Code Here

        if (post.containsKey("urlhashsearch")) {
            final URIMetadataRow entry = segment.urlMetadata().load(ASCII.getBytes(urlhash));
            if (entry == null) {
                prop.putHTML("result", "No Entry for URL hash " + urlhash);
            } else {
                prop.putHTML("urlstring", entry.metadata().url().toNormalform(false, true));
                prop.putAll(genUrlProfile(segment, entry, urlhash));
                prop.put("statistics", 0);
            }
            prop.put("lurlexport", 0);
            prop.put("reload", 0);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.