Package net.yacy.kelondro.data.meta

Examples of net.yacy.kelondro.data.meta.URIMetadataRow.metadata()


                                Log.logException(e);
                            }
                            final URIMetadataRow e = segment.urlMetadata().load(b);
                            segment.urlMetadata().remove(b);
                            if (e != null) {
                                url = e.metadata().url();
                                pw.println(url.getHost() + "/" + url.getFile());
                                for (final String supportedBlacklistType : supportedBlacklistTypes) {
                                    if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklist)) {
                                        Switchboard.urlBlacklist.add(
                                                supportedBlacklistType,
View Full Code Here


                                Log.logException(e);
                            }
                            final URIMetadataRow e = segment.urlMetadata().load(b);
                            segment.urlMetadata().remove(b);
                            if (e != null) {
                                url = e.metadata().url();
                                pw.println(url.getHost() + "/.*");
                                for (final String supportedBlacklistType : supportedBlacklistTypes) {
                                    if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklist)) {
                                        Switchboard.urlBlacklist.add(
                                                supportedBlacklistType,
View Full Code Here

            DigestURI url;
            URIMetadataRow entry;
            String us;
            long rn = -1;
            while (!ranked.isEmpty() && (entry = ranked.takeURL(false, 1000)) != null) {
                if ((entry == null) || (entry.metadata() == null)) continue;
                url = entry.metadata().url();
                if (url == null) continue;
                us = url.toNormalform(false, false);
                if (rn == -1) rn = entry.ranking();
                prop.put("genUrlList_urlList_"+i+"_urlExists", "1");
View Full Code Here

            URIMetadataRow entry;
            String us;
            long rn = -1;
            while (!ranked.isEmpty() && (entry = ranked.takeURL(false, 1000)) != null) {
                if ((entry == null) || (entry.metadata() == null)) continue;
                url = entry.metadata().url();
                if (url == null) continue;
                us = url.toNormalform(false, false);
                if (rn == -1) rn = entry.ranking();
                prop.put("genUrlList_urlList_"+i+"_urlExists", "1");
                prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
View Full Code Here

                }
              continue;
            }

            // prepare values for constraint check
            final URIMetadataRow.Components metadata = page.metadata();

            // check errors
            if (metadata == null) {
                this.sortout++;
                continue; // rare case where the url is corrupted
View Full Code Here

        if (this.hostResolver != null) while (domhashs.hasNext() && result.sizeSmaller(30)) {
            hosthash = domhashs.next();
            if (hosthash == null) continue;
            urlhash = this.hostResolver.get(hosthash);
            row = urlhash == null ? null : this.query.getSegment().urlMetadata().load(urlhash);
            hostname = row == null ? null : row.metadata().url().getHost();
            if (hostname != null) {
                result.set(hostname, this.hostNavigator.get(hosthash));
            }
        }
        if (result.sizeSmaller(2)) result.clear(); // navigators with one entry are not useful
View Full Code Here

    public DigestURI getURL(final Segments.Process process, final byte[] urlhash) {
        if (urlhash == null) return null;
        if (urlhash.length == 0) return null;
        final URIMetadataRow le = this.indexSegments.urlMetadata(process).load(urlhash);
        if (le != null) {
            final Components metadata = le.metadata();
            if (metadata == null) return null;
            return metadata.url();
        }
        return this.crawlQueues.getURL(urlhash);
    }
View Full Code Here

                        // "+entry.getUrlHash());
                        final URIMetadataRow ue = Segment.this.urlMetadata.load(entry.urlhash());
                        if (ue == null) {
                            urlHashs.put(entry.urlhash());
                        } else {
                            url = ue.metadata().url();
                            if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
                                urlHashs.put(entry.urlhash());
                            }
                        }
                    }
View Full Code Here

        this.statsDump = new ArrayList<HostStat>();
        final TreeSet<String> set = new TreeSet<String>();
        for (final URLHashCounter hs: domainSamples.values()) {
            if (hs == null) continue;
            urlref = this.load(hs.urlhashb);
            if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue;
            set.add(urlref.metadata().url().getHost());
            count--;
            if (count == 0) break;
        }
        return set;
View Full Code Here

        this.statsDump = new ArrayList<HostStat>();
        final TreeSet<String> set = new TreeSet<String>();
        for (final URLHashCounter hs: domainSamples.values()) {
            if (hs == null) continue;
            urlref = this.load(hs.urlhashb);
            if (urlref == null || urlref.metadata() == null || urlref.metadata().url() == null || urlref.metadata().url().getHost() == null) continue;
            set.add(urlref.metadata().url().getHost());
            count--;
            if (count == 0) break;
        }
        return set;
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.