Examples of urlMetadata()


Examples of de.anomic.search.Segment.urlMetadata()

                        // stack request
                        // first delete old entry, if exists
                        final DigestURI url = new DigestURI(crawlingStart);
                        final byte[] urlhash = url.hash();
                        indexSegment.urlMetadata().remove(urlhash);
                        sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
                        sb.crawlQueues.errorURL.remove(urlhash);

                        // stack url
                        sb.crawler.removePassive(crawlingStartURL.hash()); // if there is an old entry, delete it
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

                            final Map.Entry<MultiProtocolURI, Properties> e = linkiterator.next();
                            if (e.getKey() == null) continue;
                            nexturl = new DigestURI(e.getKey());
                            // remove the url from the database to be prepared to crawl them again
                            final byte[] urlhash = nexturl.hash();
                            indexSegment.urlMetadata().remove(urlhash);
                            sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
                            sb.crawlQueues.errorURL.remove(urlhash);
                            sb.crawlStacker.enqueueEntry(new Request(
                                    sb.peers.mySeed().hash.getBytes(),
                                    nexturl,
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

        // get the url hash from which the content should be loaded
        String urlHash = post.get("urlHash", "");
        URIMetadataRow urlEntry = null;
        // get the urlEntry that belongs to the url hash
        if (urlHash.length() > 0 && (urlEntry = indexSegment.urlMetadata().load(ASCII.getBytes(urlHash))) != null) {
            // get the url that belongs to the entry
            final URIMetadataRow.Components metadata = urlEntry.metadata();
            if ((metadata == null) || (metadata.url() == null)) {
                prop.put("error", "3");
                prop.put("viewMode", VIEW_MODE_NO_TEXT);
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

        if ((!block) && (post == null || post.get("cat", "href").equals("href"))) {
            String urlmask = null;

            // check available memory and clean up if necessary
            if (!MemoryControl.request(8000000L, false)) {
                indexSegment.urlMetadata().clearCache();
                SearchEventCache.cleanupEvents(true);
            }

            final RankingProfile ranking = sb.getRanking();
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

                if (!sb.verifyAuthentication(header, true)) {
                    prop.put("AUTHENTICATE", "admin log-in"); // force log-in
                    return prop;
                }
                final String recommendHash = post.get("recommendref", ""); // urlhash
                final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(recommendHash));
                if (urlentry != null) {
                    final URIMetadataRow.Components metadata = urlentry.metadata();
                    Document[] documents = null;
                    try {
                        documents = sb.loader.loadDocuments(sb.loader.request(metadata.url(), true, false), CacheStrategy.IFEXIST, 5000, Long.MAX_VALUE);
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

        // delta settings
        prop.put("minimumLocalDelta", sb.crawlQueues.noticeURL.getMinimumLocalDelta());
        prop.put("minimumGlobalDelta", sb.crawlQueues.noticeURL.getMinimumGlobalDelta());
       
        // table cache settings
        prop.putNum("urlCacheSize", indexSegment.urlMetadata().writeCacheSize());
        prop.putNum("wordCacheSize", indexSegment.termIndex().getBufferSize());
        prop.putNum("wordCacheSizeKBytes", indexSegment.termIndex().getBufferSizeBytes()/1024);
        prop.putNum("maxURLinCache", indexSegment.termIndex().getBufferMaxReferences());
        prop.putNum("maxAgeOfCache", indexSegment.termIndex().getBufferMaxAge() / 1000 / 60); // minutes
        prop.putNum("minAgeOfCache", indexSegment.termIndex().getBufferMinAge() / 1000 / 60); // minutes
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

            prop.put("linkfreq", sb.getConfigLong("defaultLinkReceiveFrequency",30));
            prop.put("wordfreq", sb.getConfigLong("defaultWordReceiveFrequency",10));
            prop.put("dtable", "");
            prop.put("rtable", "");
            prop.putNum("wcount", indexSegment.termIndex().sizesMax());
            prop.putNum("ucount", indexSegment.urlMetadata().size());
            return prop; // be safe
        }
       
        if (post.containsKey("indexsharesetting")) {
            sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW, post.containsKey("distribute"));
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

            sb.setConfig("defaultWordReceiveFrequency", post.getInt("wordfreq", 10));
        }

        // insert constants
        prop.putNum("wcount", indexSegment.termIndex().sizesMax());
        prop.putNum("ucount", indexSegment.urlMetadata().size());
       
        // return rewrite properties
        return prop;
    }
}
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

        }
       
        if (post!=null) {
            if (post.get("action").equals("ustart")) {
                if (urldbCleanerThread==null || !urldbCleanerThread.isAlive()) {
                    urldbCleanerThread = indexSegment.urlMetadata().getBlacklistCleaner(Switchboard.urlBlacklist, sb.crawlStacker);
                    urldbCleanerThread.start();
                }
                else {
                    urldbCleanerThread.endPause();
                }
View Full Code Here

Examples of de.anomic.search.Segment.urlMetadata()

            prop.put("LOCATION","");
            return prop;
        }
        if (urldbCleanerThread!=null) {
            prop.put("urldb", "1");
            prop.putNum("urldb_percentUrls", ((double)urldbCleanerThread.totalSearchedUrls/indexSegment.urlMetadata().size())*100);
            prop.putNum("urldb_blacklisted", urldbCleanerThread.blacklistedUrls);
            prop.putNum("urldb_total", urldbCleanerThread.totalSearchedUrls);
            prop.putHTML("urldb_lastBlacklistedUrl", urldbCleanerThread.lastBlacklistedUrl);
            prop.put("urldb_lastBlacklistedHash", urldbCleanerThread.lastBlacklistedHash);
            prop.putHTML("urldb_lastUrl", urldbCleanerThread.lastUrl);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.