Examples of profileHandle()


Examples of de.anomic.crawler.retrieval.Request.profileHandle()

            int i, showNum = 0;
            for (i = 0; (i < crawlerList.size()) && (showNum < showLimit); i++) {
                urle = crawlerList.get(i);
                if (urle != null && urle.url() != null) {
                    initiator = sb.peers.getConnected((urle.initiator() == null) ? "" : ASCII.String(urle.initiator()));
                    profileHandle = urle.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.put("crawler-queue_list_"+showNum+"_dark", dark ? "1" : "0");
                    prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
                    prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

            int i;
            for (i = 0; (i < crawlerList.size()) && (showNum < showLimit); i++) {
                urle = crawlerList.get(i);
                if ((urle != null)&&(urle.url()!=null)) {
                    initiator = sb.peers.getConnected(urle.initiator() == null ? "" : ASCII.String(urle.initiator()));
                    profileHandle = urle.profileHandle();
                    profileEntry = profileHandle == null ? null : sb.crawler.getActive(profileHandle.getBytes());
                    prop.put("crawler-queue_list_"+showNum+"_dark", dark ? "1" : "0");
                    prop.putHTML("crawler-queue_list_"+showNum+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) );
                    prop.put("crawler-queue_list_"+showNum+"_profile", ((profileEntry == null) ? "unknown" : profileEntry.name()));
                    prop.put("crawler-queue_list_"+showNum+"_depth", urle.depth());
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

        if (searchEvent != null) {
            searchEvent.addHeuristic(url.hash(), heuristicName, true);
        }
        if (this.indexSegments.segment(process).urlMetadata.exists(url.hash())) return; // don't do double-work
        final Request request = this.loader.request(url, true, true);
        final CrawlProfile profile = sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
        final String acceptedError = this.crawlStacker.checkAcceptance(url, profile, 0);
        if (acceptedError != null) {
            this.log.logWarning("addToIndex: cannot load " + url.toNormalform(false, false) + ": " + acceptedError);
            return;
        }
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

            crawlEntry = new Request(rowEntry);
            //Log.logInfo("Balancer", "fetched next url: " + crawlEntry.url().toNormalform(true, false));

            // at this point we must check if the crawlEntry has relevance because the crawl profile still exists
            // if not: return null. A calling method must handle the null value and try again
            final CrawlProfile profileEntry = cs.getActive(UTF8.getBytes(crawlEntry.profileHandle()));
            if (profileEntry == null) {
              Log.logWarning("Balancer", "no profile entry for handle " + crawlEntry.profileHandle());
              return null;
            }
            // depending on the caching policy we need sleep time to avoid DoS-like situations
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

            // at this point we must check if the crawlEntry has relevance because the crawl profile still exists
            // if not: return null. A calling method must handle the null value and try again
            final CrawlProfile profileEntry = cs.getActive(UTF8.getBytes(crawlEntry.profileHandle()));
            if (profileEntry == null) {
              Log.logWarning("Balancer", "no profile entry for handle " + crawlEntry.profileHandle());
              return null;
            }
            // depending on the caching policy we need sleep time to avoid DoS-like situations
            sleeptime = (
                    profileEntry.cacheStrategy() == CacheStrategy.CACHEONLY ||
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

            try {
                if (this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD) > 0) {
                    // get one entry that will not be loaded, just indexed
                    urlEntry = this.noticeURL.pop(NoticedURL.StackType.NOLOAD, true, this.sb.crawler);
                    if (urlEntry == null) continue;
                    final String profileHandle = urlEntry.profileHandle();
                    if (profileHandle == null) {
                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(profileHandle));
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

                    // get one entry that will not be loaded, just indexed
                    urlEntry = this.noticeURL.pop(NoticedURL.StackType.NOLOAD, true, this.sb.crawler);
                    if (urlEntry == null) continue;
                    final String profileHandle = urlEntry.profileHandle();
                    if (profileHandle == null) {
                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(profileHandle));
                    if (profile == null) {
                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(profileHandle));
                    if (profile == null) {
                        this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                        return true;
                    }
                    try {
                        this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(Segments.Process.LOCALCRAWLING, new Response(urlEntry, profile), null, null));
                        Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false));
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

                    return true;
                }

                urlEntry = this.noticeURL.pop(NoticedURL.StackType.CORE, true, this.sb.crawler);
                if (urlEntry == null) continue;
                final String profileHandle = urlEntry.profileHandle();
                // System.out.println("DEBUG plasmaSwitchboard.processCrawling:
                // profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
                if (profileHandle == null) {
                    this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                    return true;
View Full Code Here

Examples of de.anomic.crawler.retrieval.Request.profileHandle()

                if (urlEntry == null) continue;
                final String profileHandle = urlEntry.profileHandle();
                // System.out.println("DEBUG plasmaSwitchboard.processCrawling:
                // profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
                if (profileHandle == null) {
                    this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
                    return true;
                }
                load(urlEntry, stats, profileHandle);
                return true;
            } catch (final IOException e) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.