Package org.archive.modules.net

Examples of org.archive.modules.net.CrawlHost


     */
    protected void saveHostStats(String hostname, long size) {
        // TODO: consider moving 'top' accounting elsewhere, such
        // as the frontier or ServerCache itself
       
        CrawlHost host = serverCache.getHostFor(hostname);
        hostsDistributionTop.update(hostname, host.getSubstats().getFetchSuccesses());
        hostsBytesTop.update(hostname, host.getSubstats().getSuccessBytes());
        hostsLastFinishedTop.update(hostname, host.getSubstats().getLastSuccessTime());
    }
View Full Code Here


                    true,
                    false);    
        serverCache.forAllHostsDo(new Closure() {
            @Override
            public void execute(Object hostObj) {
                CrawlHost host = (CrawlHost) hostObj;
                sortedMap.put(-host.getSubstats().getFetchSuccesses(), host.getHostName());
            }
        });
        return sortedMap;
    }
View Full Code Here

            }
        }

        @Override
        public CrawlHost getHostFor(String host) {
            CrawlHost h = new CrawlHost(host);
            h.setIP(LOCALHOST, -1);
            return h;
        }
View Full Code Here

            return (String)curi.getData().get(A_DNS_SERVER_IP_LABEL);
        }
        // otherwise, host referenced in URI
        // TODO:FIXME: have fetcher insert exact IP contacted into curi,
        // use that rather than inferred by CrawlHost lookup
        CrawlHost h = getServerCache().getHostFor(curi.getUURI());
        if (h == null) {
            throw new NullPointerException("Crawlhost is null for " +
                curi + " " + curi.getVia());
        }
        InetAddress a = h.getIP();
        if (a == null) {
            throw new NullPointerException("Address is null for " +
                curi + " " + curi.getVia() + ". Address " +
                ((h.getIpFetched() == CrawlHost.IP_NEVER_LOOKED_UP)?
                     "was never looked up.":
                     (System.currentTimeMillis() - h.getIpFetched()) +
                         " ms ago."));
        }
        return h.getIP().getHostAddress();
    }
View Full Code Here

            this.serverCache = serverCache;
        }

        @Override
        public InetAddress[] resolve(String host) throws UnknownHostException {
            CrawlHost crawlHost = this.serverCache.getHostFor(host);
            if (crawlHost != null) {
                InetAddress ip = crawlHost.getIP();
                if (ip != null) {
                    return new InetAddress[] {ip};
                }
            }
View Full Code Here

        }

        jo.put("content_digest", checkForNull(curi.getContentDigestSchemeString()));
        jo.put("seed", checkForNull(curi.getSourceTag()));

        CrawlHost host = serverCache.getHostFor(curi.getUURI());
        if (host != null) {
            jo.put("host", host.fixUpName());
        } else {
            jo.put("host", JSONObject.NULL);
        }

        jo.put("annotations", checkForNull(StringUtils.join(curi.getAnnotations(), ",")));
View Full Code Here

        jo.put("decisiveRule", decisiveRule.getClass().getSimpleName());
        jo.put("result", result.toString());

        jo.put("url", curi.toString());

        CrawlHost host = getServerCache().getHostFor(curi.getUURI());
        if (host != null) {
            jo.put("host", host.fixUpName());
        } else {
            jo.put("host", JSONObject.NULL);
        }

        jo.put("sourceSeed", curi.getSourceTag());
View Full Code Here

     */
    public CrawlHost getHostFor(final String hostname) {
        if (hostname == null || hostname.length() == 0) {
            return null;
        }
        CrawlHost host = hosts.getOrUse(
                hostname,
                new Supplier<CrawlHost>() {
                    public CrawlHost get() {
                        String hkey = new String(hostname); // ensure private minimal key
                        return new CrawlHost(hkey);
                    }});
        if (host != null && host.getIP() != null
                && "0.0.0.0".equals(host.getIP().getHostAddress())) {
            throw new IllegalStateException("got suspicious value 0.0.0.0 for " + hostname);
        }
        return host;
    }
View Full Code Here

        if (server != null) {
            server.getSubstats().tally(curi, stage);
            server.makeDirty();
        }
        try {
            CrawlHost host = getServerCache().getHostFor(curi.getUURI());
            if (host != null) {
                host.getSubstats().tally(curi, stage);
                host.makeDirty();
            }
        } catch (Exception e) {
            logger.log(Level.WARNING, "unable to tally host stats for " + curi, e);
        }
        FrontierGroup group = getGroup(curi);
View Full Code Here

        this.serverCache = serverCache;
    }
   
    public String getClassKey(final CrawlURI curi) {
       
        CrawlHost host;
        host = serverCache.getHostFor(curi.getUURI());
        if(host == null) {
            return "NO-HOST";
        } else if(host.getIP() == null) {
            return "NO-IP-".concat(Long.toString(Math.abs((long) host
                    .getHostName().hashCode()) & DEFAULT_NOIP_BITMASK));
        } else {
            return Long.toString(Math.abs((long) host.getIP().hashCode())
                    % DEFAULT_QUEUES_HOSTS_MODULO);
        }
    }
View Full Code Here

TOP

Related Classes of org.archive.modules.net.CrawlHost

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.