Package org.archive.modules.net

Examples of org.archive.modules.net.CrawlHost


        if(dnsName == null) {
            curi.setFetchStatus(S_UNFETCHABLE_URI);
            return;
        }

        CrawlHost targetHost = getServerCache().getHostFor(dnsName);
        if (isQuadAddress(curi, dnsName, targetHost)) {
          // We're done processing.
          return;
        }
       
        // Do actual DNS lookup.
        curi.setFetchBeginTime(System.currentTimeMillis());

        // Try to get the records for this host (assume domain name)
        // TODO: Bug #935119 concerns potential hang here
        String lookupName = dnsName.endsWith(".") ? dnsName : dnsName + ".";
        try {
            rrecordSet = (new Lookup(lookupName, TypeType, ClassType)).run();
        } catch (TextParseException e) {
            rrecordSet = null;
        }
        curi.setContentType("text/dns");
        if (rrecordSet != null) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("Found recordset for " + lookupName);
            }
          storeDNSRecord(curi, dnsName, targetHost, rrecordSet);
        } else {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("Failed find of recordset for " + lookupName);
            }
            if (getAcceptNonDnsResolves()||"localhost".equals(dnsName)) {
                // Do lookup that bypasses javadns.
                InetAddress address = null;
                try {
                    address = InetAddress.getByName(dnsName);
                } catch (UnknownHostException e1) {
                    address = null;
                }
                if (address != null) {
                    targetHost.setIP(address, DEFAULT_TTL_FOR_NON_DNS_RESOLVES);
                    curi.setFetchStatus(S_GETBYNAME_SUCCESS);
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine("Found address for " + dnsName +
                            " using native dns.");
                    }
View Full Code Here


    /**
     * Stores the given server cache on this instance; other methods of this
     * class use it to look up the {@code CrawlHost} for a URI.
     *
     * @param serverCache the cache to use for subsequent host lookups
     */
    public void setServerCache(ServerCache serverCache) {
        this.serverCache = serverCache;
    }
   
    public String getClassKey(CrawlURI cauri) {
        CrawlHost host = serverCache.getHostFor(cauri.getUURI());
        if (host == null || host.getIP() == null) {
            // if no server or no IP, use superclass implementation
            return super.getClassKey(cauri);
        }
        // use dotted-decimal IP address
        return host.getIP().getHostAddress();
    }
View Full Code Here

    /**
     * Adds outlinks to whois:{domain} and whois:{ipAddress}
     */
    protected void addWhoisLinks(CrawlURI curi) throws InterruptedException {
        CrawlHost ch = serverCache.getHostFor(curi.getUURI());

        if (ch == null) {
            return;
        }

        if (ch.getIP() != null) {
            // do a whois lookup on the ip address
            addWhoisLink(curi, ch.getIP().getHostAddress());
        }

        if (InternetDomainName.isValid(ch.getHostName())) {
            // do a whois lookup on the domain
            try {
                String topmostAssigned = InternetDomainName.from(ch.getHostName()).topPrivateDomain().toString();
                addWhoisLink(curi, topmostAssigned);
            } catch (IllegalStateException e) {
                // java.lang.IllegalStateException: Not under a public suffix: mod.uk
                logger.warning("problem resolving topmost assigned domain, will try whois lookup on the plain hostname " + ch.getHostName() + " - " + e);
                addWhoisLink(curi, ch.getHostName());
            }
        }
    }
View Full Code Here

TOP

Related Classes of org.archive.modules.net.CrawlHost

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.