Package de.anomic.crawler.retrieval

Examples of de.anomic.crawler.retrieval.Response
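All of these examples follow the same retrieval pattern: build a Request through the LoaderDispatcher, load it under a chosen CacheStrategy, then either read the raw payload with Response.getContent() or parse it with Response.parse(). Below is a minimal sketch of that pattern, assembled only from calls that appear in the excerpts on this page; the helper name loadAndParse is hypothetical, and imports are omitted because package locations differ between YaCy versions.

    // Minimal sketch of the recurring load-and-parse pattern. The helper name
    // loadAndParse is hypothetical; all calls are taken from the excerpts below,
    // and imports are omitted because package locations differ between YaCy versions.
    private static Document loadAndParse(final LoaderDispatcher loader, final DigestURI url) {
        try {
            // build a Request and load it; the CacheStrategy decides between cache and network
            final Response response = loader.load(
                    loader.request(url, true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, true);
            if (response == null) return null;               // nothing retrievable
            if (response.getContent() == null) return null;  // empty payload
            // merge all parsed documents into a single Document
            return Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
        } catch (final IOException e) {
            return null; // network or cache failure
        } catch (final Parser.Failure e) {
            return null; // the content could not be parsed
        }
    }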


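The first excerpt, from a worker thread that logs under "Load_RSS", fetches an RSS feed while bypassing the cache (CacheStrategy.NOCACHE) and hands the raw bytes to RSSReader: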

    public void run() {
        RSSReader rss = null;
        try {
            final Response response = this.sb.loader.load(this.sb.loader.request(this.urlf, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, true);
            final byte[] resource = response == null ? null : response.getContent();
            rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
        } catch (final MalformedURLException e) {
            Log.logWarning("Load_RSS", "rss loading for url '" + getName().substring(9) + "' failed: " + e.getMessage());
            return;
        } catch (final IOException e) {
            // the excerpt is truncated here; this completion mirrors the branch above
            Log.logWarning("Load_RSS", "rss loading for url '" + getName().substring(9) + "' failed: " + e.getMessage());
            return;
        }
    }


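This bookmark helper converts a URL string to a DigestURI, loads the resource with CacheStrategy.IFEXIST (use the cache entry if one exists, otherwise go online), and merges the parse results into a single Document: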

  private static Document loadDocument(final String url, final LoaderDispatcher loader) {
    DigestURI uri;
    Response response;
    try {
      uri = new DigestURI(url);
    } catch (final MalformedURLException e) {
      Log.logWarning(YMarkTables.BOOKMARKS_LOG, "loadDocument failed due to malformed url: "+url);
      return null;
    }
    try {
      response = loader.load(loader.request(uri, true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, true);
    } catch (final IOException e) {
      Log.logWarning(YMarkTables.BOOKMARKS_LOG, "loadDocument failed due to IOException for url: "+url);
      return null;
    }
    try {
      return Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
    } catch (final Failure e) {
      Log.logWarning(YMarkTables.BOOKMARKS_LOG, "loadDocument failed due to a parser failure for url: "+url);
      return null;
    }
  }
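processSurrogate reads Dublin Core entries from a surrogate input stream, skips URLs outside the accepted crawl domains, wraps each accepted entry in a synthetic Request/Response pair, and enqueues it for document analysis: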

    public void processSurrogate(final InputStream is, final String name) throws IOException {
        final SurrogateReader reader = new SurrogateReader(is, 100);
        final Thread readerThread = new Thread(reader, name);
        readerThread.start();
        DCEntry surrogate;
        Response response;
        while ((surrogate = reader.take()) != DCEntry.poison) {
            // check if url is in accepted domain
            assert surrogate != null;
            assert this.crawlStacker != null;
            final String urlRejectReason = this.crawlStacker.urlInAcceptedDomain(surrogate.getIdentifier(true));
            if (urlRejectReason != null) {
                this.log.logWarning("Rejected URL '" + surrogate.getIdentifier(true) + "': " + urlRejectReason);
                continue;
            }

            // create a queue entry
            final Document document = surrogate.document();
            final Request request = new Request(
                    ASCII.getBytes(this.peers.mySeed().hash),
                    surrogate.getIdentifier(true),
                    null,
                    "",
                    surrogate.getDate(),
                    this.crawler.defaultSurrogateProfile.handle(),
                    0,
                    0,
                    0,
                    0
            );
            // wrap the surrogate in a synthetic "200 OK" Response; no real HTTP transfer took place
            response = new Response(request, null, null, "200", this.crawler.defaultSurrogateProfile);
            final indexingQueueEntry queueEntry = new indexingQueueEntry(Segments.Process.SURROGATES, response, new Document[]{document}, null);

            // place the queue entry into the concurrent process of the condenser (document analysis)
            try {
                this.indexingCondensementProcessor.enQueue(queueEntry);
            } catch (final Exception e) {
                // the excerpt is truncated here; skip this entry if queueing fails
                Log.logException(e);
            }
        }
    }

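Here a background thread loads a request with CacheStrategy.IFFRESH, validates the content and headers of the Response, and parses it, rejecting any document that denies indexing: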
        new Thread() {
            @Override
            public void run() {
                try {
                    final Response response = Switchboard.this.loader.load(request, CacheStrategy.IFFRESH, true);
                    if (response == null) {
                        throw new IOException("response == null");
                    }
                    if (response.getContent() == null) {
                        throw new IOException("content == null");
                    }
                    if (response.getResponseHeader() == null) {
                        throw new IOException("header == null");
                    }
                    final Document[] documents = response.parse();
                    if (documents != null) {
                        for (final Document document: documents) {
                            if (document.indexingDenied()) {
                                throw new Parser.Failure("indexing is denied", url);
                            }
                        }
                    }
                } catch (final IOException e) {
                    Log.logException(e);
                } catch (final Parser.Failure e) {
                    Log.logException(e);
                }
                // the excerpt is truncated here; the full source continues with indexing
            }
        }.start(); // completion: the thread is presumably started in the full source

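This variant loads an RSS resource through the load overload that takes no explicit size limit: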

                // if we have a URL, try to load the RSS feed
                RSSReader rss = null;
                try {
                    final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, true);
                    final byte[] resource = (response == null) ? null : response.getContent();
                    //System.out.println("BLEKKO: " + UTF8.String(resource));
                    rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
                } catch (final IOException e) {
                    Log.logException(e);
                }
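The snippet loader below first tries to derive a snippet from URL metadata (title, creator, subject, or the URL itself), consulting the cache where possible, and only falls back to a network load before parsing the resource: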

            // (the enclosing method signature is truncated in this excerpt)
            ResultClass source) {
        /* ===========================================================================
         * LOAD RESOURCE DATA
         * =========================================================================== */
        // if the snippet is not in the cache, we can try to get it from the htcache
        Response response = null;
        try {
            // first try to get the snippet from metadata
            String loc;
            final Request request = loader.request(url, true, reindexing);
            final boolean inCache = de.anomic.http.client.Cache.has(comp.url());
            final boolean noCacheUsage = url.isFile() || url.isSMB() || cacheStrategy == null;
            if (containsAllHashes(loc = comp.dc_title(), queryhashes) ||
                containsAllHashes(loc = comp.dc_creator(), queryhashes) ||
                containsAllHashes(loc = comp.dc_subject(), queryhashes) ||
                containsAllHashes(loc = comp.url().toNormalform(true, true).replace('-', ' '), queryhashes)) {
                // try to create the snippet from information given in the url
                if (inCache) response = loader == null ? null : loader.load(request, CacheStrategy.CACHEONLY, true);
                Document document = null;
                if (response != null) {
                    try {
                        document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                    } catch (final Parser.Failure e) {
                        // ignore parser failures; the metadata-based snippet below is still usable
                    }
                }
                init(url.hash(), loc, ResultClass.SOURCE_METADATA, null);
                return document;
            } else {
                // try to load the resource from the cache
                response = loader == null ? null : loader.load(request, noCacheUsage ? CacheStrategy.NOCACHE : cacheStrategy, true);
                if (response == null) {
                    // if we did not get any result, we can still report success when we are not allowed to go online
                    if (cacheStrategy == null || cacheStrategy.mustBeOffline()) {
                        init(url.hash(), null, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry");
                        return null;
                    }

                    // if it is still not available, report an error
                    init(url.hash(), null, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry");
                    return null;
                } else {
                    // place entry on indexing queue
                    Switchboard.getSwitchboard().toIndexer(response);
                    source = ResultClass.SOURCE_WEB;
                }
            }
        } catch (final Exception e) {
            //Log.logException(e);
            init(url.hash(), null, ResultClass.ERROR_SOURCE_LOADING, "error loading resource: " + e.getMessage());
            return null;
        }

        /* ===========================================================================
         * PARSE RESOURCE
         * =========================================================================== */
        Document document = null;
        try {
            document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
        } catch (final Parser.Failure e) {
            init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, e.getMessage()); // cannot be parsed
            return null;
        }
        if (document == null) {
            // the excerpt is truncated here; the full source reports the failure before returning
            return null;
        }
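The dictionary loader servlet downloads geolocation and word dictionaries (GEON0, GEODB1, DRW0) over the network, stores them via FileUtils.copy, and registers them with the LibraryProvider; each dictionary also supports remove, deactivate, and activate actions: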

        // GEON0
        if (post.containsKey("geon0Load")) {
            // load from the net
            try {
                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
                final byte[] b = response.getContent();
                FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
                LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
                prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
                prop.put("geon0ActionLoaded", 1);
            } catch (final MalformedURLException e) {
                Log.logException(e);
                prop.put("geon0ActionLoaded", 2);
                prop.put("geon0ActionLoaded_error", e.getMessage());
            } catch (final IOException e) {
                Log.logException(e);
                prop.put("geon0ActionLoaded", 2);
                prop.put("geon0ActionLoaded_error", e.getMessage());
            }
        }

        if (post.containsKey("geon0Remove")) {
            FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.file());
            FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.fileDisabled());
            LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
            prop.put("geon0ActionRemoved", 1);
        }

        if (post.containsKey("geon0Deactivate")) {
            LibraryProvider.Dictionary.GEON0.file().renameTo(LibraryProvider.Dictionary.GEON0.fileDisabled());
            LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
            prop.put("geon0ActionDeactivated", 1);
        }

        if (post.containsKey("geon0Activate")) {
            LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
            LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
            prop.put("geon0ActionActivated", 1);
        }

        // GEO1
        if (post.containsKey("geo1Load")) {
            // load from the net
            try {
                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
                final byte[] b = response.getContent();
                FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
                LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
                LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
                prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
                prop.put("geo1ActionLoaded", 1);
            } catch (final MalformedURLException e) {
                Log.logException(e);
                prop.put("geo1ActionLoaded", 2);
                prop.put("geo1ActionLoaded_error", e.getMessage());
            } catch (final IOException e) {
                Log.logException(e);
                prop.put("geo1ActionLoaded", 2);
                prop.put("geo1ActionLoaded_error", e.getMessage());
            }
        }

        if (post.containsKey("geo1Remove")) {
            FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.file());
            FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.fileDisabled());
            LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
            prop.put("geo1ActionRemoved", 1);
        }

        if (post.containsKey("geo1Deactivate")) {
            LibraryProvider.Dictionary.GEODB1.file().renameTo(LibraryProvider.Dictionary.GEODB1.fileDisabled());
            LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
            prop.put("geo1ActionDeactivated", 1);
        }

        if (post.containsKey("geo1Activate")) {
            LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
            LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
            prop.put("geo1ActionActivated", 1);
        }

        // DRW0
        if (post.containsKey("drw0Load")) {
            // load from the net
            try {
                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false);
                final byte[] b = response.getContent();
                FileUtils.copy(b, LibraryProvider.Dictionary.DRW0.file());
                LibraryProvider.integrateDeReWo();
                LibraryProvider.initDidYouMean();
                prop.put("drw0Status", LibraryProvider.Dictionary.DRW0.file().exists() ? 1 : 0);
                prop.put("drw0ActionLoaded", 1);
            } catch (final MalformedURLException e) {
                // completion mirrors the GEON0/GEO1 branches above (the excerpt is truncated here)
                Log.logException(e);
                prop.put("drw0ActionLoaded", 2);
                prop.put("drw0ActionLoaded_error", e.getMessage());
            } catch (final IOException e) {
                Log.logException(e);
                prop.put("drw0ActionLoaded", 2);
                prop.put("drw0ActionLoaded_error", e.getMessage());
            }
        }

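Finally, the OSM tile fetcher serves a map tile from the cache when possible and only downloads it through the loader on a miss, then decodes the bytes with ImageIO: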
        //System.out.println("*** DEBUG: fetching OSM tile: " + tileURL.toNormalform(true, true));
        byte[] tileb = Cache.getContent(tileURL.hash());
        if (tileb == null) {
            // download resource using the crawler and keep resource in memory if possible
            Response entry = null;
            try {
                entry = Switchboard.getSwitchboard().loader.load(Switchboard.getSwitchboard().loader.request(tileURL, false, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, true);
            } catch (final IOException e) {
                Log.logWarning("OSMTile", "cannot load: " + e.getMessage());
                return null;
            }
            tileb = entry.getContent();
        }
        try {
            ImageIO.setUseCache(false); // do not write a cache to disc; keep in RAM
            return ImageIO.read(new ByteArrayInputStream(tileb));
        } catch (final EOFException e) {
            // the excerpt is truncated here; treat truncated image data as unavailable
            Log.logException(e);
            return null;
        } catch (final IOException e) {
            Log.logException(e);
            return null;
        }
