Package bixo.config

Examples of bixo.config.FetcherPolicy


    /**
     * Creates a fetcher that uses {@code DEFAULT_MAX_THREADS} as its thread count.
     * Delegates to the (maxThreads, userAgent) constructor.
     *
     * @param userAgent - what to use when making requests.
     */
    public SimpleHttpFetcher(UserAgent userAgent) {
        this(DEFAULT_MAX_THREADS, userAgent);
    }
   
    /**
     * Creates a fetcher with the given thread count and a newly constructed
     * {@link FetcherPolicy} (no-argument constructor). Delegates to the
     * (maxThreads, fetcherPolicy, userAgent) constructor.
     *
     * @param maxThreads - thread count passed through to the three-argument constructor.
     * @param userAgent - what to use when making requests.
     */
    public SimpleHttpFetcher(int maxThreads, UserAgent userAgent) {
        this(maxThreads, new FetcherPolicy(), userAgent);
    }
View Full Code Here


    }

    public static BaseFetcher createFetcher(UserAgent userAgent, int maxThreads) {
        // TODO KKr - add static createRobotsFetcher method somewhere that
        // I can use here, and also in SimpleGroupingKeyGenerator
        FetcherPolicy policy = new FetcherPolicy();
        policy.setMaxContentSize(MAX_ROBOTS_SIZE);
        policy.setMaxConnectionsPerHost(MAX_CONNECTIONS_PER_HOST);
        SimpleHttpFetcher fetcher = new SimpleHttpFetcher(maxThreads, policy, userAgent);
        fetcher.setMaxRetryCount(ROBOTS_RETRY_COUNT);
        fetcher.setConnectionTimeout(ROBOTS_CONNECTION_TIMEOUT);
        fetcher.setSocketTimeout(ROBOTS_SOCKET_TIMEOUT);
       
View Full Code Here

    @Override
    public void operate(FlowProcess process, BufferCall<NullContext> buffCall) {
        QueuedValues values = new QueuedValues(buffCall.getArgumentsIterator());

        _collector = buffCall.getOutputCollector();
        FetcherPolicy fetcherPolicy = _fetcher.getFetcherPolicy();
       
        // Each value is a PreFetchedDatum that contains a set of URLs to fetch in one request from
        // a single server, plus other values needed to set state properly.
        while (!Thread.interrupted() && !fetcherPolicy.isTerminateFetch() && !values.isEmpty()) {
            FetchSetDatum datum = values.nextOrNull(_fetcherMode);
           
            try {
                if (datum == null) {
                    trace("Nothing ready to fetch, sleeping...");
View Full Code Here

        "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\n" +
        "<title>LoggingFetcher</title>\n" +
        "</head><body>URL = %s</body></html>\n";
   
    /**
     * Creates a LoggingFetcher with a newly constructed {@link FetcherPolicy}
     * (no-argument constructor) and a hard-coded {@link UserAgent}.
     *
     * @param maxThreads - thread count passed through to the superclass constructor.
     */
    public LoggingFetcher(int maxThreads) {
        // The user agent is built from fixed literal values rather than caller-supplied ones.
        super(maxThreads, new FetcherPolicy(), new UserAgent("agentName", "agentName@domain.com", "http://agentName.domain.com"));
    }
View Full Code Here

     * @param maxThreads - number of requests to make in parallel. Should be 1 to 100?
     * @param userAgent - what to use when making requests.
     * @return BaseFetcher that can be passed to the UrlLengthener constructor.
     */
    public static BaseFetcher makeFetcher(int maxThreads, UserAgent userAgent) {
        FetcherPolicy policy = new FetcherPolicy();
        policy.setRedirectMode(RedirectMode.FOLLOW_NONE);
        policy.setMaxRedirects(MAX_REDIRECTS);
        policy.setMaxConnectionsPerHost(maxThreads);

        SimpleHttpFetcher result = new SimpleHttpFetcher(maxThreads, policy, userAgent);
        result.setDefaultMaxContentSize(MAX_CONTENT_SIZE);
       
        // We don't want any encoding (compression) of the data.
View Full Code Here

TOP

Related Classes of bixo.config.FetcherPolicy

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.