Package bixo.datum

Examples of bixo.datum.ScoredUrlDatum


            long fetchTime = fetchStartTime + (i * 10);
            int groupingKey = 100;
            String groupingRef = "groupingRef";
            List<ScoredUrlDatum> scoredUrls = new ArrayList<ScoredUrlDatum>();
            String url = String.format("http://domain-%03d.com/index.html", i);
            scoredUrls.add(new ScoredUrlDatum(url, groupingRef, UrlStatus.UNFETCHED, 0.0));
            FetchSetDatum datum = new FetchSetDatum(scoredUrls, fetchTime, fetchDelay, groupingKey, groupingRef);
            datums[i] = datum;
            assertTrue(queue.offer(datum));
        }
       
View Full Code Here


        Server server = startServer(new ResourcesResponseHandler(), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8088/simple-page.html";
       
        try {
            fetcher.get(new ScoredUrlDatum(url));
            fail("Exception not thrown");
        } catch (IOFetchException e) {
            assertTrue(e.getCause() instanceof HttpHostConnectException);
        } finally {
            server.stop();
View Full Code Here

            }
        }

        BaseFetcher fetcher = new SimpleHttpFetcher(1, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/simple-page.html";
        fetcher.get(new ScoredUrlDatum(url));
       
        // TODO KKr - control keep-alive (linger?) value for Jetty, so we can set it
        // to something short and thus make this sleep delay much shorter.
        Thread.sleep(2000);
       
        fetcher.get(new ScoredUrlDatum(url));
        server.stop();
    }
View Full Code Here

        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);

        String url = "http://localhost:8089/test.html";
        try {
            fetcher.get(new ScoredUrlDatum(url));
            fail("Aborted fetch exception not thrown");
        } catch (AbortedFetchException e) {
            assertEquals(AbortedFetchReason.SLOW_RESPONSE_RATE, e.getAbortReason());
        }
        server.stop();
View Full Code Here

           
            @Override
            public void run() {
                String url = "http://localhost:8089/test.html";
                try {
                    fetcher.get(new ScoredUrlDatum(url));
                    failMsg[0] = "No exception thrown, should have thrown an aborted by interrupt exception";
                } catch (AbortedFetchException e) {
                    if (e.getAbortReason() != AbortedFetchReason.INTERRUPTED) {
                        failMsg[0] = "Wrong abort exception thrown, should have thrown an aborted by interrupt exception";
                    }
View Full Code Here

        policy.setMinResponseRate(FetcherPolicy.NO_MIN_RESPONSE_RATE);

        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);

        String url = "http://localhost:8089/test.html";
        fetcher.get(new ScoredUrlDatum(url));
        server.stop();
    }
View Full Code Here

    public final void testLargeContent() throws Exception {
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new RandomResponseHandler(policy.getMaxContentSize() * 2), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/test.html";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();

        assertTrue("Content size should be truncated", result.getContentLength() <= policy.getMaxContentSize());
    }
View Full Code Here

        FetcherPolicy policy = new FetcherPolicy();
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        fetcher.setDefaultMaxContentSize(1000);
        fetcher.setMaxContentSize("image/png", 5000);
        ScoredUrlDatum datumToFetch = new ScoredUrlDatum("http://localhost:8089/karlie.html");
       
        FetchedDatum result1 = fetcher.get(datumToFetch);
        FetchedDatum result2 = fetcher.get(datumToFetch);
       
        // Verify that we got the same data from each fetch request.
        assertEquals(1000, result1.getContentLength());
        assertEquals(1000, result2.getContentLength());
        byte[] bytes1 = result1.getContentBytes();
        byte[] bytes2 = result2.getContentBytes();
        for (int i = 0; i < bytes1.length; i++) {
            assertEquals(bytes1[i], bytes2[i]);
        }

        datumToFetch = new ScoredUrlDatum("http://localhost:8089/bixolabs_mining.png");
        FetchedDatum result3 = fetcher.get(datumToFetch);
        assertTrue(result3.getContentLength() > 1000);
       
        fetcher.setMaxContentSize("image/png", 1500);
        try {
View Full Code Here

    public final void testLargeHtml() throws Exception {
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new ResourcesResponseHandler(), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/karlie.html";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();

        assertTrue("Content size should be truncated", result.getContentLength() <= policy.getMaxContentSize());

    }
View Full Code Here

    public final void testContentTypeHeader() throws Exception {
        FetcherPolicy policy = new FetcherPolicy();
        Server server = startServer(new ResourcesResponseHandler(), 8089);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        String url = "http://localhost:8089/simple-page.html";
        FetchedDatum result = fetcher.get(new ScoredUrlDatum(url));
        server.stop();
       
        String contentType = result.getHeaders().getFirst(HttpHeaderNames.CONTENT_TYPE);
        assertNotNull(contentType);
        assertEquals("text/html", contentType);
View Full Code Here

TOP

Related Classes of bixo.datum.ScoredUrlDatum

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.