Package cascading.pipe

Examples of cascading.pipe.Pipe


       
        Payload payload = new Payload();
        payload.put("payload-field-1", 1);
        Tap in = makeInputData(platform, "testRedirectException", "localhost:" + port, numPages, payload);

        Pipe pipe = new Pipe("urlSource");
        BaseScoreGenerator scorer = new FixedScoreGenerator();
        FetcherPolicy policy = new FetcherPolicy();
        policy.setRedirectMode(RedirectMode.FOLLOW_TEMP);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, 1);
View Full Code Here


        final int numPages = 10;
        final int port = 8089;
       
        Tap in = makeInputData(platform, "testTerminatingFetchPipe", "localhost:" + port, numPages, null);

        Pipe pipe = new Pipe("urlSource");
        BaseScoreGenerator scorer = new FixedScoreGenerator();
       
        FetcherPolicy policy = new FetcherPolicy();
        policy.setCrawlEndTime(System.currentTimeMillis() + 50000);
        // Assume we should only need 10ms for fetching all 10 URLs.
View Full Code Here

    protected void testPayloads(BixoPlatform platform) throws Exception {
        Payload payload = new Payload();
        payload.put("key", "value");
        Tap in = makeInputData(platform, "testPayloads", 1, 1, payload);

        Pipe pipe = new Pipe("urlSource");
        BaseFetcher fetcher = new FakeHttpFetcher(false, 10);
        BaseScoreGenerator scorer = new FixedScoreGenerator();
        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy();
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
View Full Code Here

   
    protected void testSkippingURLsByScore(BixoPlatform platform) throws Exception {
        // Create four pages, for domain0/page0, domain0/page1, domain1/page0, domain1/page1
        Tap in = makeInputData(platform, "testSkippingURLsByScore", 2, 2);

        Pipe pipe = new Pipe("urlSource");
        BaseFetcher fetcher = new FakeHttpFetcher(false, 1);
        BaseScoreGenerator scorer = new SkippedScoreGenerator();
        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy();
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
View Full Code Here

    protected void testDurationLimitSimple(BixoPlatform platform) throws Exception {
        // Pretend like we have 10 URLs from the same domain
        Tap in = makeInputData(platform, "testDurationLimitSimple", 1, 10);

        // Create the fetch pipe we'll use to process these fake URLs
        Pipe pipe = new Pipe("urlSource");
       
        // This will force all URLs to get skipped because of the crawl end time limit.
        FetcherPolicy defaultPolicy = new FetcherPolicy();
        defaultPolicy.setCrawlEndTime(0);
        BaseFetcher fetcher = new FakeHttpFetcher(false, 1, defaultPolicy);
View Full Code Here

        // Pretend like we have 2 URLs from the same domain
        final int sourceUrls = 2;
        Tap in = makeInputData(platform, "testMaxUrlsPerServer", 1, sourceUrls);

        // Create the fetch pipe we'll use to process these fake URLs
        Pipe pipe = new Pipe("urlSource");
       
        // This will limit us to one URL.
        final int maxUrls = 1;
        FetcherPolicy defaultPolicy = new FetcherPolicy();
        BaseFetcher fetcher = new FakeHttpFetcher(false, 1, defaultPolicy);
View Full Code Here

            BixoPlatform platform = new BixoPlatform(RunFakeFetchPipe.class, Platform.Local);
           
            BasePath inputPath = platform.makePath(path.getFile());
            Tap in = platform.makeTap(platform.makeTextScheme(), inputPath);

            Pipe importPipe = new Each("url importer", new Fields("line"), new CreateUrlFunction());

            BaseScoreGenerator scorer = new FixedScoreGenerator();
            BaseFetcher fetcher = new FakeHttpFetcher(true, 10);
            FetchPipe fetchPipe = new FetchPipe(importPipe, scorer, fetcher, 1);
View Full Code Here

    public void testParserPipe() throws Exception {

        BixoPlatform platform = new BixoPlatform(ParsePipeTest.class, Platform.Local);
       

        Pipe pipe = new Pipe("parse_source");
        ParsePipe parserPipe = new ParsePipe(pipe, new SimpleParser());
        BasePath inputPath = platform.makePath("build/test/ParserPipeTest/in");
        Tap in = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), inputPath);
        BasePath outputPath = platform.makePath("build/test/ParserPipeTest/out");
        Tap out = platform.makeTap(platform.makeBinaryScheme(ParsedDatum.FIELDS), outputPath, SinkMode.REPLACE);
View Full Code Here

    @SuppressWarnings("serial")
  private static class SplitEmails extends SubAssembly {

    public SplitEmails(FetchPipe fetchPipe) {
            Pipe splitPipe = new Pipe(SPLITTER_PIPE_NAME, fetchPipe.getContentTailPipe());
            splitPipe = new Each(splitPipe, new MboxSplitterFunction());
            // TODO KKr - code currently relies on splitPipe being first tail pipe.
            setTails(splitPipe, fetchPipe.getStatusTailPipe());
      }
View Full Code Here

          // that the file already has the results of splitting the mbox file into emails.
            BasePath inputPath = platform.makePath(inputFileName);
            platform.assertPathExists(inputPath, "input file");
            Tap sourceTap = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), inputPath);
           
            Pipe pipe = new Pipe("Email Analyzer");
            pipe = new Each(pipe, new ParseEmailFunction());
           
            // We'll get output that has ANALYZED_EMAIL_FIELDS in it. We want to group by
            // the message-id field, and then do an aggregation on that of the scores.
            pipe = new GroupBy(pipe, new Fields(FieldNames.MESSAGE_ID));
View Full Code Here

TOP

Related Classes of cascading.pipe.Pipe

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.