Package com.scaleunlimited.cascading

Examples of com.scaleunlimited.cascading.BasePath


        BaseScoreGenerator scorer = new FixedScoreGenerator();
        BaseFetcher fetcher = new SimpleHttpFetcher(ConfigUtils.BIXO_TEST_AGENT);
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, 1);
       
        String output = "build/test/FetchPipeTest/testFetchPipe";
        BasePath outputPath = platform.makePath(output);
        BasePath statusPath = platform.makePath(outputPath, "status");
        BasePath contentPath = platform.makePath(outputPath, "content");
        Tap status = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath, SinkMode.REPLACE);
        Tap content = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath, SinkMode.REPLACE);

        // Finally we can run it.
        FlowConnector flowConnector = platform.makeFlowConnector();
View Full Code Here


        policy.setRedirectMode(RedirectMode.FOLLOW_TEMP);
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, 1);
       
        String output = "build/test/FetchPipeTest/testRedirectException";
        BasePath outputPath = platform.makePath(output);
        BasePath statusPath = platform.makePath(outputPath, "status");
        BasePath contentPath = platform.makePath(outputPath, "content");
        Tap status = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath, SinkMode.REPLACE);
        Tap content = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath, SinkMode.REPLACE);

        // Finally we can run it.
        FlowConnector flowConnector = platform.makeFlowConnector();
View Full Code Here

        policy.setRequestTimeout(10);
       
        BaseFetcher fetcher = new SimpleHttpFetcher(1, policy, ConfigUtils.BIXO_TEST_AGENT);
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, 1);
       
        BasePath outputPath = makeOutputPath(platform, "testTerminatingFetchPipe");
        BasePath statusPath = platform.makePath(outputPath, "status");
        Tap status = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath, SinkMode.REPLACE);

        // Finally we can run it.
        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(in, status, fetchPipe.getStatusTailPipe());
View Full Code Here

        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy();
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
       
        String output = "build/test/FetchPipeTest/dual";
        BasePath outputPath = platform.makePath(output);
        BasePath statusPath = platform.makePath(outputPath, "status");
        Tap status = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath, SinkMode.REPLACE);
        BasePath contentPath = platform.makePath(outputPath, "content");
        Tap content = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath, SinkMode.REPLACE);

        // Finally we can run it.
        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
View Full Code Here

        BaseScoreGenerator scorer = new SkippedScoreGenerator();
        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy();
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);
       
        BasePath outputPath = makeOutputPath(platform, "testSkippingURLsByScore");
        BasePath contentPath = platform.makePath(outputPath, "content");
        Tap content = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath, SinkMode.REPLACE);
       
        // Finally we can run it.
        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(null, content), fetchPipe);
View Full Code Here

        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy(defaultPolicy);
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);

        // Create the output
        BasePath outputPath = makeOutputPath(platform, "testDurationLimitSimple");
        BasePath statusPath = platform.makePath(outputPath, "status");
        Tap statusSink = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath, SinkMode.REPLACE);
        BasePath contentPath = platform.makePath(outputPath, "content");
        Tap contentSink = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath, SinkMode.REPLACE);

        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(statusSink, contentSink), fetchPipe);
        flow.complete();
View Full Code Here

        BaseRobotsParser parser = new SimpleRobotRulesParser();
        BaseFetchJobPolicy fetchJobPolicy = new DefaultFetchJobPolicy(defaultPolicy.getMaxRequestsPerConnection(), maxUrls, BaseFetchJobPolicy.DEFAULT_CRAWL_DELAY);
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, fetcher, parser, fetchJobPolicy, 1);

        // Create the output
        BasePath outputPath = makeOutputPath(platform, "testMaxUrlsPerServer");
        BasePath statusPath = platform.makePath(outputPath, "status");
        Tap statusSink = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath, SinkMode.REPLACE);
        BasePath contentPath = platform.makePath(outputPath, "content");
        Tap contentSink = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath, SinkMode.REPLACE);

        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(statusSink, contentSink), fetchPipe);
        flow.complete();
View Full Code Here

        return makeInputData(platform, testname, numDomains, numPages, null);
    }
   
    private Tap makeInputData(BasePlatform platform, String testname, int numDomains, int numPages, Payload payload) throws Exception {
        String platformName = platform.getClass().getSimpleName();
        BasePath defaultPath = platform.makePath(BASE_INPUT_PATH + testname + "/" + platformName + "/in");
        Tap in = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), defaultPath, SinkMode.REPLACE);
        TupleEntryCollector write = in.openForWrite(platform.makeFlowProcess());
        for (int i = 0; i < numDomains; i++) {
            for (int j = 0; j < numPages; j++) {
                // Use special domain name pattern so code deep inside of operations "knows" not
View Full Code Here

        return in;
    }
   
    private Tap makeInputData(BasePlatform platform, String testname, String domain, int numPages, Payload payload) throws Exception {
        String platformName = platform.getClass().getSimpleName();
        BasePath defaultPath = platform.makePath(BASE_INPUT_PATH + testname + "/" + platformName + "/in");
        Tap in = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), defaultPath, SinkMode.REPLACE);
        TupleEntryCollector write = in.openForWrite(platform.makeFlowProcess());
        for (int j = 0; j < numPages; j++) {
            write.add(makeTuple(domain, j, payload));
        }
View Full Code Here

                System.exit(-1);
            }

            BixoPlatform platform = new BixoPlatform(RunFakeFetchPipe.class, Platform.Local);
           
            BasePath inputPath = platform.makePath(path.getFile());
            Tap in = platform.makeTap(platform.makeTextScheme(), inputPath);

            Pipe importPipe = new Each("url importer", new Fields("line"), new CreateUrlFunction());

            BaseScoreGenerator scorer = new FixedScoreGenerator();
            BaseFetcher fetcher = new FakeHttpFetcher(true, 10);
            FetchPipe fetchPipe = new FetchPipe(importPipe, scorer, fetcher, 1);

            // Create the output, which is a dual file sink tap.
            String output = "build/test/RunFakeFetchPipe/dual";
            BasePath outputPath = platform.makePath(output);
            BasePath statusPath = platform.makePath(outputPath, "status");
            Tap status = platform.makeTap(platform.makeTextScheme(), statusPath, SinkMode.REPLACE);

            BasePath contentPath = platform.makePath(outputPath, "content");
            Tap content = platform.makeTap(platform.makeTextScheme(), contentPath, SinkMode.REPLACE);
           
            // Finally we can run it.
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
View Full Code Here

TOP

Related Classes of com.scaleunlimited.cascading.BasePath

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.