Package cascading.flow

Examples of cascading.flow.Flow.complete()


            Tap urlSink = _platform.makeTap(_platform.makeBinaryScheme(CrawlDbDatum.FIELDS), _destDirPath, SinkMode.REPLACE);

            FlowConnector flowConnector = _platform.makeFlowConnector();
            Flow flow = flowConnector.connect(urlSource, urlSink, importPipe);
            flow.complete();
        } catch (Exception e) {
            throw e;
        }
    }
View Full Code Here


                BasePath curLoopDirPath = CrawlDirUtils.makeLoopDir(platform, outputPath, curLoop);
                String curLoopDirName = curLoopDirPath.getName();
                setLoopLoggerFile(logsDir+curLoopDirName, curLoop);

                Flow flow = DemoCrawlWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, options);
                flow.complete();
               
                // Writing out .dot files is a good way to verify your flows.
//              flow.writeDOT("build/valid-flow.dot");

                // Update crawlDbPath to point to the latest crawl db
View Full Code Here

        resultsPipe = new Every(resultsPipe, new LatestUrlDatumBuffer(), Fields.RESULTS);


        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(sources, resultSink, resultsPipe);
        flow.complete();
       
        // verify that the resulting pipe has the latest tuple
       
        Tap testSink = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), resultsPath);
        TupleEntryIterator reader = testSink.openForRead(platform.makeFlowProcess());
View Full Code Here

            Tap sourceTap = platform.makeTap(platform.makeTextScheme(), inputPath);
            Tap sinkTap = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), crawlDBPath, SinkMode.REPLACE);
           
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(sourceTap, sinkTap, importPipe);
            flow.complete();
        }

        return crawlDBPath.getAbsolutePath();
    }
   
View Full Code Here

        BaseScoreGenerator scorer = new FixedScoreGenerator();
        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, 1);

        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
        flow.complete();
       
        // Test for all valid fetches.
        Tap validate = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath);
        TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
        while (tupleEntryIterator.hasNext()) {
View Full Code Here

        FetchPipe fetchPipe = new FetchPipe(pipe, scorer, fetcher, 1);

        FlowConnector flowConnector = platform.makeFlowConnector();

        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
        flow.complete();
       
        // Test for 10 good fetches.
        Tap validate = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath);
        TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
        int fetchedPages = 0;
View Full Code Here

            // Finally we can run it.
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(sourceTap, sinkTapMap, splitterPipe, mboxPageStatusPipe, analysisPipe);
            flow.writeDOT("build/goodFlow.dot");
            flow.complete();
        } catch (Throwable t) {
            System.err.println("Exception running AnalyzeEmail: " + t.getMessage());
            t.printStackTrace(System.err);
            System.exit(-1);
        }
View Full Code Here

        UserAgent userAgent = new UserAgent("test", "test@domain.com", "http://test.domain.com");
        Server server = null;
        try {
            server = startServer(new FakeWebSiteHandler(), 8089);
            Flow flow = DemoCrawlWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, options);
            flow.complete();

            // Update the crawlDb path
            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

            // Now we should have an output/1-<timestamp>/ directory, where the
View Full Code Here

            // Do it one more time, to verify status gets propagated forward.
            curLoopDirPath = CrawlDirUtils.makeLoopDir(platform, baseDirPath, 2);

            flow = DemoCrawlWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, options);
            flow.complete();
            // Update crawldb path
            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

            crawldbTap = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), crawlDbPath);
            iter = crawldbTap.openForRead(platform.makeFlowProcess());
View Full Code Here

            SinkTap sinkTap = new NullSinkTap(new Fields("url"));
           
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(sourceTap, sinkTap, pipe);

            flow.complete();
        } catch (Exception e) {
            System.err.println("Exception running tool: " + e.getMessage());
            e.printStackTrace(System.err);
            System.exit(-1);
        }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.