Package cascading.flow

Examples of cascading.flow.Flow
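
All of the snippets below follow the same lifecycle: build source and sink Taps, assemble Pipes, connect them into a Flow with a FlowConnector (or a FlowDef), and call flow.complete() to run the job to completion. As a minimal, self-contained sketch of that pattern (the paths, the flow name, and the pass-through pipe are hypothetical placeholders, not taken from the snippets below):

    import cascading.flow.Flow;
    import cascading.flow.FlowDef;
    import cascading.flow.hadoop.HadoopFlowConnector;
    import cascading.pipe.Pipe;
    import cascading.scheme.hadoop.TextLine;
    import cascading.tap.SinkMode;
    import cascading.tap.Tap;
    import cascading.tap.hadoop.Hfs;

    public class MinimalFlowExample {
        public static void main(String[] args) {
            // Hypothetical input and output locations.
            Tap source = new Hfs(new TextLine(), "example/input");
            Tap sink = new Hfs(new TextLine(), "example/output", SinkMode.REPLACE);

            // A pass-through assembly: tuples move from source to sink unchanged.
            Pipe pipe = new Pipe("copy");

            FlowDef flowDef = FlowDef.flowDef()
                .setName("minimal-copy")
                .addSource(pipe, source)
                .addTailSink(pipe, sink);

            Flow flow = new HadoopFlowConnector().connect(flowDef);
            flow.complete(); // blocks until the flow has finished
        }
    }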


        // Run the fetch pipe with separate binary sinks for status and content,
        // then read the content back to verify that tuples were written.
        Tap statusSink = platform.makeTap(platform.makeBinaryScheme(StatusDatum.FIELDS), statusPath, SinkMode.REPLACE);
        BasePath contentPath = platform.makePath(outputPath, "content");
        Tap contentSink = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath, SinkMode.REPLACE);

        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(statusSink, contentSink), fetchPipe);
        flow.complete();
       
        Tap validate = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentPath);
        TupleEntryIterator tupleEntryIterator = validate.openForRead(platform.makeFlowProcess());
        Assert.assertTrue(tupleEntryIterator.hasNext());
        tupleEntryIterator.next();


            // Run the fetch pipe against text-scheme sinks, exiting the process on any failure.
            BasePath contentPath = platform.makePath(outputPath, "content");
            Tap content = platform.makeTap(platform.makeTextScheme(), contentPath, SinkMode.REPLACE);
           
            // Finally we can run it.
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
            flow.complete();
        } catch (Throwable t) {
            System.err.println("Exception running fake fetch pipe assembly: " + t.getMessage());
            t.printStackTrace(System.err);
            System.exit(-1);
        }

        write.close();

        // Parse the documents that were just written.
        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(in, out, parserPipe);
        flow.complete();
       
        // Currently many of the docs fail parsing:
        // http://webtools.uiuc.edu/calendar/RSS?calId=504
        // http://www.cs.uiuc.edu/rss/cs-news.rss
        // http://fsl.cs.uiuc.edu/opensearch_desc.php

            // Restrict the fetcher to the configured MIME types before crawling.
            fetcherPolicy.setValidMimeTypes(validMimeTypes);

            // Let's limit our crawl to two loops
            for (int curLoop = 1; curLoop <= 2; curLoop++) {
                BasePath curLoopDirPath = CrawlDirUtils.makeLoopDir(platform, workingDirPath, curLoop);
                Flow flow = DemoWebMiningWorkflow.createWebMiningWorkflow(platform, crawlDbPath, curLoopDirPath, fetcherPolicy, userAgent, options);
                flow.complete();

                // Update crawlDbPath to point to the latest crawl db
                crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
            }
           

            // Write the analysis results as text and run the flow, exiting on any failure.
            Tap sinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputDirName), SinkMode.REPLACE);
           
            // Finally we can run it.
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(sourceTap, sinkTap, pipe);
            flow.complete();
        } catch (Throwable t) {
            System.err.println("Exception running AnalyzeMbox: " + t.getMessage());
            t.printStackTrace(System.err);
            System.exit(-1);
        }

        // Bind each tail pipe to its sink by name. The returned Flow is not started here;
        // the caller is expected to run it with flow.complete().
        sinkMap.put(statusPipe.getName(), statusSink);
        sinkMap.put(contentPipe.getName(), contentSink);
        sinkMap.put(resultsPipe.getName(), resultsSink);

        FlowConnector flowConnector = platform.makeFlowConnector();
        Flow flow = flowConnector.connect(inputSource, sinkMap, updatePipe, statusPipe, contentPipe, resultsPipe);

        return flow;
    }

        // Connect two DRM sources to a single grouping tail through a named FlowDef.
        FlowDef flowDef = new FlowDef()
            .setName("group-DRMs-by-key")
            .addSource(lhs, dRM1Source)
            .addSource(rhs, dRM2Source)
            .addTailSink(groupByItemIDPipe, groupedOutputSink);
        Flow flow = new HadoopFlowConnector().connect(flowDef);
        flow.complete();

        //todo: not sure if it matters but may need to rename the part files to .csv
    }

        // A single-source, single-tail FlowDef that writes a DRM out as CSV-style text.
        FlowDef flowDef = new FlowDef()
            .setName("convert-to-CSV")
            .addSource(dRM1, dRM1Source)
            .addTailSink(dRM1, outputSink);
        Flow flow = new HadoopFlowConnector().connect(flowDef);
        flow.complete();

        //todo: not sure if it matters but may need to rename the part files to .csv
    }
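
The TODO in the two snippets above (giving the part files a .csv extension) could be handled after flow.complete() with a small helper like this sketch; renamePartsToCsv and its outputDir argument are hypothetical, not part of the original code, and a Hadoop FileSystem is assumed:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Rename every part-* file under outputDir to part-*.csv.
    static void renamePartsToCsv(String outputDir) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        for (FileStatus status : fs.globStatus(new Path(outputDir, "part-*"))) {
            Path src = status.getPath();
            fs.rename(src, new Path(src.getParent(), src.getName() + ".csv"));
        }
    }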

        // Now read from the results, and write to a Solr index.
        Pipe writePipe = new Pipe("tuples to Solr");

        Tap solrSink = makeSolrSink(testFields, out);
        Flow flow = makeFlowConnector().connect(source, solrSink, writePipe);
        flow.complete();

        // Open up the Solr index, and do some searches.
        System.setProperty("solr.data.dir", out + "/part-00000");

        CoreContainer coreContainer = new CoreContainer(SOLR_HOME_DIR);

    // DebugLevel.VERBOSE keeps every Debug operation in the planned flow.
    flowDef.setDebugLevel( DebugLevel.VERBOSE );

    // set to AssertionLevel.STRICT for all assertions, or AssertionLevel.NONE in production
    flowDef.setAssertionLevel( AssertionLevel.STRICT );

    Flow similarityFlow = flowConnector.connect( flowDef );
    similarityFlow.writeDOT( "dot/similarity.dot" ); // write the flow's plan as a Graphviz DOT file
    similarityFlow.complete();
    }
