Examples of Every

cascading.pipe.Every
The Every operator applies an {@link Aggregator} or {@link Buffer} to every grouping.
Any number of Every instances may follow other Every, {@link GroupBy}, or {@link CoGroup} instances if they apply anAggregator, not a Buffer. If a Buffer, only one Every may follow a GroupBy or CoGroup.
Every operators create aggregate values for every grouping they encounter. This aggregate value is added to the current grouping Tuple.
In the case of a CoGroup, the grouping Tuple will be all the grouping keys from all joined streams, and if an "outer" type join is used, one value on the groupingTuple may be null.
Subsequent Every instances can continue to append values to the grouping Tuple. When an Each follows and Every, the Each applies its operation to the grouping Tuple (thus all child values in the grouping are discarded and only aggregate values are propagated).
org.jboss.seam.cron.api.scheduling.Every

Examples of cascading.pipe.Every

        
        // Group the finished datums, the skipped datums, status, outlinks
        Pipe updatePipe = new CoGroup("update pipe", Pipe.pipes(finishedDatumsFromDb, statusPipe, analyzerPipe, outlinksPipe), 
                        Fields.fields(new Fields(CrawlDbDatum.URL_FIELD), new Fields(StatusDatum.URL_FN), 
                                        new Fields(AnalyzedDatum.URL_FIELD), new Fields(LinkDatum.URL_FN)), null, new OuterJoin());
        updatePipe = new Every(updatePipe, new UpdateCrawlDbBuffer(), Fields.RESULTS);


        
        // output : loop dir specific crawldb
        BasePath outCrawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
        Tap crawlDbSink = platform.makeTap(platform.makeTextScheme(), outCrawlDbPath, SinkMode.REPLACE);

View Full Code Here

Examples of cascading.pipe.Every

    public FetchPipe(Pipe urlProvider, BaseScoreGenerator scorer, BaseFetcher fetcher, BaseFetcher robotsFetcher, BaseRobotsParser parser,
                    BaseFetchJobPolicy fetchJobPolicy, int numReducers) {
        super(urlProvider);
        Pipe robotsPipe = new Each(urlProvider, new GroupFunction(new GroupByDomain()));
        robotsPipe = new GroupBy("Grouping URLs by IP/delay", robotsPipe, GroupedUrlDatum.getGroupingField());
        robotsPipe = new Every(robotsPipe, new FilterAndScoreByUrlAndRobots(robotsFetcher, parser, scorer), Fields.RESULTS);
        
        // Split into records for URLs that are special (not fetchable) and regular
        SplitterAssembly splitter = new SplitterAssembly(robotsPipe, new SplitIntoSpecialAndRegularKeys());
        
        // Now generate sets of URLs to fetch. We'll wind up with all URLs for the same server & the same crawl delay,
        // ordered by score, getting passed per list to the PreFetchBuffer. This will generate PreFetchDatums that contain a key
        // based on the hash of the IP address (with a range of values == number of reducers), plus a list of URLs and a target
        // crawl time.
        Pipe prefetchPipe = new GroupBy("Distributing URL sets", splitter.getRHSPipe(), GroupedUrlDatum.getGroupingField(), ScoredUrlDatum.getSortingField(), true);
        
        prefetchPipe = new Every(prefetchPipe, new MakeFetchSetsBuffer(fetchJobPolicy, numReducers), Fields.RESULTS);
        Pipe fetchPipe = new GroupBy("Fetching URL sets", prefetchPipe, FetchSetDatum.getGroupingField(), FetchSetDatum.getSortingField());
        fetchPipe = new Every(fetchPipe, new FetchBuffer(fetcher), Fields.RESULTS);


        Pipe fetchedContent = new Pipe(CONTENT_PIPE_NAME, new Each(fetchPipe, new FilterErrorsFunction()));
        
        Pipe fetchedStatus = new Pipe("fetched status", new Each(fetchPipe, new MakeStatusFunction()));

View Full Code Here

Examples of cascading.pipe.Every

    stop words -- based on an R script
    */


    Pipe tokenPipe = new Pipe( "token", joinPipe ); // name branch
    tokenPipe = new GroupBy( tokenPipe, new Fields( "token" ) );
    tokenPipe = new Every( tokenPipe, Fields.ALL, new Count(), Fields.ALL );


    /*
    flow part #3
    generate an inverted index for ((uid1,uid2), token) to avoid having to perform
    a cross-product, which would impose a bottleneck in the parallelism
    */


    Pipe invertPipe = new Pipe( "inverted index", joinPipe );
    invertPipe = new CoGroup( invertPipe, new Fields( "token" ), 1, new Fields( "uid1", "ignore", "uid2", "token" ) );


    Fields filterArguments = new Fields( "uid1", "uid2" );
    String uidFilter = "uid1.compareToIgnoreCase( uid2 ) >= 0";
    invertPipe = new Each( invertPipe, filterArguments, new ExpressionFilter( uidFilter, String.class ) );
    Fields ignore = new Fields( "ignore" );
    invertPipe = new Discard( invertPipe, ignore );


    /*
    flow part #4
    count the number of tokens in common for each uid pair and apply a threshold
    */


    Pipe commonPipe = new GroupBy( new Pipe( "uid common", invertPipe ), new Fields( "uid1", "uid2" ) );
    commonPipe = new Every( commonPipe, Fields.ALL, new Count( new Fields( "common" ) ), Fields.ALL );


    String commonFilter = String.format( "common < %d", MIN_COMMON_TOKENS );
    commonPipe = new Each( commonPipe, new Fields( "common" ), new ExpressionFilter( commonFilter, Integer.TYPE ) );


    /*
    flow part #5
    count the number of tokens overall for each uid, then join to calculate
    the vector length for uid1
    */


    Fields tokenCount = new Fields( "token_count" );
    Pipe countPipe = new GroupBy( "count", joinPipe, new Fields( "uid" ) );
    countPipe = new Every( countPipe, Fields.ALL, new Count( tokenCount ), Fields.ALL );


    joinPipe = new CoGroup( countPipe, new Fields( "uid" ), commonPipe, new Fields( "uid1" ) );
    joinPipe = new Pipe( "common", joinPipe );
    joinPipe = new Discard( joinPipe, new Fields( "uid" ) );

View Full Code Here

Examples of cascading.pipe.Every

    if( isCategorical )
      buffer = new CategoricalSelectionBuffer( ensembleSpec );
    else
      buffer = new PredictionSelectionBuffer( ensembleSpec );


    pipe = new Every( pipe, predictedFields, buffer, Fields.SWAP );


    if( modelSchema.getKeyFields().isNone() )
      pipe = new Discard( pipe, keyFields );


    setTails( pipe );

View Full Code Here

Examples of cascading.pipe.Every


    // For every Tuple group
    // count the number of occurrences of "word" and store result in
    // a field named "count"
    Aggregator count = new Count(new Fields("resource"));
    pipeline = new Every(pipeline, count);




    // create a SINK tap to write to the default filesystem
    // by default, TextLine writes all fields out
    Tap remoteLogTap = new Hfs(new TextLine(), outputPath, SinkMode.REPLACE);

View Full Code Here

Examples of cascading.pipe.Every

    Pipe tsPipe = new Each( "arrival rate", new Fields( "time" ), dateParser, Fields.RESULTS );


    // name the per second assembly and split on tsPipe
    Pipe tsCountPipe = new Pipe( "tsCount", tsPipe );
    tsCountPipe = new GroupBy( tsCountPipe, new Fields( "ts" ) );
    tsCountPipe = new Every( tsCountPipe, Fields.GROUP, new Count() );


    // apply expression to create a timestamp with 'minute' granularity
    // declares field "tm"
    Pipe tmPipe = new Each( tsPipe, new ExpressionFunction( new Fields( "tm" ), "ts - (ts % (60 * 1000))", long.class ) );


    // name the per minute assembly and split on tmPipe
    Pipe tmCountPipe = new Pipe( "tmCount", tmPipe );
    tmCountPipe = new GroupBy( tmCountPipe, new Fields( "tm" ) );
    tmCountPipe = new Every( tmCountPipe, Fields.GROUP, new Count() );


    // create taps to write the results the default filesystem, using the given fields
    Tap tsSinkTap = new Hfs( new TextLine(), arrivalRateSecPath );
    Tap tmSinkTap = new Hfs( new TextLine(), arrivalRateMinPath );

View Full Code Here

Examples of cascading.pipe.Every

      RegexGenerator wordGenerator = new RegexGenerator( new Fields( "word" ), "(?<!\\pL)(?=\\pL)[^ ]*(?<=\\pL)(?!\\pL)" );
      pipe = new Each( pipe, new Fields( "words" ), wordGenerator, new Fields( "url", "word" ) );


      // group on "url"
      Pipe urlCountPipe = new GroupBy( sinkUrlName, pipe, new Fields( "url", "word" ) );
      urlCountPipe = new Every( urlCountPipe, new Fields( "url", "word" ), new Count(), new Fields( "url", "word", "count" ) );


      // group on "word"
      Pipe wordCountPipe = new GroupBy( sinkWordName, pipe, new Fields( "word" ) );
      wordCountPipe = new Every( wordCountPipe, new Fields( "word" ), new Count(), new Fields( "word", "count" ) );


      setTails( urlCountPipe, wordCountPipe );
      }

View Full Code Here

Examples of cascading.pipe.Every

    Pipe docPipe = new Each( "token", text, splitter, Fields.RESULTS );


    // determine the word counts
    Pipe wcPipe = new Pipe( "wc", docPipe );
    wcPipe = new GroupBy( wcPipe, token );
    wcPipe = new Every( wcPipe, Fields.ALL, new Count(), Fields.ALL );


    // connect the taps, pipes, etc., into a flow
    FlowDef flowDef = FlowDef.flowDef()
     .setName( "wc" )
     .addSource( docPipe, docTap )

View Full Code Here

Examples of cascading.pipe.Every


    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );


    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new GroupBy( pipeLower, new Fields( "num" ) );
    pipeLower = new Every( pipeLower, new Fields( "char" ), new First(), Fields.ALL );


    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );
    pipeUpper = new GroupBy( pipeUpper, new Fields( "num" ) );
    pipeUpper = new Every( pipeUpper, new Fields( "char" ), new First(), Fields.ALL );


    Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) );


    Flow flow = getPlatform().getFlowConnector().connect( sources, sink, splice );

View Full Code Here

Examples of cascading.pipe.Every

    Function splitter1 = new RegexSplitter( new Fields( "num1", "char1" ), " " );


    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter1 );
    pipeLower = new Each( pipeLower, new Insert( new Fields( "one", "two", "three", "four" ), "one", "two", "three", "four" ), Fields.ALL );
    pipeLower = new GroupBy( pipeLower, new Fields( "num1" ) );
    pipeLower = new Every( pipeLower, new Fields( "char1" ), new First(), Fields.ALL );


    Function splitter2 = new RegexSplitter( new Fields( "num2", "char2" ), " " );


    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter2 );
    pipeUpper = new GroupBy( pipeUpper, new Fields( "num2" ) );
    pipeUpper = new Every( pipeUpper, new Fields( "char2" ), new First(), Fields.ALL );


    Pipe splice = new CoGroup( pipeLower, new Fields( "num1" ), pipeUpper, new Fields( "num2" ) );


    Flow flow = getPlatform().getFlowConnector().connect( sources, sink, splice );

View Full Code Here

0 1 2 3 4 5

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.