Package cascading.flow

Examples of cascading.flow.Flow


    // set the current job jar
    Properties properties = new Properties();
    FlowConnector.setApplicationJarClass(properties, PopularLogResources.class);

    // connect the assembly to the SOURCE and SINK taps
    Flow parsedLogFlow = new FlowConnector(properties).connect(logTap, remoteLogTap, pipeline);

    // start execution of the flow (either locally or on the cluster)
    parsedLogFlow.start();

    // block until the flow completes
    parsedLogFlow.complete();
  }
View Full Code Here


      inputPath.matches( "^[^:]+://.*" ) ? new Hfs( new TextLine(), inputPath ) : new Lfs( new TextLine(), inputPath );
    // create a tap to read/write from the default filesystem
    Tap parsedLogTap = new Hfs( apacheFields, logsPath );

    // connect the assembly to source and sink taps
    Flow importLogFlow = flowConnector.connect( logTap, parsedLogTap, importPipe );

    // create an assembly to parse out the time field into a timestamp
    // then count the number of requests per second and per minute

    // apply a text parser to create a timestamp with 'second' granularity
    // declares field "ts"
    DateParser dateParser = new DateParser( new Fields( "ts" ), "dd/MMM/yyyy:HH:mm:ss Z" );
    Pipe tsPipe = new Each( "arrival rate", new Fields( "time" ), dateParser, Fields.RESULTS );

    // name the per second assembly and split on tsPipe
    Pipe tsCountPipe = new Pipe( "tsCount", tsPipe );
    tsCountPipe = new GroupBy( tsCountPipe, new Fields( "ts" ) );
    tsCountPipe = new Every( tsCountPipe, Fields.GROUP, new Count() );

    // apply expression to create a timestamp with 'minute' granularity
    // declares field "tm"
    Pipe tmPipe = new Each( tsPipe, new ExpressionFunction( new Fields( "tm" ), "ts - (ts % (60 * 1000))", long.class ) );

    // name the per minute assembly and split on tmPipe
    Pipe tmCountPipe = new Pipe( "tmCount", tmPipe );
    tmCountPipe = new GroupBy( tmCountPipe, new Fields( "tm" ) );
    tmCountPipe = new Every( tmCountPipe, Fields.GROUP, new Count() );

    // create taps to write the results to the default filesystem, using the given fields
    Tap tsSinkTap = new Hfs( new TextLine(), arrivalRateSecPath );
    Tap tmSinkTap = new Hfs( new TextLine(), arrivalRateMinPath );

    // a convenience method for binding taps and pipes, order is significant
    Map<String, Tap> sinks = Cascades.tapsMap( Pipe.pipes( tsCountPipe, tmCountPipe ), Tap.taps( tsSinkTap, tmSinkTap ) );

    // connect the assembly to the source and sink taps
    Flow arrivalRateFlow = flowConnector.connect( parsedLogTap, sinks, tsCountPipe, tmCountPipe );

    // optionally print out the arrivalRateFlow to a graph file for import into a graphics package
    //arrivalRateFlow.writeDOT( "arrivalrate.dot" );

    // connect the flows by their dependencies, order is not significant
View Full Code Here

    // create the tap instances
    Tap localPagesSource = new Lfs( new TextLine(), inputPath );
    Tap importedPages = new Hfs( new SequenceFile( new Fields( "url", "page" ) ), pagesPath );

    // connect the pipe assembly to the tap instances
    Flow importPagesFlow = flowConnector.connect( "import pages", localPagesSource, importedPages, importPipe );

    // a predefined pipe assembly that splits the stream into two named "url pipe" and "word pipe"
    // these pipes could be retrieved via the getTails() method and added to new pipe instances
    SubAssembly wordCountPipe = new WordCountSplitAssembly( "wordcount pipe", "url pipe", "word pipe" );

    // create Hadoop sequence files to store the results of the counts
    Tap sinkUrl = new Hfs( new SequenceFile( new Fields( "url", "word", "count" ) ), urlsPath );
    Tap sinkWord = new Hfs( new SequenceFile( new Fields( "word", "count" ) ), wordsPath );

    // convenience method to bind multiple pipes and taps
    Map<String, Tap> sinks = Cascades.tapsMap( new String[]{"url pipe", "word pipe"}, Tap.taps( sinkUrl, sinkWord ) );

    // wordCountPipe will be recognized as an assembly and handled appropriately
    Flow count = flowConnector.connect( importedPages, sinks, wordCountPipe );

    // create an assembly to export the Hadoop sequence file to local text files
    Pipe exportPipe = new Each( "export pipe", new Identity() );

    Tap localSinkUrl = new Lfs( new TextLine(), localUrlsPath );
    Tap localSinkWord = new Lfs( new TextLine(), localWordsPath );

    // connect up both sinks using the same exportPipe assembly
    Flow exportFromUrl = flowConnector.connect( "export url", sinkUrl, localSinkUrl, exportPipe );
    Flow exportFromWord = flowConnector.connect( "export word", sinkWord, localSinkWord, exportPipe );

    // connect up all the flows, order is not significant
    Cascade cascade = new CascadeConnector().connect( importPagesFlow, count, exportFromUrl, exportFromWord );

    // run the cascade to completion
View Full Code Here

    // set the current job jar
    Properties properties = new Properties();
    FlowConnector.setApplicationJarClass( properties, Main.class );

    // connect the assembly to the SOURCE and SINK taps
    Flow parsedLogFlow = new FlowConnector( properties ).connect( logTap, remoteLogTap, importPipe );

    // optionally print out the parsedLogFlow to a DOT file for import into a graphics package
    // parsedLogFlow.writeDOT( "logparser.dot" );

    // start execution of the flow (either locally or on the cluster)
    parsedLogFlow.start();

    // block until the flow completes
    parsedLogFlow.complete();
    }
View Full Code Here

      }
    }

    FlowConnector.setApplicationJarClass(properties, Main.class);
    FlowConnector flowConnector = new FlowConnector(properties);
    Flow flow = flowConnector.connect(sources, sinks, tails);
    if ("hadoop".equals(runningMode)) {
      try {
        flow.addListener(tempDir);
      } catch (Exception e) {
        e.printStackTrace();
      }
    } else {
      try {
        flow.addListener(new FlowListener() {

          @Override
          public void onStarting(Flow flow) {
          }

          @Override
          public void onStopping(Flow flow) {
          }

          @Override
          public void onCompleted(Flow flow) {
          }

          @Override
          public boolean onThrowable(Flow flow, Throwable throwable) {
            throwable.printStackTrace();
            return false;
          }
        });
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    flow.complete();
  }
View Full Code Here

     .addSource( logsPipe, logsTap )
     .addTailSink( recoPipe, recoTap )
    ;

    // write a DOT file and run the flow
    Flow copaFlow = flowConnector.connect( flowDef );
    copaFlow.writeDOT( "dot/copa.dot" );
    copaFlow.complete();
    }
View Full Code Here

   HadoopFlowConnector flowConnector = new HadoopFlowConnector( properties );

   FlowDef flowDef = createFlowDef(docPath, wcPath);

   // write a DOT file and run the flow
   Flow wcFlow = flowConnector.connect( flowDef );
   wcFlow.writeDOT( "dot/wc.dot" );
   wcFlow.complete();
   }
View Full Code Here

    Pipe pipeLower = new Each( "lhs", new Fields( "line" ), new RegexSplitter( new Fields( "numLHS", "charLHS" ), " " ) );
    Pipe pipeUpper = new Each( "rhs", new Fields( "line" ), new RegexSplitter( new Fields( "numRHS", "charRHS" ), " " ) );

    Pipe cross = new CoGroup( pipeLower, new Fields( "numLHS" ), pipeUpper, new Fields( "numRHS" ), new InnerJoin() );

    Flow flow = getPlatform().getFlowConnector().connect( sources, sink, cross );

    flow.complete();

    validateLength( flow, 37, null );

    List<Tuple> values = getSinkAsList( flow );
View Full Code Here

    Map<Object, Object> properties = getProperties();

    // make sure hasher is getting called, but does nothing special
    FlowProps.setDefaultTupleElementComparator( properties, getPlatform().getStringComparator( false ).getClass().getCanonicalName() );

    Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice );

    flow.complete();

    validateLength( flow, 5 );

    List<Tuple> values = getSinkAsList( flow );
View Full Code Here

//    splice = new Each( splice, new Debug( true ) );
    splice = new Pipe( "splice", splice );
    splice = new Pipe( "tail", splice );

    Flow flow = getPlatform().getFlowConnector().connect( sources, sink, splice );

    flow.complete();

    validateLength( flow, 5 );

    List<Tuple> values = getSinkAsList( flow );
View Full Code Here

TOP

Related Classes of cascading.flow.Flow

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.