Examples of cascading.operation.regex.RegexSplitter

cascading.operation.regex.RegexSplitter
Class RegexSplitter will split an incoming argument value by the given regex delimiter patternString.


    // CREATE NEW TABLE FROM SOURCE


    Tap source = new Lfs( new TextLine(), inputFile );


    Pipe parsePipe = new Each( "insert", new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ), "\\s" ) );


    String url = "jdbc:hsqldb:hsql://localhost/testing";
    String driver = "org.hsqldb.jdbcDriver";
    String tableName = "testingtable";
    String[] columnNames = {"num", "lower", "upper"};

View Full Code Here

    // CREATE NEW TABLE FROM SOURCE


    Tap source = new Lfs( new TextLine(), inputFile );


    Fields columnFields = new Fields( "num", "lower", "upper" );
    Pipe parsePipe = new Each( "insert", new Fields( "line" ), new RegexSplitter( columnFields, "\\s" ) );


    String url = "jdbc:hsqldb:hsql://localhost/testing";
    String driver = "org.hsqldb.jdbcDriver";
    String tableName = "testingtablealias";
    String[] columnNames = {"db_num", "db_lower", "db_upper"};

View Full Code Here

  private static class ImportCrawlDataAssembly extends SubAssembly
    {
    public ImportCrawlDataAssembly( String name )
      {
      // split the text line into "url" and "raw" with the default delimiter of tab
      RegexSplitter regexSplitter = new RegexSplitter( new Fields( "url", "raw" ) );
      Pipe importPipe = new Each( name, new Fields( "line" ), regexSplitter );
      // remove all pdf documents from the stream
      importPipe = new Each( importPipe, new Fields( "url" ), new RegexFilter( ".*\\.pdf$", true ) );
      // replace ":nl" with a new line, return the fields "url" and "page" to the stream.
      // discard the other fields in the stream

View Full Code Here

    sources.put( "lhs", getPlatform().getTextFile( inputFileLhs ) );
    sources.put( "rhs", getPlatform().getTextFile( inputFileRhs ) );


    Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cross" ), SinkMode.REPLACE );


    Pipe pipeLower = new Each( "lhs", new Fields( "line" ), new RegexSplitter( new Fields( "numLHS", "charLHS" ), " " ) );
    Pipe pipeUpper = new Each( "rhs", new Fields( "line" ), new RegexSplitter( new Fields( "numRHS", "charRHS" ), " " ) );


    Pipe cross = new CoGroup( pipeLower, new Fields( "numLHS" ), pipeUpper, new Fields( "numRHS" ), new InnerJoin() );


    Flow flow = getPlatform().getFlowConnector().connect( sources, sink, cross );

View Full Code Here

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );


    Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cogroup" ), SinkMode.REPLACE );


    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );


    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );


    Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new InnerJoin( Fields.size( 4 ) ) );

View Full Code Here

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );


    Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "renamedpipes" ), SinkMode.REPLACE );


    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );


    Pipe pipeLower = new Pipe( "lower" );
    Pipe pipeUpper = new Pipe( "upper" );


    // these pipes will hide the source name, and could cause one to be lost

View Full Code Here

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );


    Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "unknown" ), SinkMode.REPLACE );


    Function splitter = new RegexSplitter( Fields.UNKNOWN, " " );


    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );


    Pipe splice = new CoGroup( pipeLower, new Fields( 0 ), pipeUpper, new Fields( 0 ), Fields.size( 4 ) );

View Full Code Here

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );


    Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cogroupfilteredbranch" ), SinkMode.REPLACE );


    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );


    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );
    pipeUpper = new Each( pipeUpper, new Fields( "num" ), new RegexFilter( "^fobar" ) ); // intentionally filtering all
    pipeUpper = new GroupBy( pipeUpper, new Fields( "num" ) );

View Full Code Here

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );


    Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cogroupself" ), SinkMode.REPLACE );


    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );


    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );


    Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) );

View Full Code Here

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );


    Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "afterevery" ), SinkMode.REPLACE );


    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );


    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new GroupBy( pipeLower, new Fields( "num" ) );
    pipeLower = new Every( pipeLower, new Fields( "char" ), new First(), Fields.ALL );

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of cascading.operation.regex.RegexSplitter

cascading.BasicPipesPlatformTest

cascading.BufferPipesPlatformTest

cascading.cascade.CascadePlatformTest

cascading.cascade.hadoop.RiffleCascadePlatformTest

cascading.cascade.ParallelCascadePlatformTest

cascading.CoGroupFieldedPipesPlatformTest

cascading.DistanceUseCasePlatformTest

cascading.FieldedPipesPlatformTest

cascading.flow.hadoop.BuildJobsHadoopPlatformTest

cascading.flow.hadoop.FlowPlatformTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.