Package cascading.tap

Examples of cascading.tap.MultiSourceTap$TupleIterator


            joinedOutFields = new Fields(iDFieldName, dRM1FieldName, dRM2FieldName);
        }
    }

    void joinDRMsWriteToSolr(Path iDIndexPath, Path itemIndexPath, Path dRM1InputPath , Path dRM2InputPath, Path groupedCSVOutputPath) throws IOException {
        MultiSourceTap dRM1Source = getTaps(dRM1InputPath, inFieldsDRM1);
        MultiSourceTap dRM2Source = getTaps(dRM2InputPath, inFieldsDRM2);

        Pipe lhs = new Pipe("DRM1");
        Pipe rhs = new Pipe("DRM2");
        Pipe groupByItemIDPipe = new CoGroup(lhs, common, rhs, common, grouped, new InnerJoin());
        groupByItemIDPipe = new Each(groupByItemIDPipe, new VectorsToCSVFunction(joinedOutFields));
View Full Code Here


        //todo: not sure if it matters but may need to rename the part files to .csv
    }

    void writeDRMToSolr(Path iDIndexPath, Path itemIndexPath, Path dRM1InputPath, Path cSVOutputPath) throws IOException {
        MultiSourceTap dRM1Source = getTaps(dRM1InputPath, inFieldsDRM1);

        Pipe dRM1 = new Pipe("DRM1");
        dRM1 = new Each(dRM1, new VectorsToCSVFunction(simpleOutFields));
        //the DRM (Mahout Distributed Row Matrix) has row and items indexes the two dictionary BiHashMaps
        //pass these to the output function so the strings from the indexes can be written instead of the
View Full Code Here

                }
            }
        }
        Tap[] sourceTaps = all.toArray(new Tap[all.size()]);
        if(sourceTaps.length == 0 ) throw new IOException("No Source files found at: "+p.toString());
        MultiSourceTap source = new MultiSourceTap(sourceTaps);

        return source;
    }
View Full Code Here

    {
    Pipe pipe = new Pipe( "multitap" );

    pipe = new Each( pipe, new Identity() );

    Tap source = new MultiSourceTap( sources );
    Tap sink = getPlatform().getTextFile( getOutputPath( path + "/multitap" ), SinkMode.REPLACE );

    return getPlatform().getFlowConnector().connect( source, sink, pipe );
    }
View Full Code Here

    getPlatform().copyFromLocal( inputFileUpper );

    Tap sourceLower = getPlatform().getTextFile( inputFileLower );
    Tap sourceUpper = getPlatform().getTextFile( inputFileUpper );

    Tap source = new MultiSourceTap( sourceLower, sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Tap sink = getPlatform().getTextFile( getOutputPath( "complexconcat" ), SinkMode.REPLACE );
View Full Code Here

      {
      LOG.info( "could not find files in distributed cache. delegating to parent: {}", super.getIdentifier() );
      return super.openForRead( flowProcess, input );
      }

    return new MultiSourceTap( taps.toArray( new Tap[ taps.size() ] ) ).openForRead( flowProcess, input );
    }
View Full Code Here

    getPlatform().copyFromLocal( inputFileUpper );

    GlobHfs source1 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r]}.txt" );
    GlobHfs source2 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{owe?}.txt" );

    MultiSourceTap source = new MultiSourceTap( source1, source2 );

    assertEquals( 2, source.getNumChildTaps() );

    // using null pos so all fields are written
    Tap sink = new Hfs( new TextLine(), getOutputPath( "globmultisource" ), SinkMode.REPLACE );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), "\\s" );
View Full Code Here

    getPlatform().copyFromLocal( inputFileUpper );

    GlobHfs source1 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r]}.txt" );
    GlobHfs source2 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{owe?}.txt" );

    MultiSourceTap source = new MultiSourceTap( source1, source2 );

    validateLength( source.openForRead( getPlatform().getFlowProcess() ), 10 );

    GlobHfs sourceMulti = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r],owe?}.txt" );

    source = new MultiSourceTap( sourceMulti );

    validateLength( source.openForRead( getPlatform().getFlowProcess() ), 10, null );
    }
View Full Code Here

TOP

Related Classes of cascading.tap.MultiSourceTap$TupleIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.