Examples of Lfs


Examples of cascading.tap.Lfs

  public void testJDBC() throws IOException
    {

    // CREATE NEW TABLE FROM SOURCE

    Tap source = new Lfs( new TextLine(), inputFile );

    Pipe parsePipe = new Each( "insert", new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ), "\\s" ) );

    String url = "jdbc:hsqldb:hsql://localhost/testing";
    String driver = "org.hsqldb.jdbcDriver";
    String tableName = "testingtable";
    String[] columnNames = {"num", "lower", "upper"};
    String[] columnDefs = {"VARCHAR(100) NOT NULL", "VARCHAR(100) NOT NULL", "VARCHAR(100) NOT NULL"};
    String[] primaryKeys = {"num", "lower"};
    TableDesc tableDesc = new TableDesc( tableName, columnNames, columnDefs, primaryKeys );

    Tap replaceTap = new JDBCTap( url, driver, tableDesc, new JDBCScheme( columnNames ), SinkMode.REPLACE );

    Flow parseFlow = new FlowConnector( getProperties() ).connect( source, replaceTap, parsePipe );

    parseFlow.complete();

    verifySink( parseFlow, 13 );

    // READ DATA FROM TABLE INTO TEXT FILE

    // create flow to read from hbase and save to local file
    Tap sink = new Lfs( new TextLine(), "build/test/jdbc", SinkMode.REPLACE );

    Pipe copyPipe = new Each( "read", new Identity() );

    Flow copyFlow = new FlowConnector( getProperties() ).connect( replaceTap, sink, copyPipe );
View Full Code Here

Examples of cascading.tap.Lfs

  public void testJDBCAliased() throws IOException
    {

    // CREATE NEW TABLE FROM SOURCE

    Tap source = new Lfs( new TextLine(), inputFile );

    Fields columnFields = new Fields( "num", "lower", "upper" );
    Pipe parsePipe = new Each( "insert", new Fields( "line" ), new RegexSplitter( columnFields, "\\s" ) );

    String url = "jdbc:hsqldb:hsql://localhost/testing";
    String driver = "org.hsqldb.jdbcDriver";
    String tableName = "testingtablealias";
    String[] columnNames = {"db_num", "db_lower", "db_upper"};
    String[] columnDefs = {"VARCHAR(100) NOT NULL", "VARCHAR(100) NOT NULL", "VARCHAR(100) NOT NULL"};
    String[] primaryKeys = {"db_num", "db_lower"};
    TableDesc tableDesc = new TableDesc( tableName, columnNames, columnDefs, primaryKeys );

    Tap replaceTap = new JDBCTap( url, driver, tableDesc, new JDBCScheme( columnFields, columnNames ), SinkMode.REPLACE );

    Flow parseFlow = new FlowConnector( getProperties() ).connect( source, replaceTap, parsePipe );

    parseFlow.complete();

    verifySink( parseFlow, 13 );

    // READ DATA FROM TABLE INTO TEXT FILE

    // create flow to read from hbase and save to local file
    Tap sink = new Lfs( new TextLine(), "build/test/jdbc", SinkMode.REPLACE );

    Pipe copyPipe = new Each( "read", new Identity() );

    Flow copyFlow = new FlowConnector( getProperties() ).connect( replaceTap, sink, copyPipe );
View Full Code Here

Examples of cascading.tap.Lfs

    Pipe importPipe = new Each( "import", new Fields( "line" ), parser );

    // create tap to read a resource from the local file system, if not an url for an external resource
    // Lfs allows for relative paths
    Tap logTap =
      inputPath.matches( "^[^:]+://.*" ) ? new Hfs( new TextLine(), inputPath ) : new Lfs( new TextLine(), inputPath );
    // create a tap to read/write from the default filesystem
    Tap parsedLogTap = new Hfs( apacheFields, logsPath );

    // connect the assembly to source and sink taps
    Flow importLogFlow = flowConnector.connect( logTap, parsedLogTap, importPipe );
View Full Code Here

Examples of cascading.tap.Lfs

    // a predefined pipe assembly that returns fields named "url" and "page"
    Pipe importPipe = new ImportCrawlDataAssembly( "import pipe" );

    // create the tap instances
    Tap localPagesSource = new Lfs( new TextLine(), inputPath );
    Tap importedPages = new Hfs( new SequenceFile( new Fields( "url", "page" ) ), pagesPath );

    // connect the pipe assembly to the tap instances
    Flow importPagesFlow = flowConnector.connect( "import pages", localPagesSource, importedPages, importPipe );

    // a predefined pipe assembly that splits the stream into two named "url pipe" and "word pipe"
    // these pipes could be retrieved via the getTails() method and added to new pipe instances
    SubAssembly wordCountPipe = new WordCountSplitAssembly( "wordcount pipe", "url pipe", "word pipe" );

    // create Hadoop sequence files to store the results of the counts
    Tap sinkUrl = new Hfs( new SequenceFile( new Fields( "url", "word", "count" ) ), urlsPath );
    Tap sinkWord = new Hfs( new SequenceFile( new Fields( "word", "count" ) ), wordsPath );

    // convenience method to bind multiple pipes and taps
    Map<String, Tap> sinks = Cascades.tapsMap( new String[]{"url pipe", "word pipe"}, Tap.taps( sinkUrl, sinkWord ) );

    // wordCountPipe will be recognized as an assembly and handled appropriately
    Flow count = flowConnector.connect( importedPages, sinks, wordCountPipe );

    // create an assembly to export the Hadoop sequence file to local text files
    Pipe exportPipe = new Each( "export pipe", new Identity() );

    Tap localSinkUrl = new Lfs( new TextLine(), localUrlsPath );
    Tap localSinkWord = new Lfs( new TextLine(), localWordsPath );

    // connect up both sinks using the same exportPipe assembly
    Flow exportFromUrl = flowConnector.connect( "export url", sinkUrl, localSinkUrl, exportPipe );
    Flow exportFromWord = flowConnector.connect( "export word", sinkWord, localSinkWord, exportPipe );
View Full Code Here

Examples of cascading.tap.Lfs

    // define what the input file looks like, "offset" is bytes from beginning
    TextLine scheme = new TextLine( new Fields( "offset", "line" ) );

    // create SOURCE tap to read a resource from the local file system, if input is not an URL
    Tap logTap = inputPath.matches( "^[^:]+://.*" ) ? new Hfs( scheme, inputPath ) : new Lfs( scheme, inputPath );

    // create an assembly to parse an Apache log file and store on an HDFS cluster

    // declare the field names we will parse out of the log file
    Fields apacheFields = new Fields( "ip", "time", "method", "event", "status", "size" );
View Full Code Here

Examples of cascading.tap.hadoop.Lfs

    similarityFlow.complete();
    }

  public static Tap makeTap( String path, Scheme scheme )
    {
    return path.matches( "^[^:]+://.*" ) ? new Hfs( scheme, path ) : new Lfs( scheme, path );
    }
View Full Code Here

Examples of cascading.tap.hadoop.Lfs

    if( stepStatePath == null )
      throw new FlowException( "unable to find step state from distributed cache" );

    LOG.info( "reading step state from local path: {}", stepStatePath );

    Hfs temp = new Lfs( new TextLine( new Fields( "line" ) ), stepStatePath.toString() );

    TupleEntryIterator reader = null;

    try
      {
      reader = temp.openForRead( new HadoopFlowProcess( jobConf ) );

      if( !reader.hasNext() )
        throw new FlowException( "step state path is empty: " + temp.getIdentifier() );

      return reader.next().getString( 0 );
      }
    catch( IOException exception )
      {
      throw new FlowException( "unable to find state path: " + temp.getIdentifier(), exception );
      }
    finally
      {
      try
        {
View Full Code Here

Examples of cascading.tap.hadoop.Lfs

  // test is not executed, just guarantees flow is run locally

  @Test
  public void testLocalModeSource() throws Exception
    {
    Tap source = new Lfs( new TextLine(), "input/path" );
    Tap sink = new Hfs( new TextLine(), "output/path", SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );
View Full Code Here

Examples of cascading.tap.hadoop.Lfs

  @Test
  public void testLocalModeSink() throws Exception
    {
    Tap source = new Hfs( new TextLine(), "input/path" );
    Tap sink = new Lfs( new TextLine(), "output/path", SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );
View Full Code Here

Examples of cascading.tap.hadoop.Lfs

    }

  @Test
  public void testFlowID() throws Exception
    {
    Tap source = new Lfs( new TextLine(), "input/path" );
    Tap sink = new Hfs( new TextLine(), "output/path", true );

    Pipe pipe = new Pipe( "test" );

    Map<Object, Object> props = getProperties();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.