Package cascading.tap

Examples of cascading.tap.Tap
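A Tap binds a Scheme (the data format) to a resource such as an HDFS path, and acts as a source or sink for a Flow. A minimal sketch, assuming Cascading 2.x on Hadoop; the paths and names here are hypothetical:

        // hypothetical paths; TextDelimited( true, "\t" ) reads/writes TSV with a header line
        Tap inTap = new Hfs( new TextDelimited( true, "\t" ), "input/data.tsv" );
        Tap outTap = new Hfs( new TextDelimited( true, "\t" ), "output/copy", SinkMode.REPLACE );

        Pipe copyPipe = new Pipe( "copy" );
        FlowDef flowDef = FlowDef.flowDef()
            .setName( "copy" )
            .addSource( copyPipe, inTap )
            .addTailSink( copyPipe, outTap );

        new HadoopFlowConnector( new Properties() ).connect( flowDef ).complete();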


        // pass these to the output function so the strings from the indexes can be
        // written instead of the binary values of the Keys and Vectors in the DRMs
        dRM1.getStepConfigDef().setProperty("itemIndexPath", itemIndexPath.toString());
        dRM1.getStepConfigDef().setProperty("rowIndexPath", iDIndexPath.toString());
        dRM1.getStepConfigDef().setProperty("joining", "false");
        Tap outputSink = new Hfs(new TextDelimited(true, ","), cSVOutputPath.toString());

        FlowDef flowDef = new FlowDef()
            .setName("convert-to-CSV")
            .addSource(dRM1, dRM1Source)
            .addTailSink(dRM1, outputSink);
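
The snippet stops before the flow is run; a hedged continuation, assuming a HadoopFlowConnector since the full example is truncated:

        // assumed continuation: hand the FlowDef to a connector and run it
        Flow flow = new HadoopFlowConnector( new Properties() ).connect( flowDef );
        flow.complete();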


                if(s.getPath().getName().matches("^part.*")){// found a part-xxxxx file
                    Path filePath = s.getPath();
                    Tap t = new Hfs( new WritableSequenceFile( f, LongWritable.class, VectorWritable.class ), filePath.toString() );
                    if( s.getLen() != 0 ){// only keep non-empty part files
                        all.add(t);
                    }
                }
            }
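
The taps collected in all are typically merged into one logical source; a sketch, assuming all is a java.util.List<Tap>:

            // merge the collected part-file taps into a single source tap
            Tap combined = new MultiSourceTap( all.toArray( new Tap[ all.size() ] ) );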

        final String in = getTestDir() + "testSimpleIndexing/in";
        final String out = getTestDir() + "testSimpleIndexing/out";

        byte[] imageData = new byte[] {0, 1, 2, 3, 5};
       
        Tap source = makeSourceTap(testFields, in);
        TupleEntryCollector write = source.openForWrite(makeFlowProcess());
        Tuple t = new Tuple();
        t.add(1);
        t.add("TurboWriter 2.3");
        t.add(395.50f);
        t.add(new Tuple("wordprocessor", "Japanese"));
        t.add(true);
        t.add(imageData);
        write.add(t);
       
        t = new Tuple();
        t.add(2);
        t.add("Shasta 1.0");
        t.add(95.00f);
        t.add("Chinese");
        t.add(false);
       
        BytesWritable bw = new BytesWritable(imageData);
        bw.setCapacity(imageData.length + 10);
        t.add(bw);
        write.add(t);
        write.close();

        // Now read from the results, and write to a Solr index.
        Pipe writePipe = new Pipe("tuples to Solr");

        Tap solrSink = makeSolrSink(testFields, out);
        Flow flow = makeFlowConnector().connect(source, solrSink, writePipe);
        flow.complete();

        // Open up the Solr index, and do some searches.
        System.setProperty("solr.data.dir", out + "/part-00000");

    Properties properties = new Properties();
    AppProps.setApplicationJarClass( properties, Main.class );
    FlowConnector flowConnector = new HadoopFlowConnector( properties );

    // create SOURCE taps, and read from local file system if inputs are not URLs
    Tap tweetTap = makeTap( tweetPath, new TextDelimited( true, "\t" ) );

    Tap stopTap = makeTap( stopWords, new TextDelimited( new Fields( "stop" ), true, "\t" ) );

    // create SINK taps, replacing previous output if needed
    Tap tokenTap = new Hfs( new TextDelimited( true, "\t" ), tokenPath, SinkMode.REPLACE );
    Tap similarityTap = new Hfs( new TextDelimited( true, "\t" ), similarityPath, SinkMode.REPLACE );
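
SinkMode controls what happens when a sink resource already exists; a short sketch with hypothetical output paths:

    // KEEP (the default) fails if the output already exists,
    // REPLACE deletes any existing output before writing,
    // UPDATE lets schemes that support it modify the resource in place
    Tap keepTap = new Hfs( new TextDelimited( true, "\t" ), "out/keep", SinkMode.KEEP );
    Tap updateTap = new Hfs( new TextDelimited( true, "\t" ), "out/update", SinkMode.UPDATE );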

    /*
    flow part #1
    generate a bipartite map of (uid, token), while filtering out stop-words
    */
 

    Properties properties = new Properties();
    AppProps.setApplicationJarClass( properties, Main.class );
    HadoopFlowConnector flowConnector = new HadoopFlowConnector( properties );

    // create source and sink taps
    Tap docTap = new Hfs( new TextDelimited( true, "\t" ), docPath );
    Tap wcTap = new Hfs( new TextDelimited( true, "\t" ), wcPath );

    Fields stop = new Fields( "stop" );
    Tap stopTap = new Hfs( new TextDelimited( stop, true, "\t" ), stopPath );
    Tap tfidfTap = new Hfs( new TextDelimited( true, "\t" ), tfidfPath );

    // specify a regex operation to split the "document" text lines into a token stream
    Fields token = new Fields( "token" );
    Fields text = new Fields( "text" );
    RegexSplitGenerator splitter = new RegexSplitGenerator( token, "[ \\[\\]\\(\\),.]" );
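
The splitter is then applied with an Each, so every incoming "text" line yields one tuple per token; a sketch in the style of the Cascading tutorials:

    // emit one tuple per token extracted from each "text" line
    Pipe docPipe = new Each( "token", text, splitter, Fields.RESULTS );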

      input.add(t);
    }
    input.close();

    // Create flow to read from local file and insert into HBase.
    Tap source = new Hfs(new SequenceFile(inputFields), mHelper.manageTemporaryPath("input"));

    Pipe pipe = new Pipe("values");
    Fields keyFields = new Fields("num");
    Fields valueFields = new Fields("lower", "upper");
    Tap hBaseTap = new HBaseTap("testTable",
        new SerializingHBaseScheme(keyFields, valueFields,
            new Class<?>[]{String.class, String.class},
            false, SerializingHBaseScheme.Direction.FOR_WRITE),
        SinkMode.REPLACE);
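
The sink still has to be wired to the source; a hedged continuation using the names above (the connector choice is an assumption, since the surrounding test helper is not shown):

    // assumed continuation: connect the Hfs source to the HBase sink and run the flow
    Flow flow = new HadoopFlowConnector( new Properties() ).connect( source, hBaseTap, pipe );
    flow.complete();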

    // guard: refuse to overwrite an existing input file
    File inputFile = new File(inputPath);
    if (inputFile.exists()) {
      throw new CascadingException("Input file " + inputPath + " already exists.");
    }
    Tap inputTap = new Hfs(new SequenceFile(fields), inputPath, SinkMode.REPLACE);
    TupleEntryCollector collector = inputTap.openForWrite(getJobConf());
    return collector;
  }
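
A usage sketch for the helper above; the method name createInputCollector is hypothetical, since the enclosing signature is truncated:

    // hypothetical usage: write a couple of tuples, then close the collector
    TupleEntryCollector collector = createInputCollector( new Fields( "num", "lower" ), "/tmp/input" );
    collector.add( new Tuple( 1, "a" ) );
    collector.add( new Tuple( 2, "b" ) );
    collector.close();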

   */
  public Flow runFlow(Pipe[] tails, Map<String, Fields> inputs, Fields[] outputs,
                      String[] outputPaths) {
    Map<String, Tap> sources = new HashMap<String, Tap>();
    for (Map.Entry<String, Fields> input : inputs.entrySet()) {
      Tap tap = new Hfs(new SequenceFile(input.getValue()),
                        manageTemporaryPath(input.getKey()));
      sources.put(input.getKey(), tap);
    }

    if (tails.length != outputs.length) {
      System.err.println("Number of tails must equal number of outputs.");
      return null;
    }

    int i = 0;
    Map<String, Tap> sinks = new HashMap<String, Tap>();
    for (Fields output : outputs) {
      String sinkName = tails[i].getName();
      Tap sink = new Hfs(new SequenceFile(output), outputPaths[i]);
      sinks.put(sinkName, sink);
      i++;
    }

    Flow f = mFlowConnector.connect(sources, sinks, tails);
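
A usage sketch for runFlow; the single-pipe assembly here is hypothetical:

    // hypothetical call: one source named "lines", one tail, one output path;
    // the pipe name must match its source key, and the sink is keyed by tail name
    Map<String, Fields> inputs = new HashMap<String, Fields>();
    inputs.put( "lines", new Fields( "line" ) );
    Pipe tail = new Pipe( "lines" );
    Flow flow = runFlow( new Pipe[]{ tail }, inputs,
                         new Fields[]{ new Fields( "line" ) },
                         new String[]{ "/tmp/out" } );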

    Properties properties = new Properties();
    AppProps.setApplicationJarClass( properties, Main.class );
    HadoopFlowConnector flowConnector = new HadoopFlowConnector( properties );

    // create source and sink taps
    Tap inputTap = new Hfs( new TextDelimited( true, "\t" ), inputPath );
    Tap classifyTap = new Hfs( new TextDelimited( true, "\t" ), classifyPath );

    // handle command line options
    OptionParser optParser = new OptionParser();
    optParser.accepts( "pmml" ).withRequiredArg();
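
A hedged continuation parsing the arguments with the jopt-simple API:

    // assumed continuation: parse args and read the --pmml value
    OptionSet options = optParser.parse( args );
    String pmmlPath = (String) options.valueOf( "pmml" );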

    new RegressionFlowExample().run();
    }

  public void run() throws IOException
    {
    Tap irisTap = new FileTap( new TextDelimited( true, "\t", "\"" ), "data/iris.lm_p.tsv", SinkMode.KEEP );

    Tap resultsTap = new FileTap( new TextDelimited( true, "\t", "\"" ), "build/test/output/flow/results.tsv", SinkMode.REPLACE );

    FlowDef flowDef = FlowDef.flowDef()
      .setName( "pmml flow" )
      .addSource( "iris", irisTap )
      .addSink( "results", resultsTap );

    PMMLPlanner pmmlPlanner = new PMMLPlanner()
      .setPMMLInput( new File( "data/iris.lm_p.xml" ) )
      .retainOnlyActiveIncomingFields();

    flowDef.addAssemblyPlanner( pmmlPlanner );

    Flow flow = new LocalFlowConnector().connect( flowDef );

    flow.complete();

    TupleEntryIterator iterator = resultsTap.openForRead( flow.getFlowProcess() );

    while( iterator.hasNext() )
      System.out.println( iterator.next() );

    iterator.close();
