Package cascading.tap.hadoop

Examples of cascading.tap.hadoop.Hfs.openForRead()


    TupleEntryIterator reader = null;

    try
      {
      reader = temp.openForRead( new HadoopFlowProcess( jobConf ) );

      if( !reader.hasNext() )
        throw new FlowException( "step state path is empty: " + temp.getIdentifier() );

      return reader.next().getString( 0 );
View Full Code Here


    Map<Object, Object> properties = new HashMap<Object, Object>(){{
      put("io.serializations", new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
    }};
    new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();

    TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
    Set<Tuple> tuples = new HashSet<Tuple>();
    while (tei.hasNext()) {
      tuples.add(tei.next().getTupleCopy());
    }
View Full Code Here

      put("io.serializations",
          new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
    }};
    new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();

    TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
    Set<Tuple> tuples = new HashSet<Tuple>();
    while (tei.hasNext()) {
      tuples.add(tei.next().getTupleCopy());
    }
View Full Code Here

    }
    tec.close();

    // read results back out
    Tap outputTap = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryIterator iter = outputTap.openForRead(new HadoopFlowProcess(), null);
    List<Tuple> tuples = new ArrayList<Tuple>();
    while (iter.hasNext()) {
      tuples.add(iter.next().getTupleCopy());
    }
View Full Code Here

    Pipe p = new Each(inPipe, new Fields("value"), new ExpandProto(Example.Person.class), new Fields("id", "name", "email", "position"));

    Hfs sink = new Hfs(new TextLine(), "/tmp/output");
    new HadoopFlowConnector().connect(inTap, sink, p).complete();

    TupleEntryIterator iter = sink.openForRead(new HadoopFlowProcess());
    List<Tuple> results = new ArrayList<Tuple>();
    while (iter.hasNext()) {
      results.add(iter.next().getTupleCopy());
    }
    assertEquals(2, results.size());
View Full Code Here

  private static BloomFilter mergeBloomParts(String tapPath, long numBloomBits, long splitSize, int numBloomHashes, long numElems) throws IOException {
    FixedSizeBitSet bitSet = new FixedSizeBitSet(numBloomBits);

    if (FileSystemHelper.getFS().exists(new Path(tapPath))) {
      Hfs tap = new Hfs(new SequenceFile(new Fields("split", "filter")), tapPath);
      TupleEntryIterator itr = tap.openForRead(CascadingUtil.get().getFlowProcess());
      while (itr.hasNext()) {
        TupleEntry cur = itr.next();
        long split = cur.getLong(0);
        FixedSizeBitSet curSet = new FixedSizeBitSet(splitSize, ((BytesWritable) cur.getObject(1)).getBytes());
        for (long i = 0; i < curSet.numBits(); i++) {
View Full Code Here

      return 0;
    }

    Hfs approxCountsTap = new Hfs(new SequenceFile(new Fields("bytes")), partsDir);

    TupleEntryIterator in = approxCountsTap.openForRead(CascadingUtil.get().getFlowProcess());
    List<HyperLogLog> countParts = new LinkedList<HyperLogLog>();

    long totalSum = 0;
    while (in.hasNext()) {
      TupleEntry tuple = in.next();
View Full Code Here

    Pipe joined = new BloomJoin(source1, new Fields("field1"), source2, new Fields("field3"));

    CascadingUtil.get().getFlowConnector().connect("Example flow", sources, sink, joined).complete();

    //  Take a look at the output tuples
    TupleEntryIterator output = sink.openForRead(CascadingUtil.get().getFlowProcess());
    System.out.println("Output tuples from flow:");
    while (output.hasNext()) {
      System.out.println(output.next().getTuple());
    }
  }
View Full Code Here

    sources.put("source2", ExampleFixtures.SOURCE_TAP_2);

    CascadingUtil.get().getFlowConnector().connect("Example flow", sources, sink, joined).complete();

    //  Take a look at the output tuples
    TupleEntryIterator output = sink.openForRead(CascadingUtil.get().getFlowProcess());
    System.out.println("Output tuples from flow:");
    while (output.hasNext()) {
      System.out.println(output.next().getTuple());
    }
  }
View Full Code Here

    f.setFlowStepStrategy(new BloomAssemblyStrategy());

    f.complete();

    //  Take a look at the output tuples
    TupleEntryIterator output = sink.openForRead(CascadingUtil.get().getFlowProcess());
    System.out.println("Output tuples from flow:");
    while (output.hasNext()) {
      System.out.println(output.next().getTuple());
    }
  }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.