Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple
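The snippets below come from Pangool example jobs and show the typical Tuple lifecycle: create it against a Schema, fill its fields with set(), and emit it through the Collector. Most of them allocate the Tuple once in setup() and reuse it across calls.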


    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {

      // Just parsing the text input and emitting a Tuple
      Tuple tuple = new Tuple(schema);
      String[] fields = value.toString().trim().split("\t");
      tuple.set("url", fields[0]);
      tuple.set("date", fields[1]);
      tuple.set("visits", Integer.parseInt(fields[2]));
      collector.write(tuple);
    }
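The schema variable is defined outside this excerpt. A minimal sketch of what it might look like, built with Pangool's Fields.parse helper; the schema name and the exact field types are assumptions, but the field names match the tuple.set() calls above:

    // Hypothetical schema definition; the name "visits" and the types are assumptions.
    static Schema schema = new Schema("visits",
        Fields.parse("url:string, date:string, visits:int"));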


    job.addIntermediateSchema(INTERMEDIATE_SCHEMA);
    // perform per-category word count mapping
    job.addInput(new Path(inputExamples), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tuple = new Tuple(INTERMEDIATE_SCHEMA);

          @Override
          public void map(LongWritable toIgnore, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {

            Category category = Category.valueOf(value.toString().split("\t")[0]);
            StringTokenizer itr = new StringTokenizer(value.toString().split("\t")[1]);
            tuple.set("category", category);
            tuple.set("count", 1);
            while(itr.hasMoreTokens()) {
              tuple.set("word", normalizeWord(itr.nextToken()));
              collector.write(tuple);
            }
          }
        });

    TupleReducer<ITuple, NullWritable> countReducer = new TupleReducer<ITuple, NullWritable>() {

      @Override
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
          throws IOException, InterruptedException, TupleMRException {
        int count = 0;
        ITuple outputTuple = null;
        for(ITuple tuple : tuples) {
          count += (Integer) tuple.get("count");
          outputTuple = tuple;
        }
        outputTuple.set("count", count);
        collector.write(outputTuple, NullWritable.get());
      }
    };
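The builder wiring above stops after the reducer definition. A sketch of how the remaining pieces might be hooked up through Pangool's TupleMRBuilder; the group-by fields follow the word-count semantics, while the output path and the use of setTupleOutput are assumptions:

    // Hypothetical remainder of the job setup; outputPath is assumed.
    job.setGroupByFields("word", "category");
    job.setTupleReducer(countReducer);
    job.setTupleOutput(new Path(outputPath), INTERMEDIATE_SCHEMA);
    job.createJob().waitForCompletion(true);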

  private static class UserActivityProcessor extends TupleMapper<LongWritable, Text> {

    private Tuple tuple;

    @Override
    public void setup(TupleMRContext context, Collector collector) throws IOException, InterruptedException {
      this.tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("my_schema"));
    }
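Note the pattern here and in the snippets that follow: instead of allocating a new Tuple per record as the first example does, the mapper resolves the named intermediate schema once in setup() and reuses a single Tuple instance across all map() calls.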

  public static class CountCombinerHandler extends TupleReducer<ITuple, NullWritable> {

    private Tuple tuple;

    @Override
    public void setup(TupleMRContext context, Collector collector) throws IOException, InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("my_schema"));
    }
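Only the combiner's setup() survives in this excerpt. A sketch of the missing reduce() and of how such a combiner might be registered; the "count" field name is carried over from the earlier word-count example, and setTupleCombiner is assumed to be the registration call:

    // Hypothetical reduce(): fold partial counts for a group into one tuple,
    // mirroring the countReducer above.
    @Override
    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException, TupleMRException {
      int count = 0;
      ITuple outputTuple = null;
      for(ITuple t : tuples) {
        count += (Integer) t.get("count");
        outputTuple = t;
      }
      outputTuple.set("count", count);
      collector.write(outputTuple, NullWritable.get());
    }

    // Registration on the builder (assumed):
    // job.setTupleCombiner(new CountCombinerHandler());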

      this.n = n;
    }

    @Override
    public void setup(TupleMRContext context, Collector collector) throws IOException, InterruptedException,
        TupleMRException {
      outputCountTuple = new Tuple(getOutputCountSchema());
    }
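Unlike the previous handlers, this one builds its output tuple from a programmatically constructed schema (getOutputCountSchema()) rather than a schema registered by name, which is useful when the reducer emits a different shape than the intermediate records.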

  private static class TweetsMapper extends TupleMapper<AvroWrapper<Record>, NullWritable> {

    private Tuple tuple;

    @Override
    public void setup(TupleMRContext context, Collector collector) throws IOException, InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("tweet"));
    }
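The AvroWrapper<Record> key type indicates this mapper reads Avro files through Avro's mapred AvroInputFormat, which hands each datum to map() wrapped as the key with a NullWritable value; the setup() then resolves the "tweet" intermediate schema exactly as the text-based mappers do.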

  @SuppressWarnings("serial")
  private static class RetweetsMapper extends TupleMapper<LongWritable, Text> {
    private Tuple tuple;

    @Override
    public void setup(TupleMRContext context, Collector collector) throws IOException, InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("retweet"));
    }

    job.setCustomPartitionFields("min");
    // Define the input and its associated mapper
    // The mapper will just emit the (min, max) pairs to the reduce stage
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new TupleMapper<LongWritable, Text>() {

      Tuple tuple = new Tuple(schema);

      @Override
      public void map(LongWritable key, Text value, TupleMRContext context, Collector collector) throws IOException,
          InterruptedException {
        String[] fields = value.toString().split("\t");
        tuple.set("min", Integer.parseInt(fields[0]));
        tuple.set("max", Integer.parseInt(fields[1]));
        collector.write(tuple);
      }
    });

    // Define the reducer
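The example is cut off at the reducer. A plausible continuation, sketched under the assumption that the job groups by "min" and keeps the largest "max" seen per group; the real example's aggregation may differ:

    // Hypothetical reducer for the (min, max) pairs emitted above; the
    // choice of aggregation is an assumption.
    job.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {

      @Override
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException, TupleMRException {
        int max = Integer.MIN_VALUE;
        ITuple last = null;
        for(ITuple tuple : tuples) {
          max = Math.max(max, (Integer) tuple.get("max"));
          last = tuple;
        }
        last.set("max", max);
        collector.write(last, NullWritable.get());
      }
    });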

    protected Tuple tuple;
    protected Record record;
    protected ObjectMapper mapper;

    @Override
    public void setup(TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      this.mapper = new ObjectMapper();
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema(0));
      record = new Record(getAvroSchema());
      tuple.set("my_avro", record);
    }

    protected Tuple tuple;
    protected ObjectMapper mapper;

    @Override
    public void setup(TupleMRContext context, Collector collector) throws IOException, InterruptedException {
      this.mapper = new ObjectMapper();
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema(0));
    }
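The Jackson ObjectMapper prepared in setup() suggests that map() parses each input line as JSON. A sketch of what that body could look like; the "location" field is purely illustrative, since the excerpt does not show the actual schema:

    // Hypothetical map(): parse the line with Jackson and copy one JSON value
    // into the tuple. Field names are illustrative only.
    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      JsonNode json = mapper.readTree(value.toString());
      tuple.set("location", json.get("location").asText());
      collector.write(tuple);
    }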
