Package: com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple


    // Define the input and its associated mapper
    // The mapper will just emit the (min, max) pairs to the reduce stage
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          // Reusable tuple instance; refilled on every map() call to avoid
          // allocating a new object per input record
          Tuple tuple = new Tuple(schema);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Each line is expected to be "<min>\t<max>" with both columns
            // parseable as ints — TODO confirm input format with the job setup
            String[] fields = value.toString().split("\t");
            tuple.set("min", Integer.parseInt(fields[0]));
            tuple.set("max", Integer.parseInt(fields[1]));
            collector.write(tuple);
          }
        });

    // Define the reducer
View Full Code Here


    // Reusable tuple; allocated once per task in setup() rather than per record
    private Tuple tuple;

    // Called once before processing begins: binds the reusable tuple to the
    // intermediate schema registered under the name "my_schema"
    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("my_schema"));
    }
View Full Code Here

    // Per-task initialization: configures a lenient Jackson ObjectMapper and
    // allocates the reusable tuple for the "my_schema" intermediate schema
    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      jsonMapper = new ObjectMapper();
      // Tolerate JSON fields that have no matching property in the target
      // class instead of failing deserialization
      jsonMapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("my_schema"));
    }
View Full Code Here

    // Use a HashSet to calculate the total vocabulary size
    Set<String> vocabulary = new HashSet<String>();
    // Read tuples from generate job
    for(FileStatus fileStatus : fileSystem.globStatus(generatedModel)) {
      TupleFile.Reader reader = new TupleFile.Reader(fileSystem, conf, fileStatus.getPath());
      // One reusable tuple for the whole file; next() refills it in place
      Tuple tuple = new Tuple(reader.getSchema());
      while(reader.next(tuple)) {
        // Read Tuple
        Integer count = (Integer) tuple.get("count");
        Category category = (Category) tuple.get("category");
        String word = tuple.get("word").toString();
        vocabulary.add(word);
        // Accumulate the total token count per category (0 when first seen)
        tokensPerCategory.put(category, MapUtils.getInteger(tokensPerCategory, category, 0) + count);
        // NOTE(review): assumes wordCountPerCategory already contains an entry
        // for every category — get(category) would NPE otherwise; confirm caller
        wordCountPerCategory.get(category).put(word, count);
      }
      // NOTE(review): reader is not closed if an exception is thrown above;
      // consider try/finally (or try-with-resources if Reader is Closeable)
      reader.close();
View Full Code Here

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      // Lazily create the reusable tuple on the first record, bound to the
      // first (index 0) intermediate schema of the job
      if(tuple == null) {
        tuple = new Tuple(context.getTupleMRContext().getIntermediateSchema(0));
      }

      // Line format: "<int>\t<string>" — TODO confirm against the job's input
      String[] fields = value.toString().trim().split("\t");
      tuple.set("intField", Integer.parseInt(fields[0]));
      tuple.set("strField", fields[1]);
View Full Code Here

      this.n = n;
    }

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException, TupleMRException {
      outputCountTuple = new Tuple(getOutputCountSchema());
    };
View Full Code Here

    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("tweet"));
    };
View Full Code Here

    job.addIntermediateSchema(INTERMEDIATE_SCHEMA);
    // perform per-category word count mapping
    job.addInput(new Path(inputExamples), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tuple = new Tuple(INTERMEDIATE_SCHEMA);

          @Override
          public void map(LongWritable toIgnore, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {

            Category category = Category.valueOf(value.toString().split("\t")[0]);
            StringTokenizer itr = new StringTokenizer(value.toString().split("\t")[1]);
            tuple.set("category", category);
            tuple.set("count", 1);
            while(itr.hasMoreTokens()) {
              tuple.set("word", normalizeWord(itr.nextToken()));
              collector.write(tuple);
            }
          }
        });

    TupleReducer countReducer = new TupleReducer<ITuple, NullWritable>() {

      // Sums the "count" field across all tuples in the group and emits a
      // single tuple carrying the aggregated count
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException, TupleMRException {
        int count = 0;
        ITuple outputTuple = null;
        for(ITuple tuple : tuples) {
          count += (Integer) tuple.get("count");
          // Keep the last tuple so its remaining fields are reused in the output.
          // NOTE(review): MR frameworks often recycle the iterated tuple object;
          // holding a reference past the loop relies on it still describing the
          // last element — confirm with Pangool's iterator semantics
          outputTuple = tuple;
        }
        // NOTE(review): outputTuple is null for an empty group; presumably the
        // framework never invokes reduce() with zero tuples — verify
        outputTuple.set("count", count);
        collector.write(outputTuple, NullWritable.get());
      }
View Full Code Here

  private static class RetweetsMapper extends TupleMapper<LongWritable, Text> {
    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("retweet"));
    };
View Full Code Here

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {

      // Just parsing the text input and emitting a Tuple
      Tuple tuple = new Tuple(schema);
      String[] fields = value.toString().trim().split("\t");
      tuple.set("url", fields[0]);
      tuple.set("date", fields[1]);
      tuple.set("visits", Integer.parseInt(fields[2]));
      collector.write(tuple);
    }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Tuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.