Package: com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple


    // Define the input and its associated mapper
    // The mapper will just emit the (min, max) pairs to the reduce stage
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          // Reusable tuple instance; refilled on every map() call to avoid
          // allocating a new object per input record
          Tuple tuple = new Tuple(schema);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Each line is expected to be "<min>\t<max>" with both columns
            // parseable as ints — TODO confirm input format with the job setup
            String[] fields = value.toString().split("\t");
            tuple.set("min", Integer.parseInt(fields[0]));
            tuple.set("max", Integer.parseInt(fields[1]));
            collector.write(tuple);
          }
        });

    // Define the reducer
View Full Code Here


    // Reusable tuple; allocated once per task in setup() rather than per record
    private Tuple tuple;

    // Called once before processing begins: binds the reusable tuple to the
    // intermediate schema registered under the name "my_schema"
    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("my_schema"));
    }
View Full Code Here

    // Per-task initialization: configures a lenient Jackson ObjectMapper and
    // allocates the reusable tuple for the "my_schema" intermediate schema
    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      jsonMapper = new ObjectMapper();
      // Tolerate JSON fields that have no matching property in the target
      // class instead of failing deserialization
      jsonMapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("my_schema"));
    }
View Full Code Here

    // Use a HashSet to calculate the total vocabulary size
    Set<String> vocabulary = new HashSet<String>();
    // Read tuples from generate job
    for(FileStatus fileStatus : fileSystem.globStatus(generatedModel)) {
      TupleFile.Reader reader = new TupleFile.Reader(fileSystem, conf, fileStatus.getPath());
      // One reusable tuple for the whole file; next() refills it in place
      Tuple tuple = new Tuple(reader.getSchema());
      while(reader.next(tuple)) {
        // Read Tuple
        Integer count = (Integer) tuple.get("count");
        Category category = (Category) tuple.get("category");
        String word = tuple.get("word").toString();
        vocabulary.add(word);
        // Accumulate the total token count per category (0 when first seen)
        tokensPerCategory.put(category, MapUtils.getInteger(tokensPerCategory, category, 0) + count);
        // NOTE(review): assumes wordCountPerCategory already contains an entry
        // for every category — get(category) would NPE otherwise; confirm caller
        wordCountPerCategory.get(category).put(word, count);
      }
      // NOTE(review): reader is not closed if an exception is thrown above;
      // consider try/finally (or try-with-resources if Reader is Closeable)
      reader.close();
View Full Code Here

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      // Lazily create the reusable tuple on the first record, bound to the
      // first (index 0) intermediate schema of the job
      if(tuple == null) {
        tuple = new Tuple(context.getTupleMRContext().getIntermediateSchema(0));
      }

      // Line format: "<int>\t<string>" — TODO confirm against the job's input
      String[] fields = value.toString().trim().split("\t");
      tuple.set("intField", Integer.parseInt(fields[0]));
      tuple.set("strField", fields[1]);
View Full Code Here

      this.n = n;
    }

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException, TupleMRException {
      outputCountTuple = new Tuple(getOutputCountSchema());
    };
View Full Code Here

    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("tweet"));
    };
View Full Code Here

    job.addIntermediateSchema(INTERMEDIATE_SCHEMA);
    // perform per-category word count mapping
    job.addInput(new Path(inputExamples), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tuple = new Tuple(INTERMEDIATE_SCHEMA);

          @Override
          public void map(LongWritable toIgnore, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {

            Category category = Category.valueOf(value.toString().split("\t")[0]);
            StringTokenizer itr = new StringTokenizer(value.toString().split("\t")[1]);
            tuple.set("category", category);
            tuple.set("count", 1);
            while(itr.hasMoreTokens()) {
              tuple.set("word", normalizeWord(itr.nextToken()));
              collector.write(tuple);
            }
          }
        });

    TupleReducer countReducer = new TupleReducer<ITuple, NullWritable>() {

      // Sums the "count" field across all tuples in the group and emits a
      // single tuple carrying the aggregated count
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException, TupleMRException {
        int count = 0;
        ITuple outputTuple = null;
        for(ITuple tuple : tuples) {
          count += (Integer) tuple.get("count");
          // Keep the last tuple so its remaining fields are reused in the output.
          // NOTE(review): MR frameworks often recycle the iterated tuple object;
          // holding a reference past the loop relies on it still describing the
          // last element — confirm with Pangool's iterator semantics
          outputTuple = tuple;
        }
        // NOTE(review): outputTuple is null for an empty group; presumably the
        // framework never invokes reduce() with zero tuples — verify
        outputTuple.set("count", count);
        collector.write(outputTuple, NullWritable.get());
      }
View Full Code Here

  private static class RetweetsMapper extends TupleMapper<LongWritable, Text> {
    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("retweet"));
    };
View Full Code Here

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {

      // Just parsing the text input and emitting a Tuple
      Tuple tuple = new Tuple(schema);
      String[] fields = value.toString().trim().split("\t");
      tuple.set("url", fields[0]);
      tuple.set("date", fields[1]);
      tuple.set("visits", Integer.parseInt(fields[2]));
      collector.write(tuple);
    }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Tuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.