Package com.datasalt.pangool.tuplemr.mapred.lib.input

Examples of com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat


    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).add("date", Order.ASC));
    // Input / output and such
    mr.setTupleReducer(new MovingAverageHandler(nDaysAverage));
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new URLVisitsProcessor());
    mr.createJob().waitForCompletion(true);
    return 1;
  }
View Full Code Here


    mr.setOrderBy(new OrderBy().add("location", Order.ASC).add("date", Order.ASC).add("hashtag", Order.ASC));
    mr.setRollupFrom("date");
    // Input / output and such
    mr.setTupleReducer(new TweetsHandler(n));
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new TweetsProcessor());
    mr.createJob().waitForCompletion(true);
    return 0;
  }
View Full Code Here

    mr.setRollupFrom("user");
    // Input / output and such
    mr.setTupleCombiner(new CountCombinerHandler());
    mr.setTupleReducer(new NormalizingHandler());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new UserActivityProcessor());
    mr.createJob().waitForCompletion(true);
   
    return 1;
  }
View Full Code Here

    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Secondary Sort");
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("intField", "strField");
    mr.setOrderBy(new OrderBy().add("intField", Order.ASC).add("strField", Order.ASC).add("longField", Order.ASC));
    mr.setTupleReducer(new Handler());
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new IProcessor());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        DoubleWritable.class);
    mr.createJob().waitForCompletion(true);
    return 1;
  }
View Full Code Here

    mr.addIntermediateSchema(new Schema("urlMap", urlMapFields));
    mr.addIntermediateSchema(new Schema("urlRegister", urlRegisterFields));
    mr.setGroupByFields("url");
    mr.setTupleReducer(new Handler());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    mr.addInput(new Path(input1), new HadoopInputFormat(TextInputFormat.class), new UrlMapProcessor());
    mr.addInput(new Path(input2), new HadoopInputFormat(TextInputFormat.class), new UrlProcessor());
    mr.createJob().waitForCompletion(true);

    return 1;
  }
View Full Code Here

    // Note that the order in which we defined the fields of the Schema is not relevant here
    cg.setGroupByFields("topic", "word");
    // Here we instantiate a mapper with stop words:
    // Note that we don't need to use the DistributedCache for that because mappers, reducers, etc. themselves are instantiable
    StopWordMapper mapper = new StopWordMapper(stopWords);
    cg.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class), mapper);
    // We'll use a TupleOutputFormat with the same schema as the intermediate schema
    cg.setTupleOutput(new Path(args[1]), TopicalWordCount.getSchema());
    cg.setTupleReducer(new CountReducer());
    cg.setTupleCombiner(new CountReducer());
View Full Code Here

    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Topical Word Count");
    mr.addIntermediateSchema(getSchema());
    // We will count each (topicId, word) pair
    // Note that the order in which we defined the fields of the Schema is not relevant here
    mr.setGroupByFields("topic", "word");
    mr.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class), new TokenizeMapper());
    // We'll use a TupleOutputFormat with the same schema as the intermediate schema
    mr.setTupleOutput(new Path(args[1]), getSchema());
    mr.setTupleReducer(new CountReducer());
    mr.setTupleCombiner(new CountReducer());
View Full Code Here

    builder.setOrderBy(new OrderBy().add("first", Order.ASC).add("second", Order.ASC));
    // Input / output and such
    builder.setTupleReducer(new Handler());
    builder.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    builder.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new IProcessor());

    try {
      builder.createJob().waitForCompletion(true);
    } finally {
      builder.cleanUpInstanceFiles();
View Full Code Here

    // Input / output and such
    mr.setTupleCombiner(new CountCombinerHandler());
    mr.setTupleReducer(new NormalizingHandler());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
        new UserActivityProcessor());

    try {
      mr.createJob().waitForCompletion(true);
    } finally {
View Full Code Here

    mr.setRollupFrom("date");
    // Input / output and such
    mr.setTupleReducer(new TweetsHandler(n));
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new TweetsProcessor());
    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.