Package com.datasalt.pangool.tuplemr.mapred.lib.input

Examples of com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat


    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("user_id");
    // Define the input and its associated mapper.
    // We'll only define a Mapper; the Reducer will be an identity reducer.
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          Tuple tuple = new Tuple(schema);

          @Override
          // … (truncated in the original listing)
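
The mapper above is cut off at the @Override. A minimal sketch of how the body could continue, assuming tab-separated input lines and that the schema's first field is the "user_id" string used in setGroupByFields (both assumptions, not from the original):

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Hypothetical body: parse an (assumed) tab-separated line and
            // emit a tuple keyed by the "user_id" group-by field.
            String[] parts = value.toString().split("\t");
            tuple.set("user_id", parts[0]);
            collector.write(tuple);
          }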


        .add("name", Order.ASC).add("height", Order.DESC));
    builder.setRollupFrom("country");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
    builder.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Map());

    Job job = builder.createJob();
    try {
      job.setNumReduceTasks(1);
      assertRun(job);
      // …
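
IdentityRed is not shown on this page. A plausible sketch of an identity TupleReducer consistent with the Text/Text output declared above; the "name" and "height" field names are taken from the OrderBy, everything else is an assumption:

  public static class IdentityRed extends TupleReducer<Text, Text> {
    @Override
    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException, TupleMRException {
      // Forward every tuple unchanged as a Text key/value pair.
      for (ITuple tuple : tuples) {
        collector.write(new Text(tuple.get("name").toString()),
            new Text(tuple.get("height").toString()));
      }
    }
  }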

        .add("name", Order.ASC).add("height", Order.DESC));
    builder.setRollupFrom("age");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
    builder.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Map());

    Job job = builder.createJob();
    try {
      job.setNumReduceTasks(1);
      assertRun(job);
      // …

        .add("name", Order.ASC));
    builder.setRollupFrom("age");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
    builder.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class),
        new DoNothingMap());

    Job job = builder.createJob();
    try {
      job.setNumReduceTasks(1);
      // …
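
DoNothingMap is likewise defined elsewhere; going by its name alone, a plausible sketch is a TupleMapper that consumes its input and emits nothing, which is handy in tests that only exercise job wiring (the Text/Text key and value types for the SequenceFile input are an assumption):

  public static class DoNothingMap extends TupleMapper<Text, Text> {
    @Override
    public void map(Text key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      // Intentionally empty: no tuples are emitted.
    }
  }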

    final Schema schema2 = new Schema("tupleInTuple2", fields);

    builder.addIntermediateSchema(schema1);
    builder.addIntermediateSchema(schema2);

    builder.addInput(new Path(INPUT1), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tupleInTuple1 = new Tuple(schema1);
          ITuple tuple1 = new Tuple(tupleSchema1);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            tuple1.set("a", split[0]);
            tuple1.set("b", Integer.parseInt(split[1]));

            tupleInTuple1.set("partitionId", 0);
            tupleInTuple1.set("tuple1", tuple1);
            collector.write(tupleInTuple1);
          }
        });

    builder.addInput(new Path(INPUT2), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tupleInTuple2 = new Tuple(schema2);
          ITuple tuple2 = new Tuple(tupleSchema2);
          // … (truncated in the original listing)
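
The second mapper is truncated. By symmetry with the INPUT1 mapper above, its body would likely parse each line into tuple2 and wrap it in tupleInTuple2; the field names below mirror schema1 and are an inference, not taken from this page:

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Mirror of the INPUT1 mapper: parse the line, fill the inner
            // tuple, then nest it inside the wrapper tuple.
            String[] split = value.toString().split("\t");
            tuple2.set("a", split[0]);
            tuple2.set("b", Integer.parseInt(split[1]));

            tupleInTuple2.set("partitionId", 0);
            tupleInTuple2.set("tuple2", tuple2);
            collector.write(tupleInTuple2);
          }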

        value.set(value.toString() + "\t" + classify(value.toString()));
        context.write(value, NullWritable.get());
      }
    });
    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class));
    job.createJob().waitForCompletion(true);
   
    return 1;
  }
  // …
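
The return 1 suggests this snippet sits inside the run() method of a Hadoop Tool. If so, a conventional entry point would hand the tool to ToolRunner so generic options (-D, -files, ...) are parsed before run() is invoked; the Classifier class name here is hypothetical:

  public static void main(String[] args) throws Exception {
    // Hypothetical driver for the tool shown above.
    System.exit(ToolRunner.run(new Configuration(), new Classifier(), args));
  }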

    deleteOutput(output);

    TupleMRBuilder job = new TupleMRBuilder(conf, "Naive Bayes Model Generator");
    job.addIntermediateSchema(INTERMEDIATE_SCHEMA);
    // perform per-category word count mapping
    job.addInput(new Path(inputExamples), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tuple = new Tuple(INTERMEDIATE_SCHEMA);

          @Override
          // … (truncated in the original listing)
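
This mapper is also truncated. For a per-category word count, a sketch of the body might tokenize each line and emit one tuple per word; the "category&lt;TAB&gt;text" line format and the field names "category", "word", and "count" are assumptions about INTERMEDIATE_SCHEMA:

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Assumed input: "category<TAB>text". Emit (category, word, 1)
            // per token so the reducer can sum counts per category.
            String[] split = value.toString().split("\t");
            tuple.set("category", split[0]);
            for (String word : split[1].split(" ")) {
              tuple.set("word", word.toLowerCase());
              tuple.set("count", 1);
              collector.write(tuple);
            }
          }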

    mr.addIntermediateSchema(getPangoolRetweetSchema());
    mr.setGroupByFields("tweet_id");
    mr.setOrderBy(new OrderBy().add("tweet_id", Order.ASC).addSchemaOrder(Order.ASC));

    mr.addInput(tweetsPath, new AvroInputFormat<Record>(getAvroTweetSchema()), new TweetsMapper());
    mr.addInput(retweetsPath, new HadoopInputFormat(TextInputFormat.class), new RetweetsMapper());
    mr.setOutput(outputPath, new AvroOutputFormat<Record>(getAvroOutputSchema()),
        AvroWrapper.class, NullWritable.class);

    mr.setTupleReducer(new Red());
    // …
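
addSchemaOrder(Order.ASC) sorts the tuples within each tweet_id group by the order their intermediate schemas were declared, so the reducer sees each tweet before its retweets, the usual shape of a reduce-side join (assuming the tweets schema was declared first; only the retweets declaration is visible above). The Red reducer itself isn't shown; a skeletal, hypothetical sketch:

  public static class Red extends TupleReducer<AvroWrapper<Record>, NullWritable> {
    @Override
    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException, TupleMRException {
      // Tuples from the first declared schema (assumed: tweets) arrive
      // before the retweets; the join/emit logic is not on this page.
      for (ITuple tuple : tuples) {
        // ... join logic elided ...
      }
    }
  }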

    mr.addIntermediateSchema(getSchema());
    mr.setGroupByFields("my_avro");
    // Here the custom comparator that groups by "topic,word" is used.
    MyAvroComparator customComp = new MyAvroComparator(getAvroSchema(), "topic", "word");
    mr.setOrderBy(new OrderBy().add("my_avro", Order.ASC, customComp));
    mr.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class), new TokenizeMapper());
    // We'll use a TupleOutputFormat with the same schema as the intermediate schema.
    mr.setTupleOutput(new Path(args[1]), getSchema());
    mr.setTupleReducer(new CountReducer());
    mr.setTupleCombiner(new CountReducer());
    // …
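
CountReducer is used as both reducer and combiner, which implies an associative aggregation. A sketch of what it might do, assuming the schema carries an int "count" field alongside "my_avro":

  public static class CountReducer extends TupleReducer<ITuple, NullWritable> {
    @Override
    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException, TupleMRException {
      // Sum the (assumed) "count" field across the group and emit one
      // aggregated tuple to the tuple output.
      int count = 0;
      ITuple last = null;
      for (ITuple tuple : tuples) {
        count += (Integer) tuple.get("count");
        last = tuple;
      }
      last.set("count", count);
      collector.write(last, NullWritable.get());
    }
  }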

    MapOnlyJobBuilder job = new MapOnlyJobBuilder(conf);
    TableSpec tableSpec = new TableSpec(schema, schema.getFields().get(1));
   
    job.setOutput(new Path(out, "store"), new SploutSQLProxyOutputFormat(new SQLite4JavaOutputFormat(1000000, tableSpec)), ITuple.class,
        NullWritable.class);
    job.addInput(input, new HadoopInputFormat(TextInputFormat.class), new MapOnlyMapper<LongWritable, Text, ITuple, NullWritable>() {

      ITuple metaTuple = new Tuple(schema);

      protected void map(LongWritable key, Text value, Context context) throws IOException,
          InterruptedException {
        // … (truncated in the original listing)
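
The map body is truncated. A plausible completion copies the (assumed) tab-separated columns into metaTuple positionally and emits it toward the SQL output format; the separator and the all-string field values are assumptions:

      protected void map(LongWritable key, Text value, Context context) throws IOException,
          InterruptedException {
        // Hypothetical body: one input column per schema field, in order.
        String[] split = value.toString().split("\t");
        for (int i = 0; i < split.length; i++) {
          metaTuple.set(schema.getFields().get(i).getName(), split[i]);
        }
        context.write(metaTuple, NullWritable.get());
      }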
