Package com.datasalt.pangool.tuplemr.mapred.lib.input

Examples of com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat


    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("user_id");
    // Define the input and its associated mapper.
    // We'll only define a Mapper; the Reducer will be an identity reducer.
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          Tuple tuple = new Tuple(schema);

          @Override
          // … (truncated in the original listing)
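
The mapper above is cut off at the @Override. A minimal sketch of how the body could continue, assuming tab-separated input lines and that the schema's first field is the "user_id" string used in setGroupByFields (both assumptions, not from the original):

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Hypothetical body: parse an (assumed) tab-separated line and
            // emit a tuple keyed by the "user_id" group-by field.
            String[] parts = value.toString().split("\t");
            tuple.set("user_id", parts[0]);
            collector.write(tuple);
          }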


        .add("name", Order.ASC).add("height", Order.DESC));
    builder.setRollupFrom("country");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
    builder.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Map());

    Job job = builder.createJob();
    try {
      job.setNumReduceTasks(1);
      assertRun(job);
      // …
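
IdentityRed is not shown on this page. A plausible sketch of an identity TupleReducer consistent with the Text/Text output declared above; the "name" and "height" field names are taken from the OrderBy, everything else is an assumption:

  public static class IdentityRed extends TupleReducer<Text, Text> {
    @Override
    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException, TupleMRException {
      // Forward every tuple unchanged as a Text key/value pair.
      for (ITuple tuple : tuples) {
        collector.write(new Text(tuple.get("name").toString()),
            new Text(tuple.get("height").toString()));
      }
    }
  }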

        .add("name", Order.ASC).add("height", Order.DESC));
    builder.setRollupFrom("age");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
    builder.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Map());

    Job job = builder.createJob();
    try {
      job.setNumReduceTasks(1);
      assertRun(job);
      // …

        .add("name", Order.ASC));
    builder.setRollupFrom("age");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
    builder.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class),
        new DoNothingMap());

    Job job = builder.createJob();
    try {
      job.setNumReduceTasks(1);
      // …
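
DoNothingMap is likewise defined elsewhere; going by its name alone, a plausible sketch is a TupleMapper that consumes its input and emits nothing, which is handy in tests that only exercise job wiring (the Text/Text key and value types for the SequenceFile input are an assumption):

  public static class DoNothingMap extends TupleMapper<Text, Text> {
    @Override
    public void map(Text key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      // Intentionally empty: no tuples are emitted.
    }
  }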

    final Schema schema2 = new Schema("tupleInTuple2", fields);

    builder.addIntermediateSchema(schema1);
    builder.addIntermediateSchema(schema2);

    builder.addInput(new Path(INPUT1), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tupleInTuple1 = new Tuple(schema1);
          ITuple tuple1 = new Tuple(tupleSchema1);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            tuple1.set("a", split[0]);
            tuple1.set("b", Integer.parseInt(split[1]));

            tupleInTuple1.set("partitionId", 0);
            tupleInTuple1.set("tuple1", tuple1);
            collector.write(tupleInTuple1);
          }
        });

    builder.addInput(new Path(INPUT2), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tupleInTuple2 = new Tuple(schema2);
          ITuple tuple2 = new Tuple(tupleSchema2);
          // … (truncated in the original listing)
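
The second mapper is truncated. By symmetry with the INPUT1 mapper above, its body would likely parse each line into tuple2 and wrap it in tupleInTuple2; the field names below mirror schema1 and are an inference, not taken from this page:

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Mirror of the INPUT1 mapper: parse the line, fill the inner
            // tuple, then nest it inside the wrapper tuple.
            String[] split = value.toString().split("\t");
            tuple2.set("a", split[0]);
            tuple2.set("b", Integer.parseInt(split[1]));

            tupleInTuple2.set("partitionId", 0);
            tupleInTuple2.set("tuple2", tuple2);
            collector.write(tupleInTuple2);
          }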

        value.set(value.toString() + "\t" + classify(value.toString()));
        context.write(value, NullWritable.get());
      }
    });
    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class));
    job.createJob().waitForCompletion(true);
   
    return 1;
  }
  // …
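
The return 1 suggests this snippet sits inside the run() method of a Hadoop Tool. If so, a conventional entry point would hand the tool to ToolRunner so generic options (-D, -files, ...) are parsed before run() is invoked; the Classifier class name here is hypothetical:

  public static void main(String[] args) throws Exception {
    // Hypothetical driver for the tool shown above.
    System.exit(ToolRunner.run(new Configuration(), new Classifier(), args));
  }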

    deleteOutput(output);

    TupleMRBuilder job = new TupleMRBuilder(conf, "Naive Bayes Model Generator");
    job.addIntermediateSchema(INTERMEDIATE_SCHEMA);
    // perform per-category word count mapping
    job.addInput(new Path(inputExamples), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tuple = new Tuple(INTERMEDIATE_SCHEMA);

          @Override
          // … (truncated in the original listing)
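
This mapper is also truncated. For a per-category word count, a sketch of the body might tokenize each line and emit one tuple per word; the "category&lt;TAB&gt;text" line format and the field names "category", "word", and "count" are assumptions about INTERMEDIATE_SCHEMA:

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            // Assumed input: "category<TAB>text". Emit (category, word, 1)
            // per token so the reducer can sum counts per category.
            String[] split = value.toString().split("\t");
            tuple.set("category", split[0]);
            for (String word : split[1].split(" ")) {
              tuple.set("word", word.toLowerCase());
              tuple.set("count", 1);
              collector.write(tuple);
            }
          }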

    mr.addIntermediateSchema(getPangoolRetweetSchema());
    mr.setGroupByFields("tweet_id");
    mr.setOrderBy(new OrderBy().add("tweet_id", Order.ASC).addSchemaOrder(Order.ASC));

    mr.addInput(tweetsPath, new AvroInputFormat<Record>(getAvroTweetSchema()), new TweetsMapper());
    mr.addInput(retweetsPath, new HadoopInputFormat(TextInputFormat.class), new RetweetsMapper());
    mr.setOutput(outputPath, new AvroOutputFormat<Record>(getAvroOutputSchema()),
        AvroWrapper.class, NullWritable.class);

    mr.setTupleReducer(new Red());
    // …
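
addSchemaOrder(Order.ASC) sorts the tuples within each tweet_id group by the order their intermediate schemas were declared, so the reducer sees each tweet before its retweets, the usual shape of a reduce-side join (assuming the tweets schema was declared first; only the retweets declaration is visible above). The Red reducer itself isn't shown; a skeletal, hypothetical sketch:

  public static class Red extends TupleReducer<AvroWrapper<Record>, NullWritable> {
    @Override
    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException, TupleMRException {
      // Tuples from the first declared schema (assumed: tweets) arrive
      // before the retweets; the join/emit logic is not on this page.
      for (ITuple tuple : tuples) {
        // ... join logic elided ...
      }
    }
  }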

    mr.addIntermediateSchema(getSchema());
    mr.setGroupByFields("my_avro");
    // Here the custom comparator that groups by "topic,word" is used.
    MyAvroComparator customComp = new MyAvroComparator(getAvroSchema(), "topic", "word");
    mr.setOrderBy(new OrderBy().add("my_avro", Order.ASC, customComp));
    mr.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class), new TokenizeMapper());
    // We'll use a TupleOutputFormat with the same schema as the intermediate schema.
    mr.setTupleOutput(new Path(args[1]), getSchema());
    mr.setTupleReducer(new CountReducer());
    mr.setTupleCombiner(new CountReducer());
    // …
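
CountReducer is used as both reducer and combiner, which implies an associative aggregation. A sketch of what it might do, assuming the schema carries an int "count" field alongside "my_avro":

  public static class CountReducer extends TupleReducer<ITuple, NullWritable> {
    @Override
    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException, TupleMRException {
      // Sum the (assumed) "count" field across the group and emit one
      // aggregated tuple to the tuple output.
      int count = 0;
      ITuple last = null;
      for (ITuple tuple : tuples) {
        count += (Integer) tuple.get("count");
        last = tuple;
      }
      last.set("count", count);
      collector.write(last, NullWritable.get());
    }
  }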

    MapOnlyJobBuilder job = new MapOnlyJobBuilder(conf);
    TableSpec tableSpec = new TableSpec(schema, schema.getFields().get(1));
   
    job.setOutput(new Path(out, "store"), new SploutSQLProxyOutputFormat(new SQLite4JavaOutputFormat(1000000, tableSpec)), ITuple.class,
        NullWritable.class);
    job.addInput(input, new HadoopInputFormat(TextInputFormat.class), new MapOnlyMapper<LongWritable, Text, ITuple, NullWritable>() {

      ITuple metaTuple = new Tuple(schema);

      protected void map(LongWritable key, Text value, Context context) throws IOException,
          InterruptedException {
        // … (truncated in the original listing)
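
The map body is truncated. A plausible completion copies the (assumed) tab-separated columns into metaTuple positionally and emits it toward the SQL output format; the separator and the all-string field values are assumptions:

      protected void map(LongWritable key, Text value, Context context) throws IOException,
          InterruptedException {
        // Hypothetical body: one input column per schema field, in order.
        String[] split = value.toString().split("\t");
        for (int i = 0; i < split.length; i++) {
          metaTuple.set(schema.getFields().get(i).getName(), split[i]);
        }
        context.write(metaTuple, NullWritable.get());
      }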
