Package com.datasalt.pangool.tuplemr.mapred.lib.input

Examples of com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat
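
A quick orientation before the excerpts: HadoopInputFormat wraps a standard Hadoop InputFormat (TextInputFormat in every example below) so it can be handed to Pangool's job builders via addInput(). The following minimal pass-through sketch shows that pattern end to end; it is an assumption-laden illustration, not code from this page (the output-side import paths are assumed symmetric to the input package named in the title):

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    import com.datasalt.pangool.tuplemr.MapOnlyJobBuilder;
    import com.datasalt.pangool.tuplemr.mapred.MapOnlyMapper;
    import com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat;
    import com.datasalt.pangool.tuplemr.mapred.lib.output.HadoopOutputFormat;

    public class PassThroughExample {

      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        MapOnlyJobBuilder b = new MapOnlyJobBuilder(conf);
        // Read plain text lines through the wrapped TextInputFormat...
        b.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class));
        // ...and write each line back out unchanged
        b.setMapper(new MapOnlyMapper<LongWritable, Text, Text, NullWritable>() {
          protected void map(LongWritable key, Text value, Context context)
              throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
          }
        });
        b.setOutput(new Path(args[1]), new HadoopOutputFormat(TextOutputFormat.class),
            Text.class, NullWritable.class);
        b.createJob().waitForCompletion(true);
      }
    }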


    mr.setFieldAliases("urlMap",new Aliases().add("url","nonCanonicalUrl"));
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).addSchemaOrder(Order.ASC));
    mr.setTupleReducer(new Handler());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    mr.addInput(new Path(input1), new HadoopInputFormat(TextInputFormat.class), new UrlMapProcessor());
    mr.addInput(new Path(input2), new HadoopInputFormat(TextInputFormat.class), new UrlProcessor());
    mr.createJob().waitForCompletion(true);

    return 1;
  }
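
Neither mapper is shown in the excerpt. A hedged sketch of what UrlMapProcessor could look like, assuming TAB-separated (nonCanonicalUrl, canonicalUrl) input pairs and the "urlMap" schema registered on the builder (shown as a class fragment, in the same excerpt style as this page):

    public static class UrlMapProcessor extends TupleMapper<LongWritable, Text> {

      private Tuple tuple;

      @Override
      public void setup(TupleMRContext context, Collector collector) {
        // Fetch the "urlMap" schema that was registered via addIntermediateSchema()
        tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema("urlMap"));
      }

      @Override
      public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
          throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        tuple.set("nonCanonicalUrl", fields[0]); // aliased to "url" by setFieldAliases
        tuple.set("canonicalUrl", fields[1]);
        collector.write(tuple);
      }
    }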


    // Secondary sort: group on "first" only, but also order by "second" so
    // each group's tuples reach the reducer pre-sorted
    builder.setGroupByFields("first");
    builder.setOrderBy(new OrderBy().add("first", Order.ASC).add("second", Order.ASC));
    // Input / output and such
    builder.setTupleReducer(new Handler());
    builder.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    builder.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new IProcessor());
    builder.createJob().waitForCompletion(true);

    return 1;
  }
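
The group-by and order-by fields must belong to an intermediate schema registered earlier on the builder; a hypothetical registration for this excerpt (field types assumed):

    // Hypothetical schema backing the excerpt above; real field types may differ
    Schema schema = new Schema("my_schema", Fields.parse("first:string, second:int"));
    TupleMRBuilder builder = new TupleMRBuilder(conf, "Secondary Sort");
    builder.addIntermediateSchema(schema);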

   
    for(Category category : Category.values()) { // For each Category
      String categoryString = category.toString().toLowerCase();
      // Add one input spec per (category, book title) file, with the associated CategoryMapper
      for(FileStatus fileStatus : fileSystem.listStatus(new Path(input + "/" + categoryString))) {
        job.addInput(fileStatus.getPath(), new HadoopInputFormat(TextInputFormat.class),
            new CategoryMapper(category, fileStatus.getPath().getName()));
      }
      // Add a named Solr output for each category
      job.addNamedOutput(categoryString, new TupleSolrOutputFormat(new File("src/test/resources/shakespeare-solr"),
          conf), ITuple.class, NullWritable.class);
    }

    delete(output);

    // Map-only grep: no reducer; matching lines go straight to the output
    MapOnlyJobBuilder b = new MapOnlyJobBuilder(conf);
    b.setMapper(new GrepHandler(regex));
    b.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    b.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class));
    b.createJob().waitForCompletion(true);

    return 0;
  }
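
GrepHandler itself is not shown; a hedged sketch, assuming its constructor takes the pattern as a string (java.util.regex.Pattern is serializable, so Pangool can ship the configured instance with the job):

    public static class GrepHandler extends MapOnlyMapper<LongWritable, Text, Text, NullWritable> {

      private Pattern regex;

      public GrepHandler(String regex) {
        this.regex = Pattern.compile(regex);
      }

      protected void map(LongWritable key, Text value, Context context)
          throws IOException, InterruptedException {
        // Emit only the lines that match
        if(regex.matcher(value.toString()).find()) {
          context.write(value, NullWritable.get());
        }
      }
    }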

    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Secondary Sort");
    mr.addIntermediateSchema(schema);
    // Group on (intField, strField); the extra "longField" sort criterion is
    // the secondary sort proper
    mr.setGroupByFields("intField", "strField");
    mr.setOrderBy(new OrderBy().add("intField", Order.ASC).add("strField", Order.ASC).add("longField", Order.ASC));
    mr.setTupleReducer(new Handler());
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new IProcessor());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        DoubleWritable.class);
    mr.createJob().waitForCompletion(true);
    return 1;
  }
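
Within each (intField, strField) group the tuples reach Handler already ordered by longField; the Text/DoubleWritable output types suggest one aggregated number per group. A hedged sketch with an assumed aggregation:

    public static class Handler extends TupleReducer<Text, DoubleWritable> {

      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
          throws IOException, InterruptedException, TupleMRException {
        double sum = 0;
        // Iteration order follows the secondary sort on longField
        for(ITuple tuple : tuples) {
          sum += (Long) tuple.get("longField");
        }
        collector.write(new Text(group.get("intField") + "\t" + group.get("strField")),
            new DoubleWritable(sum));
      }
    }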

    mr.setOrderBy(new OrderBy().add("location", Order.ASC).add("date", Order.ASC).add("hashtag", Order.ASC));
    mr.setRollupFrom("date");
    // Input / output and such
    mr.setTupleReducer(new TweetsHandler(n));
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new TweetsProcessor());
    mr.createJob().waitForCompletion(true);
    return 0;
  }

    // (Enclosing setMapper call reconstructed for readability; the excerpt
    // began inside this anonymous MapOnlyMapper)
    job.setMapper(new MapOnlyMapper<LongWritable, Text, Text, NullWritable>() {
      protected void map(LongWritable key, Text value, Context context)
          throws IOException, InterruptedException {
        // Append each line's classification, TAB-separated
        value.set(value.toString() + "\t" + classify(value.toString()));
        context.write(value, NullWritable.get());
      }
    });
    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class));
    job.createJob().waitForCompletion(true);

    return 1;
  }

    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).add("date", Order.ASC));
    // Input / output and such
    mr.setTupleReducer(new MovingAverageHandler(nDaysAverage));
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new URLVisitsProcessor());
    mr.createJob().waitForCompletion(true);
    return 1;
  }
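
URLVisitsProcessor presumably fills a schema holding at least a URL, a date, and a visit count; a hypothetical registration consistent with the group-by/order-by above:

    // Hypothetical intermediate schema; only "url" and "date" are attested above
    mr.addIntermediateSchema(new Schema("visits", Fields.parse("url:string, date:string, visits:int")));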

    mr.setRollupFrom("user");
    // Input / output and such
    // The combiner pre-aggregates counts map-side; the reducer then normalizes
    mr.setTupleCombiner(new CountCombinerHandler());
    mr.setTupleReducer(new NormalizingHandler());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new UserActivityProcessor());
    mr.createJob().waitForCompletion(true);

    return 1;
  }
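
A tuple combiner is a reducer-shaped handler that Pangool runs map-side over tuples. A hedged sketch of what CountCombinerHandler might do, assuming a "count" field to pre-aggregate:

    public static class CountCombinerHandler extends TupleReducer<ITuple, NullWritable> {

      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
          throws IOException, InterruptedException, TupleMRException {
        int count = 0;
        ITuple outTuple = null;
        for(ITuple tuple : tuples) {
          count += (Integer) tuple.get("count"); // "count" field is an assumption
          outTuple = tuple;
        }
        // Re-emit one tuple per group carrying the partial sum
        outTuple.set("count", count);
        collector.write(outTuple, NullWritable.get());
      }
    }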

    delete(output);

    TupleMRBuilder job = new TupleMRBuilder(conf, "Naive Bayes Model Generator");
    job.addIntermediateSchema(INTERMEDIATE_SCHEMA);
    // Per-category word count, mapped by an inline anonymous TupleMapper
    job.addInput(new Path(inputExamples), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tuple = new Tuple(INTERMEDIATE_SCHEMA);

          @Override
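          // The excerpt ends at this @Override; a hedged sketch of how map()
          // plausibly continues, turning "<category>\t<text>" lines into
          // (category, word, 1) tuples (field names assumed):
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            tuple.set("category", split[0]);
            for(String word : split[1].split("\\s+")) {
              tuple.set("word", word.toLowerCase());
              tuple.set("count", 1);
              collector.write(tuple); // one (category, word, 1) tuple per token
            }
          }
        });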
