Package com.cloudera.crunch.impl.mr

Examples of com.cloudera.crunch.impl.mr.MRPipeline.readTextFile()


public class SortCrunchTest implements Serializable {
 
  @Test
  public void test() throws IOException {
    Pipeline pipeline = new MRPipeline(SortCrunchTest.class);
    PCollection<String> records = pipeline.readTextFile("sort/A");
   
    PCollection<Pair<Integer, Integer>> pairs = records.parallelDo(new DoFn<String, Pair<Integer, Integer>>() {
      @Override
      public void process(String input, Emitter<Pair<Integer, Integer>> emitter) {
        Iterator<String> split = Splitter.on('\t').split(input).iterator();
View Full Code Here


  private static final int MISSING = 9999;
 
  @Test
  public void test() throws IOException {
    Pipeline pipeline = new MRPipeline(MaxTemperatureCrunchTest.class);
    PCollection<String> records = pipeline.readTextFile("input");
   
    PTable<String, Integer> maxTemps = records
      .parallelDo(toYearTempPairsFn(), tableOf(strings(), ints()))
      .groupByKey()
      .combineValues(CombineFn.<String> MAX_INTS());
View Full Code Here

public class JoinCrunchTest implements Serializable {
 
  @Test
  public void test() throws IOException {
    Pipeline pipeline = new MRPipeline(JoinCrunchTest.class);
    PCollection<String> a = pipeline.readTextFile("join/A");
    PCollection<String> b = pipeline.readTextFile("join/B");
   
    PTable<String, String> aTable = a.parallelDo(new DoFn<String, Pair<String, String>>() {
    @Override
    public void process(String input, Emitter<Pair<String, String>> emitter) {
View Full Code Here

 
  @Test
  public void test() throws IOException {
    Pipeline pipeline = new MRPipeline(JoinCrunchTest.class);
    PCollection<String> a = pipeline.readTextFile("join/A");
    PCollection<String> b = pipeline.readTextFile("join/B");
   
    PTable<String, String> aTable = a.parallelDo(new DoFn<String, Pair<String, String>>() {
    @Override
    public void process(String input, Emitter<Pair<String, String>> emitter) {
      Iterator<String> split = Splitter.on('\t').split(input).iterator();
View Full Code Here

public class CogroupCrunchTest implements Serializable {
 
  @Test
  public void test() throws IOException {
    Pipeline pipeline = new MRPipeline(CogroupCrunchTest.class);
    PCollection<String> a = pipeline.readTextFile("join/A");
    PCollection<String> b = pipeline.readTextFile("join/B");
   
    PTable<String, String> aTable = a.parallelDo(new DoFn<String, Pair<String, String>>() {
    @Override
    public void process(String input, Emitter<Pair<String, String>> emitter) {
View Full Code Here

 
  @Test
  public void test() throws IOException {
    Pipeline pipeline = new MRPipeline(CogroupCrunchTest.class);
    PCollection<String> a = pipeline.readTextFile("join/A");
    PCollection<String> b = pipeline.readTextFile("join/B");
   
    PTable<String, String> aTable = a.parallelDo(new DoFn<String, Pair<String, String>>() {
    @Override
    public void process(String input, Emitter<Pair<String, String>> emitter) {
      Iterator<String> split = Splitter.on('\t').split(input).iterator();
View Full Code Here

    Path output = new Path(args[1]);
    output.getFileSystem(conf).delete(output, true);

    Pipeline pipeline = new MRPipeline(SimpleTokenize.class, conf);

    PCollection<String> lines = pipeline.readTextFile(args[0]);

    PCollection<String> words = lines.parallelDo(
        "tokenize",
        new DoFn<String, String>() {
          @Override
View Full Code Here

    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(InvertedIndex.class, conf);

    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Define a function that splits each line in a PCollection of Strings into a
    // PCollection made up of the individual words in the file.
    PTable<String, String> wordDocs = CrunchUtils.extractWordFileTable(lines);
View Full Code Here

    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(PopularLinks.class, conf);

    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Convert each line in the PCollection of Strings into a CommonLogEntry,
    // producing a PCollection of the log entries in the file.
    PCollection<CommonLogEntry> logs = CrunchUtils.logs(lines);
View Full Code Here

    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(JoinLogsAndUsers.class, conf);

    // Reference a given text file as a collection of Strings.
    PCollection<String> rawLogs = pipeline.readTextFile(args[0]);

    // Reference a given text file as a collection of Strings.
    PCollection<String> rawUsers = pipeline.readTextFile(args[1]);

    // Define a function that splits each line in a PCollection of Strings into a
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.