Package org.apache.crunch

Examples of org.apache.crunch.Pipeline.readTextFile()
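
A minimal, self-contained sketch of readTextFile() before the extracted examples: it turns a text file into a PCollection<String> that later stages can transform. The class name and the use of command-line arguments for the input and output paths are placeholders, not taken from the snippets below.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.PipelineResult;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.hadoop.conf.Configuration;

public class ReadTextFileExample {
  public static void main(String[] args) {
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(ReadTextFileExample.class, new Configuration());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);
    // Write the collection back out as text and run the pipeline.
    pipeline.writeTextFile(lines, args[1]);
    PipelineResult result = pipeline.done();
    System.exit(result.succeeded() ? 0 : 1);
  }
}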


  @Test
  public void testAvro() throws Exception {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
    runMinMax(shakes, AvroTypeFamily.getInstance());
    pipeline.done();
  }



  @Test
  public void testCollectUrls() throws Exception {
    Pipeline p = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    String urlsInputPath = tmpDir.copyResourceFileName("urls.txt");
    PTable<String, Collection<String>> urls = Aggregate.collectValues(p.readTextFile(urlsInputPath).parallelDo(
        new SplitFn(), tableOf(strings(), strings())));
    for (Pair<String, Collection<String>> e : urls.materialize()) {
      String key = e.first();
      int expectedSize = 0;
      if ("www.A.com".equals(key)) {
        // ...

  }
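
For reference, collectValues() (also exposed as Aggregate.collectValues) turns a PTable<K, V> into a PTable<K, Collection<V>> by gathering all of a key's values into one collection. A minimal in-memory sketch with made-up URLs; it assumes org.apache.crunch.impl.mem.MemPipeline, org.apache.crunch.types.writable.Writables, and java.util.Collection are imported.

    // Two entries for www.A.com collapse into a single (key, collection) pair.
    PTable<String, String> links = MemPipeline.typedTableOf(
        Writables.tableOf(Writables.strings(), Writables.strings()),
        "www.A.com", "www.B.com",
        "www.A.com", "www.C.com",
        "www.D.com", "www.E.com");
    PTable<String, Collection<String>> byUrl = links.collectValues();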

  @Test
  public void testCollectValues_Writables() throws IOException {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    Map<Integer, Collection<Text>> collectionMap = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
        .parallelDo(new MapStringToTextPair(), Writables.tableOf(Writables.ints(), Writables.writables(Text.class)))
        .collectValues().materializeToMap();

    assertEquals(1, collectionMap.size());
    // ...

  @Test
  public void testCollectValues_Avro() throws IOException {

    MapStringToEmployeePair mapFn = new MapStringToEmployeePair();
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    Map<Integer, Collection<Employee>> collectionMap = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
        .parallelDo(mapFn, Avros.tableOf(Avros.ints(), Avros.records(Employee.class))).collectValues()
        .materializeToMap();

    assertEquals(1, collectionMap.size());
    // ...

      return 1;
    }
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(AverageBytesByIP.class, getConf());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Combiner used for summing up response size and count
    CombineFn<String, Pair<Long, Long>> stringPairOfLongsSumCombiner = CombineFn.pairAggregator(CombineFn.SUM_LONGS,
        CombineFn.SUM_LONGS);
    // ...

      return 1;
    }
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(TotalBytesByIP.class, getConf());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Combiner used for summing up response size
    CombineFn<String, Long> longSumCombiner = CombineFn.SUM_LONGS();

    // Table of (ip, sum(response size))
    // ...

      return 1;
    }
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(WordCount.class, getConf());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[1]);

    // Define a function that splits each line in a PCollection of Strings into a
    // PCollection made up of the individual words in the file.
    PCollection<String> words = lines.parallelDo(new DoFn<String, String>() {
      // ...
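
The anonymous DoFn above is cut off before its body; a typical completion overrides process(String, Emitter<String>) and emits each token of the line. A minimal sketch, assuming whitespace tokenization and the Writables type family, neither of which is confirmed by the truncated snippet:

    PCollection<String> words = lines.parallelDo(new DoFn<String, String>() {
      @Override
      public void process(String line, Emitter<String> emitter) {
        // Emit every whitespace-separated token as its own element.
        for (String word : line.split("\\s+")) {
          emitter.emit(word);
        }
      }
    }, Writables.strings());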

      return 1;
    }
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(AverageBytesByIP.class, getConf());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Aggregator used for summing up response size and count
    Aggregator<Pair<Long, Long>> agg = pairAggregator(SUM_LONGS(), SUM_LONGS());

    // Table of (ip, sum(response size), count)
    // ...

  @Test
  public void testAvroReflectSortPair() throws IOException {
    Pipeline pipeline = new MRPipeline(SortIT.class, tmpDir.getDefaultConfiguration());
    pipeline.enableDebug();
    String rsrc = tmpDir.copyResourceFileName("set2.txt");
    PCollection<Pair<String, StringWrapper>> in = pipeline.readTextFile(rsrc)
        .parallelDo(new MapFn<String, Pair<String, StringWrapper>>() {

          @Override
          public Pair<String, StringWrapper> map(String input) {
            return Pair.of(input, wrap(input));
            // ...
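
The truncated test builds a PCollection of (String, StringWrapper) pairs and presumably sorts it next. A hedged sketch of applying org.apache.crunch.lib.Sort.sortPairs to such a collection; the column orders here are illustrative, not necessarily what the test uses:

    // Order by the first column ascending, then the second descending (columns are 1-based).
    PCollection<Pair<String, StringWrapper>> sorted = Sort.sortPairs(in,
        Sort.ColumnOrder.by(1, Sort.Order.ASCENDING),
        Sort.ColumnOrder.by(2, Sort.Order.DESCENDING));
    // Pull the sorted pairs back to the client for assertions.
    Iterable<Pair<String, StringWrapper>> result = sorted.materialize();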

      return 1;
    }
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(TotalBytesByIP.class, getConf());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Aggregator used for summing up response size
    Aggregator<Long> agg = Aggregators.SUM_LONGS();

    // Table of (ip, sum(response size))
    // ...
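
Once the Aggregator is defined, it is typically applied to a grouped table via groupByKey().combineValues(). A minimal sketch of how the (ip, sum(response size)) table could be assembled from the lines collection; ExtractIPResponseSizeFn is a hypothetical name standing in for the example's real log-parsing DoFn:

    // Hypothetical DoFn mapping each access-log line to an (ip, responseSize) pair.
    PTable<String, Long> bytesByIp = lines
        .parallelDo(new ExtractIPResponseSizeFn(),
            Writables.tableOf(Writables.strings(), Writables.longs()))
        .groupByKey()
        .combineValues(agg);
    // Write the per-IP totals out as text.
    pipeline.writeTextFile(bytesByIp, args[1]);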
