Examples of MRPipeline


Examples of org.apache.crunch.impl.mr.MRPipeline

    writer.write(s2);
    writer.write(s3);
    writer.write(s4);
    writer.close();
   
    Pipeline pipeline = new MRPipeline(OrcFileSourceTargetIT.class, conf);
    OrcFileSource<Person> source = new OrcFileSource<Person>(inputPath, Orcs.reflects(Person.class));
    PCollection<Person> rows = pipeline.read(source);
    PTable<Person, Long> count = rows.count();

    List<Pair<Person, Long>> result = Lists.newArrayList(count.materialize());
    List<Pair<Person, Long>> expected = Lists.newArrayList(
        Pair.of(new Person("Alice", 23, Arrays.asList("444-333-9999")), 1L),
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

   * get failed. The rest of running jobs should be killed.
   */
  @Test
  public void testStopPipelineImmediatelyOnJobFailure() throws Exception {
    String inPath = tmpDir.copyResourceFileName("shakes.txt");
    MRPipeline pipeline = new MRPipeline(MRExecutorIT.class);

    // Issue two jobs that sleep forever.
    PCollection<String> in = pipeline.read(From.textFile(inPath));
    for (int i = 0; i < 2; i++) {
      in.count()
          .values()
          .parallelDo(new SleepForeverFn(), longs())
          .write(To.textFile(tmpDir.getPath("out_" + i)));
    }
    MRPipelineExecution exec = pipeline.runAsync();

    // Wait until both of the two jobs are submitted.
    List<MRJob> jobs = exec.getJobs();
    assertEquals(2, jobs.size());
    StopWatch watch = new StopWatch();
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    List<KeyValue> kvs = generateKeyValues(100);
    Path inputPath = tmpDir.getPath("in");
    Path outputPath = tmpDir.getPath("out");
    writeKeyValuesToHFile(inputPath, kvs);

    Pipeline pipeline = new MRPipeline(HFileSourceIT.class, conf);
    PCollection<KeyValue> in = pipeline.read(FromHBase.hfile(inputPath));
    PCollection<String> texts = in.parallelDo(new MapFn<KeyValue, String>() {
      @Override
      public String map(KeyValue input) {
        return input.toString();
      }
    }, strings());
    texts.write(To.textFile(outputPath));
    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());

    List<String> lines = FileUtils.readLines(new File(outputPath.toString(), "part-m-00000"));
    assertEquals(kvs.size(), lines.size());
    for (int i = 0; i < kvs.size(); i++) {
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

  private List<Result> doTestScanHFiles(List<KeyValue> kvs, Scan scan) throws IOException {
    Path inputPath = tmpDir.getPath("in");
    writeKeyValuesToHFile(inputPath, kvs);

    Pipeline pipeline = new MRPipeline(HFileSourceIT.class, conf);
    PCollection<Result> results = HFileUtils.scanHFiles(pipeline, inputPath, scan);
    return ImmutableList.copyOf(results.materialize());
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

  private List<KeyValue> doTestReadHFiles(List<KeyValue> kvs, Scan scan) throws IOException {
    Path inputPath = tmpDir.getPath("in");
    writeKeyValuesToHFile(inputPath, kvs);

    Pipeline pipeline = new MRPipeline(HFileSourceIT.class, conf);
    PCollection<KeyValue> results = pipeline.read(FromHBase.hfile(inputPath));
    return ImmutableList.copyOf(results.materialize());
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    hbaseTestUtil.startMiniHBaseCluster(1, 1);
  }

  @Test
  public void testWordCount() throws Exception {
    run(new MRPipeline(WordCountHBaseIT.class, hbaseTestUtil.getConfiguration()));
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    pipeline.done();

    //verify HBaseTarget supports deletes.
    Scan clearScan = new Scan();
    clearScan.addFamily(COUNTS_COLFAM);
    pipeline = new MRPipeline(WordCountHBaseIT.class, hbaseTestUtil.getConfiguration());
    HBaseSourceTarget clearSource = new HBaseSourceTarget(outputTableName, clearScan);
    PTable<ImmutableBytesWritable, Result> counts = pipeline.read(clearSource);
    pipeline.write(clearCounts(counts), new HBaseTarget(outputTableName));
    pipeline.done();
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

  // JUnit rule holding the TemporaryPath produced by TemporaryPaths.create(). The tests
  // below use it for the default configuration, resource copies and output file names.
  @Rule
  public TemporaryPath tmpDir = TemporaryPaths.create();

  @Test
  public void testNoBreakpoint() throws Exception {
    run(new MRPipeline(BreakpointIT.class, tmpDir.getDefaultConfiguration()),
        tmpDir.copyResourceFileName("shakes.txt"),
        tmpDir.getFileName("out1"),
        tmpDir.getFileName("out2"),
        false);
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

        false);
  }

  @Test
  public void testBreakpoint() throws Exception {
    run(new MRPipeline(BreakpointIT.class, tmpDir.getDefaultConfiguration()),
        tmpDir.copyResourceFileName("shakes.txt"),
        tmpDir.getFileName("out1"),
        tmpDir.getFileName("out2"),
        true);
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

  // TemporaryPath used by the test below for the resource copy, the default configuration
  // and the output file.
  // NOTE(review): declared transient, presumably so the field is skipped if the enclosing
  // test class gets serialized — confirm against the class declaration.
  public transient TemporaryPath tmpDir = TemporaryPaths.create();

  @Test
  public void materializedColShouldBeWritten() throws Exception {
    File textFile = tmpDir.copyResourceFile("shakes.txt");
    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> genericCollection = pipeline.readTextFile(textFile.getAbsolutePath());
    pipeline.run();
    PCollection<String> filter = genericCollection.filter("Filtering data", FilterFns.<String>ACCEPT_ALL());
    filter.materialize();
    pipeline.run();
    File file = tmpDir.getFile("output.txt");
    Target outFile = To.textFile(file.getAbsolutePath());
    PCollection<String> write = filter.write(outFile);
    write.materialize();
    pipeline.run();
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.