Examples of MRPipeline


Examples of org.apache.crunch.impl.mr.MRPipeline

 
 
 
  @Test
  public void testPGroupedTableToMultipleOutputs() throws IOException{
    Pipeline pipeline = new MRPipeline(MRPipelineIT.class, tmpDir.getDefaultConfiguration());
    PGroupedTable<String, String> groupedLineTable = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt")).by(IdentityFn.<String>getInstance(), Writables.strings()).groupByKey();
   
    PTable<String, String> ungroupedTableA = groupedLineTable.ungroup();
    PTable<String, String> ungroupedTableB = groupedLineTable.ungroup();
   
    File outputDirA = tmpDir.getFile("output_a");
    File outputDirB = tmpDir.getFile("output_b");
   
    pipeline.writeTextFile(ungroupedTableA, outputDirA.getAbsolutePath());
    pipeline.writeTextFile(ungroupedTableB, outputDirB.getAbsolutePath());
    PipelineResult result = pipeline.done();
    for(StageResult stageResult : result.getStageResults()){
      assertTrue(stageResult.getStageName().length() > 1);
      assertTrue(stageResult.getStageId().length() > 1);
    }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    MemPipeline.clearCounters();
  }
 
  @Test
  public void testStageResultsCountersMRWritables() throws Exception {
    testSpecialKeywordCount(new MRPipeline(StageResultsCountersIT.class, tmpDir.getDefaultConfiguration()),
        WritableTypeFamily.getInstance());
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

        WritableTypeFamily.getInstance());
  }

  @Test
  public void testStageResultsCountersMRAvro() throws Exception {
    testSpecialKeywordCount(new MRPipeline(StageResultsCountersIT.class, tmpDir.getDefaultConfiguration()),
        AvroTypeFamily.getInstance());
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

  @Test
  public void testVanillaCSV() throws Exception {
    String[] expectedFileContents = { "1,2,3,4", "5,6,7,8", "9,10,11", "12,13,14" };

    String vanillaCSVFile = tmpDir.copyResourceFileName("vanilla.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(vanillaCSVFile)));
    pipeline.run();

    Collection<String> csvLinesList = csvLines.asCollection().getValue();

    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    String[] expectedFileContents = {
        "\"Champion, Mac\",\"1234 Hoth St.\n\tApartment 101\n\tAtlanta, GA\n\t64086\",\"30\",\"M\",\"5/28/2010 12:00:00 AM\",\"Just some guy\"",
        "\"Champion, Mac\",\"5678 Tatooine Rd. Apt 5, Mobile, AL 36608\",\"30\",\"M\",\"Some other date\",\"short description\"" };

    String csvWithNewlines = tmpDir.copyResourceFileName("withNewlines.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(csvWithNewlines)));
    pipeline.run();

    Collection<String> csvLinesList = csvLines.asCollection().getValue();

    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    String[] expectedFileContents = {
        "*Champion, Mac*,*1234 Hoth St.\n\tApartment 101\n\tAtlanta, GA\n\t64086*,*30*,*M*,*5/28/2010 12:00:00 AM*,*Just some guy*",
        "*Mac, Champion*,*5678 Tatooine Rd. Apt 5, Mobile, AL 36608*,*30*,*M*,*Some other date*,*short description*" };

    String csvWithNewlines = tmpDir.copyResourceFileName("customQuoteCharWithNewlines.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(csvWithNewlines),
        CSVLineReader.DEFAULT_BUFFER_SIZE, CSVLineReader.DEFAULT_INPUT_FILE_ENCODING, '*', '*',
        CSVLineReader.DEFAULT_ESCAPE_CHARACTER));
    pipeline.run();

    Collection<String> csvLinesList = csvLines.asCollection().getValue();

    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

  public void testBrokenLineParsingInChinese() throws IOException {
    final String[] expectedChineseLines = { "您好我叫马克,我从亚拉巴马州来,我是软件工程师,我二十八岁", "我有一个宠物,它是一个小猫,它六岁,它很漂亮",
        "我喜欢吃饭,“我觉得这个饭最好\n*蛋糕\n*包子\n*冰淇淋\n*啤酒“,他们都很好,我也很喜欢奶酪但它是不健康的", "我是男的,我的头发很短,我穿蓝色的裤子,“我穿黑色的、“衣服”" };
    String chineseLines = tmpDir.copyResourceFileName("brokenChineseLines.csv");

    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(chineseLines),
        CSVLineReader.DEFAULT_BUFFER_SIZE, CSVLineReader.DEFAULT_INPUT_FILE_ENCODING, '“', '”', '、'));
    pipeline.run();
    Collection<String> csvLinesList = csvLines.asCollection().getValue();
    for (int i = 0; i < expectedChineseLines.length; i++) {
      assertTrue(csvLinesList.contains(expectedChineseLines[i]));
    }
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    runMapsideLeftOuterJoin(MemPipeline.getInstance(), true);
  }

  @Test
  public void testMapsideJoin_RightSideIsEmpty() throws IOException {
    MRPipeline pipeline = new MRPipeline(MapsideJoinStrategyIT.class, tmpDir.getDefaultConfiguration());
    PTable<Integer, String> customerTable = readTable(pipeline, "customers.txt");
    PTable<Integer, String> orderTable = readTable(pipeline, "orders.txt");

    PTable<Integer, String> filteredOrderTable = orderTable
        .parallelDo(FilterFns.<Pair<Integer, String>>REJECT_ALL(), orderTable.getPTableType());
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    assertTrue(materializedJoin.isEmpty());
  }

  @Test
  public void testMapsideJoin() throws IOException {
    runMapsideJoin(new MRPipeline(MapsideJoinStrategyIT.class, tmpDir.getDefaultConfiguration()), false);
  }
View Full Code Here

Examples of org.apache.crunch.impl.mr.MRPipeline

    runMapsideJoin(new MRPipeline(MapsideJoinStrategyIT.class, tmpDir.getDefaultConfiguration()), false);
  }
 
  @Test
  public void testMapsideJoin_LeftOuterJoin() throws IOException {
    runMapsideLeftOuterJoin(new MRPipeline(MapsideJoinStrategyIT.class, tmpDir.getDefaultConfiguration()), false);
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.