Package eu.stratosphere.api.java.record.operators

Examples of eu.stratosphere.api.java.record.operators.FileDataSource


  //                      /
  //    Sc3(id,y) --------
  @Override
  protected Plan getTestJob() {
    // Sc1 generates M parameters a,b,c for second degree polynomials P(x) = ax^2 + bx + c identified by id
    FileDataSource sc1 = new FileDataSource(new CsvInputFormat(), sc1Path);
    CsvInputFormat.configureRecordFormat(sc1).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1)
        .field(IntValue.class, 2).field(IntValue.class, 3);

    // Sc2 generates N x values to be evaluated with the polynomial identified by id
    FileDataSource sc2 = new FileDataSource(new CsvInputFormat(), sc2Path);
    CsvInputFormat.configureRecordFormat(sc2).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1);

    // Sc3 generates N y values to be evaluated with the polynomial identified by id
    FileDataSource sc3 = new FileDataSource(new CsvInputFormat(), sc3Path);
    CsvInputFormat.configureRecordFormat(sc3).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1);

    // Jn1 matches x and y values on id and emits (id, x, y) triples
    JoinOperator jn1 = JoinOperator.builder(Jn1.class, StringValue.class, 0, 0).input1(sc2).input2(sc3).build();
View Full Code Here


  }

  @Override
  protected Plan getTestJob() {
    FileDataSource input_left =  new FileDataSource(new CoGroupTestInFormat(), leftInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_left)
      .recordDelimiter('\n');
    input_left.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));

    FileDataSource input_right =  new FileDataSource(new CoGroupTestInFormat(), rightInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_right)
      .recordDelimiter('\n');
    input_right.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));

    CoGroupOperator testCoGrouper = CoGroupOperator.builder(new TestCoGrouper(), StringValue.class, 0, 0)
      .build();
    testCoGrouper.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));
    testCoGrouper.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY,
View Full Code Here

    return toParameterList(config1);
  }
 
  private static Plan getPlan(int numSubTasks, String input, String output) {
    FileDataSource initialInput = new FileDataSource(new PointInFormat(), input, "Input");
    initialInput.setDegreeOfParallelism(1);
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(2);
View Full Code Here

  protected JobGraph getJobGraph() throws Exception {
   
    String path1 = config.getBoolean("input1PathHasData", false) ? textInput : emptyInput;
    String path2 = config.getBoolean("input2PathHasData", false) ? textInput : emptyInput;
   
    FileDataSource input1 = new FileDataSource(new ContractITCaseInputFormat(), path1);
    FileDataSource input2 = new FileDataSource(new ContractITCaseInputFormat(), path2);
   
    MapOperator testMapper1 = MapOperator.builder(new TestMapper()).build();
    MapOperator testMapper2 = MapOperator.builder(new TestMapper()).build();

    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultDir);
View Full Code Here

  }

  @Override
  protected Plan getTestJob() {
    FileDataSource input_left = new FileDataSource(
        new ContractITCaseInputFormat(), leftInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_left)
      .recordDelimiter('\n');
    input_left.setDegreeOfParallelism(config.getInteger("MatchTest#NoSubtasks", 1));

    FileDataSource input_right = new FileDataSource(
        new ContractITCaseInputFormat(), rightInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_right)
      .recordDelimiter('\n');
    input_right.setDegreeOfParallelism(config.getInteger("MatchTest#NoSubtasks", 1));

    JoinOperator testMatcher = JoinOperator.builder(new TestMatcher(), StringValue.class, 0, 0)
      .build();
    testMatcher.setDegreeOfParallelism(config.getInteger("MatchTest#NoSubtasks", 1));
    testMatcher.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY,
View Full Code Here

      WordCount wc = new WordCount();
      Plan p = wc.getPlan(DEFAULT_PARALLELISM_STRING, IN_FILE, OUT_FILE);
     
      OptimizedPlan plan;
      if (estimates) {
        FileDataSource source = getContractResolver(p).getNode("Input Lines");
        setSourceStatistics(source, 1024*1024*1024*1024L, 24f);
        plan = compileWithStats(p);
      } else {
        plan = compileNoStats(p);
      }
View Full Code Here

    checkWordCountWithSortedSink(false);
  }
 
  private void checkWordCountWithSortedSink(boolean estimates) {
    try {
      FileDataSource sourceNode = new FileDataSource(new TextInputFormat(), IN_FILE, "Input Lines");
      MapOperator mapNode = MapOperator.builder(new TokenizeLine())
        .input(sourceNode)
        .name("Tokenize Lines")
        .build();
      ReduceOperator reduceNode = ReduceOperator.builder(new CountWords(), StringValue.class, 0)
View Full Code Here

    KMeansBroadcast kmi = new KMeansBroadcast();
    Plan p = kmi.getPlan(String.valueOf(DEFAULT_PARALLELISM), IN_FILE, IN_FILE, OUT_FILE, String.valueOf(20));
   
    // set the statistics
    OperatorResolver cr = getContractResolver(p);
    FileDataSource pointsSource = cr.getNode(DATAPOINTS);
    FileDataSource centersSource = cr.getNode(CENTERS);
    setSourceStatistics(pointsSource, 100l*1024*1024*1024, 32f);
    setSourceStatistics(centersSource, 1024*1024, 32f);
   
    OptimizedPlan plan = compileWithStats(p);
    checkPlan(plan);
View Full Code Here

      boolean hashJoinFirstOkay, boolean hashJoinSecondOkay, boolean mergeJoinOkay)
  {
    try {
      // set statistics
      OperatorResolver cr = getContractResolver(p);
      FileDataSource ordersSource = cr.getNode(ORDERS);
      FileDataSource lineItemSource = cr.getNode(LINEITEM);
      MapOperator mapper = cr.getNode(MAPPER_NAME);
      JoinOperator joiner = cr.getNode(JOIN_NAME);
      setSourceStatistics(ordersSource, orderSize, 100f);
      setSourceStatistics(lineItemSource, lineitemSize, 140f);
      mapper.getCompilerHints().setAvgOutputRecordSize(16f);
View Full Code Here

    KMeansSingleStep kmi = new KMeansSingleStep();
    Plan p = kmi.getPlan(String.valueOf(DEFAULT_PARALLELISM), IN_FILE, IN_FILE, OUT_FILE, String.valueOf(20));
   
    // set the statistics
    OperatorResolver cr = getContractResolver(p);
    FileDataSource pointsSource = cr.getNode(DATAPOINTS);
    FileDataSource centersSource = cr.getNode(CENTERS);
    setSourceStatistics(pointsSource, 100l*1024*1024*1024, 32f);
    setSourceStatistics(centersSource, 1024*1024, 32f);
   
    OptimizedPlan plan = compileWithStats(p);
    checkPlan(plan);
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.operators.FileDataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.