Package eu.stratosphere.api.java.record.operators

Examples of eu.stratosphere.api.java.record.operators.FileDataSource


  @SuppressWarnings("unchecked")
  private static Plan getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(
      int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations)
  {
    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create JoinOperator that joins the iteration workset with the edges
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here


public class GroupOrderTest extends CompilerTestBase  {

  @Test
  public void testReduceWithGroupOrder() {
    // construct the plan
    FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
   
    ReduceOperator reduce = ReduceOperator.builder(new IdentityReduce()).keyField(IntValue.class, 2).name("Reduce").input(source).build();
    Ordering groupOrder = new Ordering(5, StringValue.class, Order.DESCENDING);
    reduce.setGroupOrder(groupOrder);
   
View Full Code Here

  }
 
  @Test
  public void testCoGroupWithGroupOrder() {
    // construct the plan
    FileDataSource source1 = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source1");
    FileDataSource source2 = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source2");
   
    CoGroupOperator coGroup = CoGroupOperator.builder(new DummyCoGroupStub(), IntValue.class, 3, 6)
        .keyField(LongValue.class, 0, 0)
        .name("CoGroup").input1(source1).input2(source2).build();
   
View Full Code Here

    final String edgeInput = args.length > 1 ? args[1] : "";
    final String output    = args.length > 2 ? args[2] : "";
    final char delimiter   = args.length > 3 ? (char) Integer.parseInt(args[3]) : ',';
   

    FileDataSource edges = new FileDataSource(new EdgeInputFormat(), edgeInput, "Input Edges");
    edges.setParameter(EdgeInputFormat.ID_DELIMITER_CHAR, delimiter);

    // =========================== Vertex Degree ============================
   
    MapOperator projectEdge = MapOperator.builder(new ProjectEdge())
        .input(edges).name("Project Edge").build();
View Full Code Here

    /*
     * Output Schema:
     * 0: CUSTOMER_ID
     */
    // create DataSourceContract for Orders input
    FileDataSource orders = new FileDataSource(new CsvInputFormat(), ordersPath, "Orders");
    orders.setDegreeOfParallelism(numSubtasks);
    CsvInputFormat.configureRecordFormat(orders)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(IntValue.class, 1);
   
    /*
     * Output Schema:
     * 0: CUSTOMER_ID
     * 1: MKT_SEGMENT
     */
    // create DataSourceContract for Customer input
    FileDataSource customers = new FileDataSource(new CsvInputFormat(), customerPath, "Customers");
    customers.setDegreeOfParallelism(numSubtasks);
    CsvInputFormat.configureRecordFormat(customers)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(IntValue.class, 0)
      .field(StringValue.class, 6);
View Full Code Here

 
  @SuppressWarnings("unchecked")
  public static Plan getPlan(int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations) {

    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create JoinOperator that joins the iteration workset with the edges
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here

  }

  @Override
  protected Plan getTestJob() {

    FileDataSource input_left = new FileDataSource(
        new ContractITCaseInputFormat(), leftInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_left)
      .recordDelimiter('\n');
    input_left.setDegreeOfParallelism(config.getInteger("CrossTest#NoSubtasks", 1));

    FileDataSource input_right = new FileDataSource(
        new ContractITCaseInputFormat(), rightInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_right)
      .recordDelimiter('\n');
    input_right.setDegreeOfParallelism(config.getInteger("CrossTest#NoSubtasks", 1));

    CrossOperator testCross = CrossOperator.builder(new TestCross()).build();
    testCross.setDegreeOfParallelism(config.getInteger("CrossTest#NoSubtasks", 1));
    testCross.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY,
        config.getString("CrossTest#LocalStrategy", ""));
View Full Code Here

      this.degreeOfParallelism = Integer.parseInt(args[0]);
      this.lineItemInputPath = args[1];
      this.outputPath = args[2];
    }
   
    FileDataSource lineItems =
      new FileDataSource(new IntTupleDataInFormat(), this.lineItemInputPath, "LineItems");
    lineItems.setDegreeOfParallelism(this.degreeOfParallelism);
   
    FileDataSink result =
      new FileDataSink(new StringTupleDataOutFormat(), this.outputPath, "Output");
    result.setDegreeOfParallelism(this.degreeOfParallelism);
   
View Full Code Here

    // parse job parameters
    int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String edgeInput = (args.length > 1 ? args[1] : "");
    String output    = (args.length > 2 ? args[2] : "");

    FileDataSource edges = new FileDataSource(new EdgeInFormat(), edgeInput, "BTC Edges");
   
    ReduceOperator buildTriads = ReduceOperator.builder(new BuildTriads(), StringValue.class, 0)
      .name("Build Triads")
      .build();
View Full Code Here

    return getTestPlanPlan(4, dataPath, resultPath);
  }
 
  private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {

    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(5);
    Assert.assertTrue(iteration.getMaximumNumberOfIterations() > 1);
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.operators.FileDataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.