Package eu.stratosphere.api.java.record.operators

Examples of eu.stratosphere.api.java.record.operators.FileDataSource


    final String ordersPath    = (args.length > 1 ? args[1] : "");
    final String lineitemsPath = (args.length > 2 ? args[2] : "");
    final String output        = (args.length > 3 ? args[3] : "");

    // create DataSourceContract for Orders input
    FileDataSource orders = new FileDataSource(new CsvInputFormat(), ordersPath, "Orders");
    CsvInputFormat.configureRecordFormat(orders)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)    // order id
      .field(IntValue.class, 7)     // ship prio
      .field(StringValue.class, 2, 2// order status
      .field(StringValue.class, 4, 10// order date
      .field(StringValue.class, 5, 8)// order prio

    // create DataSourceContract for LineItems input
    FileDataSource lineitems = new FileDataSource(new CsvInputFormat(), lineitemsPath, "LineItems");
    CsvInputFormat.configureRecordFormat(lineitems)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)    // order id
      .field(DoubleValue.class, 5)// extended price
View Full Code Here


      outputPath = args[3];
      numIterations = Integer.parseInt(args[4]);
      numVertices = Long.parseLong(args[5]);
    }
   
    FileDataSource pageWithRankInput = new FileDataSource(new DanglingPageRankInputFormat(),
      pageWithRankInputPath, "PageWithRank Input");
    pageWithRankInput.getParameters().setLong(NUM_VERTICES_CONFIG_PARAM, numVertices);
   
    BulkIteration iteration = new BulkIteration("Page Rank Loop");
    iteration.setInput(pageWithRankInput);
   
    FileDataSource adjacencyListInput = new FileDataSource(new ImprovedAdjacencyListInputFormat(),
      adjacencyListInputPath, "AdjancencyListInput");
   
    JoinOperator join = JoinOperator.builder(new JoinVerexWithEdgesMatch(), LongValue.class, 0, 0)
        .input1(iteration.getPartialSolution())
        .input2(adjacencyListInput)
View Full Code Here

    return getTestPlanPlan(4, dataPath, resultPath);
  }
 
  private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {

    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(5);
    Assert.assertTrue(iteration.getMaximumNumberOfIterations() > 1);
View Full Code Here

     * 1: DOCUMENT_TEXT
     */
    // Create DataSourceContract for documents relation
    @SuppressWarnings("unchecked")
    CsvInputFormat docsFormat = new CsvInputFormat('|', StringValue.class, StringValue.class);
    FileDataSource docs = new FileDataSource(docsFormat, docsInput, "Docs Input");
   
    /*
     * Output Format:
     * 0: URL
     * 1: RANK
     * 2: AVG_DURATION
     */
    // Create DataSourceContract for ranks relation
    FileDataSource ranks = new FileDataSource(new CsvInputFormat(), ranksInput, "Ranks input");
    CsvInputFormat.configureRecordFormat(ranks)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(StringValue.class, 1)
      .field(IntValue.class, 0)
      .field(IntValue.class, 2);

    /*
     * Output Format:
     * 0: URL
     * 1: DATE
     */
    // Create DataSourceContract for visits relation
    @SuppressWarnings("unchecked")
    CsvInputFormat visitsFormat = new CsvInputFormat('|', null, StringValue.class, StringValue.class);
    FileDataSource visits = new FileDataSource(visitsFormat, visitsInput, "Visits input:q");

    // Create MapOperator for filtering the entries from the documents
    // relation
    MapOperator filterDocs = MapOperator.builder(new FilterDocs())
      .input(docs)
View Full Code Here

      // parse program parameters
      final int numSubtasks     = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
      final String recordsPath = (args.length > 1 ? args[1] : "");
      final String output      = (args.length > 2 ? args[2] : "");
     
      @SuppressWarnings("unchecked")
      FileDataSource source = new FileDataSource(new CsvInputFormat(',', IntValue.class, IntValue.class, IntValue.class), recordsPath);
     
      FileDataSink sink = new FileDataSink(CsvOutputFormat.class, output);
      CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(',')
View Full Code Here

   * First cross has SameKeyFirst output contract
   */
  @Test
  public void testTicket158() {
    // construct the plan
    FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
   
    MapOperator map = MapOperator.builder(new IdentityMap()).name("Map1").input(source).build();
   
    ReduceOperator reduce1 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce1").input(map).build();
   
View Full Code Here

  }

 
  private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {

    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(5);
   
View Full Code Here

    }else
    {
      setArgs(args);
    }
   
    FileDataSource orders =
      new FileDataSource(new IntTupleDataInFormat(), this.ordersInputPath, "Orders");
    orders.setDegreeOfParallelism(this.degreeOfParallelism);
    //orders.setOutputContract(UniqueKey.class);
   
    FileDataSource lineItems =
      new FileDataSource(new IntTupleDataInFormat(), this.lineItemInputPath, "LineItems");
    lineItems.setDegreeOfParallelism(this.degreeOfParallelism);
   
    FileDataSink result =
        new FileDataSink(new StringTupleDataOutFormat(), this.outputPath, "Output");
    result.setDegreeOfParallelism(degreeOfParallelism);
   
View Full Code Here

    // parse job parameters
    int numSubTasks   = args.length > 0 ? Integer.parseInt(args[0]) : 1;
    String edgeInput = args.length > 1 ? args[1] : "";
    String output    = args.length > 2 ? args[2] : "";

    FileDataSource edges = new FileDataSource(new EdgeWithDegreesInputFormat(), edgeInput, "Input Edges with Degrees");
    edges.setParameter(EdgeWithDegreesInputFormat.VERTEX_DELIMITER_CHAR, '|');
    edges.setParameter(EdgeWithDegreesInputFormat.DEGREE_DELIMITER_CHAR, ',');

    // =========================== Triangle Enumeration ============================
   
    MapOperator toLowerDegreeEdge = MapOperator.builder(new ProjectToLowerDegreeVertex())
        .input(edges)
View Full Code Here

public class ReduceAllTest extends CompilerTestBase {

  @Test
  public void testReduce() {
    // construct the plan
    FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
    ReduceOperator reduce1 = ReduceOperator.builder(new IdentityReduce()).name("Reduce1").input(source).build();
    FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, "Sink");
    sink.setInput(reduce1);
    Plan plan = new Plan(sink, "AllReduce Test");
    plan.setDefaultParallelism(DEFAULT_PARALLELISM);
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.operators.FileDataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.