Package eu.stratosphere.api.java.record.operators

Examples of eu.stratosphere.api.java.record.operators.FileDataSource


    int numSubtasksInput2 = (args.length > 4 ? Integer.parseInt(args[4]) : 1);

    // create DataSourceContract for Orders input
    @SuppressWarnings("unchecked")
    CsvInputFormat format1 = new CsvInputFormat('|', IntValue.class, IntValue.class);
    FileDataSource input1 = new FileDataSource(format1, input1Path, "Input 1");
   
    ReduceOperator aggInput1 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0)
      .input(input1)
      .name("AggOrders")
      .build();

   
    // create DataSourceContract for Orders input
    @SuppressWarnings("unchecked")
    CsvInputFormat format2 = new CsvInputFormat('|', IntValue.class, IntValue.class);
    FileDataSource input2 = new FileDataSource(format2, input2Path, "Input 2");
    input2.setDegreeOfParallelism(numSubtasksInput2);

    ReduceOperator aggInput2 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0)
      .input(input2)
      .name("AggLines")
      .build();
View Full Code Here


  @Override
  protected Plan getTestJob() {
    String input1Path = config.getString("UnionTest#Input1Path", "").equals("empty") ? emptyInPath : inPath;
    String input2Path = config.getString("UnionTest#Input2Path", "").equals("empty") ? emptyInPath : inPath;

    FileDataSource input1 = new FileDataSource(
      new ContractITCaseInputFormat(), input1Path);
    DelimitedInputFormat.configureDelimitedFormat(input1)
      .recordDelimiter('\n');
    input1.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));
   
    FileDataSource input2 = new FileDataSource(
        new ContractITCaseInputFormat(), input2Path);
    DelimitedInputFormat.configureDelimitedFormat(input2)
      .recordDelimiter('\n');
    input2.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));
   
    MapOperator testMapper = MapOperator.builder(new TestMapper()).build();
    testMapper.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

    FileDataSink output = new FileDataSink(
View Full Code Here

   
    String lineitemsPath = (args.length > 5 ? args[5] : "");
    String output        = (args.length > 6 ? args[6] : "");

    // create DataSourceContract for Orders input
    FileDataSource orders1 = new FileDataSource(new CsvInputFormat(), orders1Path, "Orders 1");
    CsvInputFormat.configureRecordFormat(orders1)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)    // order id
      .field(IntValue.class, 7)     // ship prio
      .field(StringValue.class, 2, 2)   // order status
      .field(StringValue.class, 4, 10)  // order date
      .field(StringValue.class, 5, 8);  // order prio
   
    FileDataSource orders2 = new FileDataSource(new CsvInputFormat(), orders2Path, "Orders 2");
    CsvInputFormat.configureRecordFormat(orders2)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)    // order id
      .field(IntValue.class, 7)     // ship prio
      .field(StringValue.class, 2, 2)   // order status
      .field(StringValue.class, 4, 10)  // order date
      .field(StringValue.class, 5, 8);  // order prio
   
    // create DataSourceContract for LineItems input
    FileDataSource lineitems = new FileDataSource(new CsvInputFormat(), lineitemsPath, "LineItems");
    CsvInputFormat.configureRecordFormat(lineitems)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)
      .field(DoubleValue.class, 5);

    // create MapOperator for filtering Orders tuples
    MapOperator filterO1 = MapOperator.builder(new FilterO())
      .name("FilterO")
      .input(orders1)
      .build();
    // filter configuration
    filterO1.setParameter(TPCHQuery3.YEAR_FILTER, 1993);
    filterO1.setParameter(TPCHQuery3.PRIO_FILTER, "5");
    filterO1.getCompilerHints().setFilterFactor(0.05f);
   
    // create MapOperator for filtering Orders tuples
    MapOperator filterO2 = MapOperator.builder(new FilterO())
      .name("FilterO")
      .input(orders2)
      .build();
    // filter configuration
    filterO2.setParameter(TPCHQuery3.YEAR_FILTER, 1993);
    filterO2.setParameter(TPCHQuery3.PRIO_FILTER, "5");

    // create JoinOperator for joining Orders and LineItems
    @SuppressWarnings("unchecked")
    JoinOperator joinLiO = JoinOperator.builder(new JoinLiO(), LongValue.class, 0, 0)
      .input1(filterO2, filterO1)
      .input2(lineitems)
      .name("JoinLiO")
      .build();
   
    FileDataSource partJoin1 = new FileDataSource(new CsvInputFormat(), partJoin1Path, "Part Join 1");
    CsvInputFormat.configureRecordFormat(partJoin1)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)
      .field(IntValue.class, 1)
      .field(DoubleValue.class, 2);
   
    FileDataSource partJoin2 = new FileDataSource(new CsvInputFormat(), partJoin2Path, "Part Join 2");
    CsvInputFormat.configureRecordFormat(partJoin2)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)
      .field(IntValue.class, 1)
View Full Code Here

    int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String paths     = (args.length > 1 ? args[1] : "");
    String output    = (args.length > 2 ? args[2] : "");
    boolean rdfInput = (args.length > 3 ? Boolean.parseBoolean(args[3]) : false);

    FileDataSource pathsInput;
   
    if(rdfInput) {
      pathsInput = new FileDataSource(new RDFTripleInFormat(), paths, "RDF Triples");
    } else {
      pathsInput = new FileDataSource(new PathInFormat(), paths, "Paths");
    }
    pathsInput.setDegreeOfParallelism(numSubTasks);

    JoinOperator concatPaths =
        JoinOperator.builder(new ConcatPaths(), StringValue.class, 0, 1)
      .name("Concat Paths")
      .build();
View Full Code Here

        }
    }

    static Plan getTestPlan(int numSubTasks, String input, String output) {

        FileDataSource initialInput = new FileDataSource(new PointInFormat(), input, "Input");
        initialInput.setDegreeOfParallelism(1);

        BulkIteration iteration = new BulkIteration("Loop");
        iteration.setInput(initialInput);
        iteration.setMaximumNumberOfIterations(2);
View Full Code Here

      customersPath = args[3];
      nationsPath = args[4];
      resultPath = args[5];
    }
   
    FileDataSource orders = new FileDataSource(new IntTupleDataInFormat(), ordersPath, "Orders");
    // orders.setOutputContract(UniqueKey.class);
    // orders.getCompilerHints().setAvgNumValuesPerKey(1);

    FileDataSource lineitems = new FileDataSource(new IntTupleDataInFormat(), lineitemsPath, "LineItems");
    // lineitems.getCompilerHints().setAvgNumValuesPerKey(4);

    FileDataSource customers = new FileDataSource(new IntTupleDataInFormat(), customersPath, "Customers");

    FileDataSource nations = new FileDataSource(new IntTupleDataInFormat(), nationsPath, "Nations");


    MapOperator mapO = MapOperator.builder(FilterO.class)
      .name("FilterO")
      .build();
View Full Code Here

 
  @SuppressWarnings("unchecked")
  public static Plan getPlan(int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations, boolean extraMap) {

    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    // the loop takes the vertices as the solution set and changed vertices as the workset
    // initially, all vertices are changed
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // data source for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // join workset (changed vertices) with the edges to propagate changes to neighbors
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here

    }
  }

  @Override
  protected Plan getTestJob() {
    FileDataSource input = new FileDataSource(
        new ContractITCaseInputFormat(), inPath);
    DelimitedInputFormat.configureDelimitedFormat(input)
      .recordDelimiter('\n');
    input.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    MapOperator testMapper = MapOperator.builder(new TestMapper()).build();
    testMapper.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    FileDataSink output = new FileDataSink(
View Full Code Here

  public Plan getPlan(String... args) {
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

    MapOperator mapper = MapOperator.builder(new TokenizeLine()).input(source).name("Tokenize Lines").build();
   
    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0).input(mapper)
        .name("Count Words").build();
View Full Code Here

    }
  }

  @Override
  protected JobGraph getJobGraph() throws Exception {
    FileDataSource input = new FileDataSource(
        new ContractITCaseInputFormat(), inPath);
    DelimitedInputFormat.configureDelimitedFormat(input)
      .recordDelimiter('\n');
    input.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));

    ReduceOperator testReducer = ReduceOperator.builder(new TestReducer(), StringValue.class, 0)
      .build();
    testReducer.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));
    testReducer.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY,
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.operators.FileDataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.