Examples of eu.stratosphere.api.java.record.operators.FileDataSource

Package eu.stratosphere.api.java.record.operators

Examples of eu.stratosphere.api.java.record.operators.FileDataSource

eu.stratosphere.api.java.record.operators.FileDataSource
Operator for input nodes that read data from files (for the Record data model).

    final String clusterInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);


    // create DataSourceContract for cluster center input
    FileDataSource initialClusterPoints = new FileDataSource(new PointInFormat(), clusterInput, "Centers");
    initialClusterPoints.setDegreeOfParallelism(1);
    
    BulkIteration iteration = new BulkIteration("K-Means Loop");
    iteration.setInput(initialClusterPoints);
    iteration.setMaximumNumberOfIterations(numIterations);
    
    // create DataSourceContract for data point input
    FileDataSource dataPoints = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points");


    // create CrossOperator for distance computation
    CrossOperator computeDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints)
        .input2(iteration.getPartialSolution())
        .name("Compute Distances")
        .build();


    // create ReduceOperator for finding the nearest cluster centers
    ReduceOperator findNearestClusterCenters = ReduceOperator.builder(new FindNearestCenter(), IntValue.class, 0)
        .input(computeDistance)
        .name("Find Nearest Centers")
        .build();


    // create ReduceOperator for computing new cluster positions
    ReduceOperator recomputeClusterCenter = ReduceOperator.builder(new RecomputeClusterCenter(), IntValue.class, 0)
        .input(findNearestClusterCenters)
        .name("Recompute Center Positions")
        .build();
    iteration.setNextPartialSolution(recomputeClusterCenter);
    
    // create DataSourceContract for data point input
    FileDataSource dataPoints2 = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points 2");
    
    // compute distance of points to final clusters 
    CrossOperator computeFinalDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints2)
        .input2(iteration)

View Full Code Here

    
    int dop = this.config.getInteger("GroupOrderTest#NumSubtasks", 1);
    
    @SuppressWarnings("unchecked")
    CsvInputFormat format = new CsvInputFormat(',', IntValue.class, IntValue.class);
    FileDataSource source = new FileDataSource(format, this.textPath, "Source");
    
    ReduceOperator reducer = ReduceOperator.builder(CheckingReducer.class)
      .keyField(IntValue.class, 0)
      .input(source)
      .name("Ordered Reducer")

View Full Code Here

    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");


    @SuppressWarnings("unchecked")
    CsvInputFormat format = new CsvInputFormat(' ', IntValue.class, IntValue.class);
    FileDataSource input = new FileDataSource(format, dataInput, "Input");
    
    // create the reduce contract and set the key to the first field
    ReduceOperator sorter = ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0)
      .input(input)
      .name("Reducer")

View Full Code Here

    final String edgeInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int maxIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);


    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
    
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
    
    // the loop takes the vertices as the solution set and changed vertices as the workset
    // initially, all vertices are changed
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
    
    // data source for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");


    // join workset (changed vertices) with the edges to propagate changes to neighbors
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)

View Full Code Here

    String clusterInput = (args.length > 2 ? args[2] : "");
    String output = (args.length > 3 ? args[3] : "");
    int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 2);


    // data source data point input
    @SuppressWarnings("unchecked")
    FileDataSource pointsSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), dataPointInput, "Data Points");


    // data source for cluster center input
    @SuppressWarnings("unchecked")
    FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
    
    MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
    
    MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();

View Full Code Here

    final int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    final String input = (args.length > 1 ? args[1] : "");
    final String output = (args.length > 2 ? args[2] : "");


    // This task will read the input data and generate the key/value pairs
    final FileDataSource source = 
        new FileDataSource(new TeraInputFormat(), input, "Data Source");
    source.setDegreeOfParallelism(numSubTasks);


    // This task writes the sorted data back to disk
    final FileDataSink sink = 
        new FileDataSink(new TeraOutputFormat(), output, "Data Sink");
    sink.setDegreeOfParallelism(numSubTasks);

View Full Code Here


  @Override
  protected JobGraph getFailingJobGraph() throws Exception {
    
    // init data source 
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);


    // init failing map task
    MapOperator testMapper = MapOperator.builder(FailingMapper.class).build();


    // init data sink

View Full Code Here


  @Override
  protected JobGraph getJobGraph() throws Exception {
    
    // init data source 
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);


    // init (working) map task
    MapOperator testMapper = MapOperator.builder(TestMapper.class).build();


    // init data sink

View Full Code Here

  @Test
  public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
    final int degOfPar = DEFAULT_PARALLELISM;
    
    // construct the plan
    FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
    source.setDegreeOfParallelism(degOfPar);
    
    MapOperator map1 = MapOperator.builder(new IdentityMap()).name("Map1").build();
    map1.setDegreeOfParallelism(degOfPar);
    map1.setInput(source);

View Full Code Here

      // parse program parameters
      int numSubtasks       = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
      String recordsPath    = (args.length > 1 ? args[1] : "");
      String output        = (args.length > 2 ? args[2] : "");
      
      FileDataSource source = new FileDataSource(CsvInputFormat.class, recordsPath);
      source.setDegreeOfParallelism(numSubtasks);
      CsvInputFormat.configureRecordFormat(source)
        .recordDelimiter('\n')
        .fieldDelimiter('|')
        .field(IntValue.class, 0);

View Full Code Here

0 1 2 3 4 5 6 7 8

TOP

Related Classes of eu.stratosphere.api.java.record.operators.FileDataSource

eu.stratosphere.pact.compiler.AdditionalOperatorsTest

eu.stratosphere.pact.compiler.BranchingPlansCompilerTest

eu.stratosphere.pact.compiler.DOPChangeTest

eu.stratosphere.pact.compiler.GroupOrderTest

eu.stratosphere.pact.compiler.HardPlansCompilationTest

eu.stratosphere.pact.compiler.ReduceAllTest

eu.stratosphere.pact.compiler.UnionPropertyPropagationTest

eu.stratosphere.pact.compiler.WorksetIterationsRecordApiCompilerTest

eu.stratosphere.test.accumulators.AccumulatorITCase

eu.stratosphere.test.accumulators.AccumulatorIterativeITCase

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.