Package eu.stratosphere.api.java.record.operators

Examples of eu.stratosphere.api.java.record.operators.FileDataSource


    final String clusterInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);

    // create DataSourceContract for cluster center input
    FileDataSource initialClusterPoints = new FileDataSource(new PointInFormat(), clusterInput, "Centers");
    initialClusterPoints.setDegreeOfParallelism(1);
   
    BulkIteration iteration = new BulkIteration("K-Means Loop");
    iteration.setInput(initialClusterPoints);
    iteration.setMaximumNumberOfIterations(numIterations);
   
    // create DataSourceContract for data point input
    FileDataSource dataPoints = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points");

    // create CrossOperator for distance computation
    CrossOperator computeDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints)
        .input2(iteration.getPartialSolution())
        .name("Compute Distances")
        .build();

    // create ReduceOperator for finding the nearest cluster centers
    ReduceOperator findNearestClusterCenters = ReduceOperator.builder(new FindNearestCenter(), IntValue.class, 0)
        .input(computeDistance)
        .name("Find Nearest Centers")
        .build();

    // create ReduceOperator for computing new cluster positions
    ReduceOperator recomputeClusterCenter = ReduceOperator.builder(new RecomputeClusterCenter(), IntValue.class, 0)
        .input(findNearestClusterCenters)
        .name("Recompute Center Positions")
        .build();
    iteration.setNextPartialSolution(recomputeClusterCenter);
   
    // create a second DataSourceContract over the same data point input, used for the final distance computation
    FileDataSource dataPoints2 = new FileDataSource(new PointInFormat(), dataPointInput, "Data Points 2");
   
    // compute distance of points to final clusters
    CrossOperator computeFinalDistance = CrossOperator.builder(new ComputeDistance())
        .input1(dataPoints2)
        .input2(iteration)
View Full Code Here


   
    int dop = this.config.getInteger("GroupOrderTest#NumSubtasks", 1);
   
    @SuppressWarnings("unchecked")
    CsvInputFormat format = new CsvInputFormat(',', IntValue.class, IntValue.class);
    FileDataSource source = new FileDataSource(format, this.textPath, "Source");
   
    ReduceOperator reducer = ReduceOperator.builder(CheckingReducer.class)
      .keyField(IntValue.class, 0)
      .input(source)
      .name("Ordered Reducer")
View Full Code Here

    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    @SuppressWarnings("unchecked")
    CsvInputFormat format = new CsvInputFormat(' ', IntValue.class, IntValue.class);
    FileDataSource input = new FileDataSource(format, dataInput, "Input");
   
    // create the reduce contract and set the key to the first field
    ReduceOperator sorter = ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0)
      .input(input)
      .name("Reducer")
View Full Code Here

    final String edgeInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int maxIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);

    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    // the loop takes the vertices as the solution set and changed vertices as the workset
    // initially, all vertices are changed
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // data source for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // join workset (changed vertices) with the edges to propagate changes to neighbors
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here

    String clusterInput = (args.length > 2 ? args[2] : "");
    String output = (args.length > 3 ? args[3] : "");
    int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 2);

    // data source for data point input
    @SuppressWarnings("unchecked")
    FileDataSource pointsSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), dataPointInput, "Data Points");

    // data source for cluster center input
    @SuppressWarnings("unchecked")
    FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
   
    MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
   
    MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();
   
View Full Code Here

    final int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    final String input = (args.length > 1 ? args[1] : "");
    final String output = (args.length > 2 ? args[2] : "");

    // This task will read the input data and generate the key/value pairs
    final FileDataSource source =
        new FileDataSource(new TeraInputFormat(), input, "Data Source");
    source.setDegreeOfParallelism(numSubTasks);

    // This task writes the sorted data back to disk
    final FileDataSink sink =
        new FileDataSink(new TeraOutputFormat(), output, "Data Sink");
    sink.setDegreeOfParallelism(numSubTasks);
View Full Code Here

  @Override
  protected JobGraph getFailingJobGraph() throws Exception {
   
    // init data source
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);

    // init failing map task
    MapOperator testMapper = MapOperator.builder(FailingMapper.class).build();

    // init data sink
View Full Code Here

  @Override
  protected JobGraph getJobGraph() throws Exception {
   
    // init data source
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);

    // init (working) map task
    MapOperator testMapper = MapOperator.builder(TestMapper.class).build();

    // init data sink
View Full Code Here

  @Test
  public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
    final int degOfPar = DEFAULT_PARALLELISM;
   
    // construct the plan
    FileDataSource source = new FileDataSource(new DummyInputFormat(), IN_FILE, "Source");
    source.setDegreeOfParallelism(degOfPar);
   
    MapOperator map1 = MapOperator.builder(new IdentityMap()).name("Map1").build();
    map1.setDegreeOfParallelism(degOfPar);
    map1.setInput(source);
   
View Full Code Here

      // parse program parameters
      int numSubtasks       = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
      String recordsPath    = (args.length > 1 ? args[1] : "");
      String output        = (args.length > 2 ? args[2] : "");
     
      FileDataSource source = new FileDataSource(CsvInputFormat.class, recordsPath);
      source.setDegreeOfParallelism(numSubtasks);
      CsvInputFormat.configureRecordFormat(source)
        .recordDelimiter('\n')
        .fieldDelimiter('|')
        .field(IntValue.class, 0);
     
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.operators.FileDataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.