Package eu.stratosphere.api.java.record.operators

Examples of eu.stratosphere.api.java.record.operators.FileDataSink


      .keyField(IntValue.class, 1)
      .input(edgeCounter)
      .name("Join Counts")
      .build();

    FileDataSink triangles = new FileDataSink(new EdgeWithDegreesOutputFormat(), output, countJoiner, "Unique Edges With Degrees");

    Plan p = new Plan(triangles, "Normalize Edges and compute Vertex Degrees");
    p.setDefaultParallelism(numSubTasks);
    return p;
  }
View Full Code Here


   
    iteration.setNextWorkset(minAndUpdate);
    iteration.setSolutionSetDelta(minAndUpdate);

    // create the data sink that writes the iteration result
    FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result");
    CsvOutputFormat.configureRecordFormat(result)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(LongValue.class, 0)
      .field(LongValue.class, 1);
View Full Code Here

      .build();
    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
      .input(mapper)
      .name("Count Words")
      .build();
    @SuppressWarnings("unchecked")
    FileDataSink out = new FileDataSink(new CsvOutputFormat("\n"," ", StringValue.class, IntValue.class), output, reducer, "Word Counts");

    Plan plan = new Plan(out, "WordCount Example");
    plan.setDefaultParallelism(numSubTasks);
   
    return plan;
View Full Code Here

        .input(computeFinalDistance)
        .name("Find Nearest Final Centers")
        .build();

    // create DataSinkContract for writing the new cluster positions
    FileDataSink finalClusters = new FileDataSink(new PointOutFormat(), output+"/centers", iteration, "Cluster Positions");

    // write assigned clusters
    FileDataSink clusterAssignments = new FileDataSink(new PointOutFormat(), output+"/points", findNearestFinalCluster, "Cluster Assignments");
   
    List<FileDataSink> sinks = new ArrayList<FileDataSink>();
    sinks.add(finalClusters);
    sinks.add(clusterAssignments);
   
View Full Code Here

      .input(source)
      .name("Ordered Reducer")
      .build();
    reducer.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING));
   
    FileDataSink sink = new FileDataSink(CsvOutputFormat.class, this.resultPath, reducer, "Sink");
    CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(',')
      .field(IntValue.class, 0)
      .field(IntValue.class, 1);
View Full Code Here

      .build();
    // sets the group sorting to the second field
    sorter.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING));

    // create and configure the output format
    FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, sorter, "Sorted Output");
    CsvOutputFormat.configureRecordFormat(out)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(IntValue.class, 0)
      .field(IntValue.class, 1);
View Full Code Here

   
    iteration.setNextWorkset(updateComponentId);
    iteration.setSolutionSetDelta(updateComponentId);

    // sink is the iteration result
    FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result");
    CsvOutputFormat.configureRecordFormat(result)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(LongValue.class, 0)
      .field(LongValue.class, 1);
View Full Code Here

    iter.setNextPartialSolution(recomputeClusterCenter);

    // ---------------------- End K-Means Loop ---------------------
   
    // create DataSinkContract for writing the new cluster positions
    FileDataSink newClusterPoints = new FileDataSink(new PointOutFormat(), output, iter, "New Center Positions");

    Plan plan = new Plan(newClusterPoints, "K-Means");
    plan.setDefaultParallelism(degreeOfParallelism);
    return plan;
  }
View Full Code Here

    final FileDataSource source =
        new FileDataSource(new TeraInputFormat(), input, "Data Source");
    source.setDegreeOfParallelism(numSubTasks);

    // This task writes the sorted data back to disk
    final FileDataSink sink =
        new FileDataSink(new TeraOutputFormat(), output, "Data Sink");
    sink.setDegreeOfParallelism(numSubTasks);
    sink.setGlobalOrder(new Ordering(0, TeraKey.class, Order.ASCENDING), new TeraDistribution());

    sink.setInput(source);

    return new Plan(sink, "TeraSort");
  }
View Full Code Here

   
    ReduceOperator reduce2 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce 2").build();
    reduce2.setDegreeOfParallelism(degOfPar * 2);
    reduce2.setInput(map2);
   
    FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, "Sink");
    sink.setDegreeOfParallelism(degOfPar * 2);
    sink.setInput(reduce2);
   
    Plan plan = new Plan(sink, "Test Increasing Degree Of Parallelism");
   
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.operators.FileDataSink

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.