Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment.readCsvFile()
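Before the harvested snippets, a minimal self-contained sketch of the readCsvFile() builder pattern they all share. The path, delimiter, and field types below are placeholders, and the sketch targets the same pre-1.0 Java API as the snippets (char-based fieldDelimiter(), lazy sinks that require env.execute()):

  import org.apache.flink.api.java.DataSet;
  import org.apache.flink.api.java.ExecutionEnvironment;
  import org.apache.flink.api.java.tuple.Tuple2;

  public class ReadCsvExample {

    public static void main(String[] args) throws Exception {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

      // readCsvFile() returns a CsvReader builder; types(...) fixes the column
      // types and turns the builder into a DataSet of matching tuples.
      DataSet<Tuple2<String, Integer>> csv = env
          .readCsvFile("file:///path/to/input.csv")   // placeholder path
          .fieldDelimiter(',')
          .types(String.class, Integer.class);

      csv.writeAsText("file:///path/to/output");      // placeholder path
      env.execute("readCsvFile example");
    }
  }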


  private Plan getTestPlanRightStatic(String strategy) {
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
   
    DataSet<Tuple3<Long, Long, Long>> bigInput = env.readCsvFile("file://bigFile").types(Long.class, Long.class, Long.class).name("bigFile");
   
    DataSet<Tuple3<Long, Long, Long>> smallInput = env.readCsvFile("file://smallFile").types(Long.class, Long.class, Long.class).name("smallFile");
   
    // start a bulk iteration (10 rounds) over the large input
    IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);
   
    Configuration joinStrategy = new Configuration();
    joinStrategy.setString(PactCompiler.HINT_SHIP_STRATEGY, PactCompiler.HINT_SHIP_STRATEGY_REPARTITION_HASH);
View Full Code Here
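The Configuration assembled at the end of this snippet carries an optimizer hint: HINT_SHIP_STRATEGY_REPARTITION_HASH forces hash repartitioning as the ship strategy. In tests of this shape the hint is typically attached to the join in the truncated remainder of the method via withParameters(joinStrategy).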


  @Test
  public void testCoGroupSolutionSet() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple1<Integer>> raw = env.readCsvFile(IN_FILE).types(Integer.class);

    // delta iteration: initial solution set and workset are both "raw",
    // at most 1000 supersteps, solution set keyed on tuple field 0
    DeltaIteration<Tuple1<Integer>, Tuple1<Integer>> iteration = raw.iterateDelta(raw, 1000, 0);

    DataSet<Tuple1<Integer>> test = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> delta = iteration.getSolutionSet().coGroup(test).where(0).equalTo(0).with(new SimpleCGroup());
View Full Code Here
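The excerpt ends before the iteration is closed. A delta iteration must be completed with closeWith(solutionSetDelta, newWorkset); a plausible continuation (an assumption, not necessarily the original test code) would be:

    DataSet<Tuple1<Integer>> result = iteration.closeWith(delta, delta);
    result.print();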

  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<String, Integer, String>> input = env.readCsvFile(inputPath)
      .fieldDelimiter('|')
      .types(String.class, Integer.class, String.class);

    // output the data with AvroOutputFormat for a specific user type
    DataSet<User> specificUser = input.map(new ConvertToUser());
View Full Code Here
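This excerpt also stops short of the sink. Assuming the test writes with flink-avro's AvroOutputFormat and a hypothetical outputPath, the continuation would look roughly like:

    specificUser.write(new AvroOutputFormat<User>(User.class), outputPath); // outputPath is assumed
    env.execute();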

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4);

    // get input data
    DataSet<Point> points = env.readCsvFile(pointsPath)
        .fieldDelimiter('|')
        .includeFields(true, true)
        .types(Double.class, Double.class)
        .map(new TuplePointConverter());

    DataSet<Centroid> centroids = env.readCsvFile(centersPath)
        .fieldDelimiter('|')
        .includeFields(true, true, true)
        .types(Integer.class, Double.class, Double.class)
        .map(new TupleCentroidConverter());
View Full Code Here
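A note on the readers above: includeFields(...) takes a boolean mask over the CSV columns; only the positions marked true are parsed, and they map in order onto the types given to types(...). Here both masks keep every column, so they change nothing; the mask is mainly useful for skipping unneeded columns in wider files.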

  protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
   
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(' ').types(Long.class, Long.class)
                        .flatMap(new UndirectEdge());
       
    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());
           
View Full Code Here
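As in Flink's canonical ConnectedComponents example, UndirectEdge presumably emits each edge in both directions so the graph is treated as undirected, while DuplicateValue maps each vertex id to an (id, id) pair, i.e., every vertex starts out as its own component.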

  public void testIterationPushingWorkOut() throws Exception {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(8);
     
      // read a single Long column, then duplicate it into a (Long, Long) pair
      DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
     
      DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);
     
      doBulkIteration(input1, input2).print();
     
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
View Full Code Here
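createProgramPlan() turns the lazily assembled program into a Plan; compileNoStats(...) (a test-base helper used in these optimizer tests) then runs the optimizer without size statistics, producing the OptimizedPlan that the test inspects.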
