Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment


  private static class SumMinMaxProgs {

    public static String runProgram(int progId, String resultPath) throws Exception {
      switch(progId) {
        case 1: {
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, Long>> sumDs = ds
              .sum(0)
              .andMax(1)
              .project(0, 1).types(Integer.class, Long.class);

          sumDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "231,6\n";
        }
        case 2: {
        /*
         * Grouped Aggregate
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
              .sum(0)
              .project(1, 0).types(Long.class, Integer.class);

          aggregateDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1\n" +
              "2,5\n" +
              "3,15\n" +
              "4,34\n" +
              "5,65\n" +
              "6,111\n";
        }
        case 3: {
        /*
         * Nested Aggregate
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
              .min(0)
              .min(0)
              .project(0).types(Integer.class);

          aggregateDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1\n";
        }
        default:
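Most of the snippets on this page read their input from Flink's CollectionDataSets test fixtures, which are not reproduced here. The expected results pin the 3-tuple fixture down fairly well: the first fields 1..21 sum to 231, the second field forms groups 1 through 6, and the strings run from "Hi" to "Comment#15". A plausible reconstruction, under those assumptions:

  // Plausible reconstruction of CollectionDataSets.get3TupleDataSet, inferred
  // from the expected results on this page; the real fixture is not shown here.
  public static DataSet<Tuple3<Integer, Long, String>> get3TupleDataSet(ExecutionEnvironment env) {
    List<Tuple3<Integer, Long, String>> data = new ArrayList<Tuple3<Integer, Long, String>>();
    data.add(new Tuple3<Integer, Long, String>(1, 1L, "Hi"));
    data.add(new Tuple3<Integer, Long, String>(2, 2L, "Hello"));
    data.add(new Tuple3<Integer, Long, String>(3, 2L, "Hello world"));
    data.add(new Tuple3<Integer, Long, String>(4, 3L, "Hello world, how are you?"));
    data.add(new Tuple3<Integer, Long, String>(5, 3L, "I am fine."));
    data.add(new Tuple3<Integer, Long, String>(6, 3L, "Luke Skywalker"));
    // ids 7..21 carry "Comment#1" through "Comment#15" in second-field groups of size 4, 5 and 6
    for (int i = 7; i <= 21; i++) {
      long group = i <= 10 ? 4L : (i <= 15 ? 5L : 6L);
      data.add(new Tuple3<Integer, Long, String>(i, group, "Comment#" + (i - 6)));
    }
    return env.fromCollection(data);
  }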


 
  private static class DependencyConnectedComponentsProgram {
   
    public static String runProgram(String resultPath) throws Exception {
     
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DOP);
     
      DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
      DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);
      int keyPosition = 0;
     
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
          initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, keyPosition);
     
      DataSet<Long> candidates = iteration.getWorkset().join(edges).where(0).equalTo(0)
          .with(new FindCandidatesJoin())
          .groupBy(new KeySelector<Long, Long>() {
                        public Long getKey(Long id) { return id; }
                      }).reduceGroup(new RemoveDuplicatesReduce());
     
      DataSet<Tuple2<Long, Long>> candidatesDependencies =
          candidates.join(edges)
          .where(new KeySelector<Long, Long>() {
                        public Long getKey(Long id) { return id; }
                      }).equalTo(new KeySelector<Tuple2<Long, Long>, Long>() {
                        public Long getKey(Tuple2<Long, Long> vertexWithId)
                        { return vertexWithId.f1; }
                      }).with(new FindCandidatesDependenciesJoin());
     
      DataSet<Tuple2<Long, Long>> verticesWithNewComponents =
          candidatesDependencies.join(iteration.getSolutionSet()).where(0).equalTo(0)
          .with(new NeighborWithComponentIDJoin())
          .groupBy(0).reduceGroup(new MinimumReduce());
     
      DataSet<Tuple2<Long, Long>> updatedComponentId =
          verticesWithNewComponents.join(iteration.getSolutionSet()).where(0).equalTo(0)
          .flatMap(new MinimumIdFilter());
     
      iteration.closeWith(updatedComponentId, updatedComponentId).writeAsText(resultPath);
     
      env.execute();
     
      return resultPath;
    }
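The UDFs referenced above (FindCandidatesJoin, RemoveDuplicatesReduce, FindCandidatesDependenciesJoin, NeighborWithComponentIDJoin, MinimumReduce, MinimumIdFilter) are defined elsewhere in the test. As one example of the pattern, MinimumReduce presumably keeps, per vertex, the smallest candidate component id; a minimal sketch under that assumption:

    // Assumed shape of MinimumReduce: for each vertex (grouped on field 0),
    // emit the tuple carrying the smallest component id (field 1).
    public static final class MinimumReduce
        implements GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {

      @Override
      public void reduce(Iterable<Tuple2<Long, Long>> values, Collector<Tuple2<Long, Long>> out) {
        Long vertexId = null;
        long minComponentId = Long.MAX_VALUE;
        for (Tuple2<Long, Long> value : values) {
          vertexId = value.f0;
          minComponentId = Math.min(minComponentId, value.f1);
        }
        out.collect(new Tuple2<Long, Long>(vertexId, minComponentId));
      }
    }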

      case 1: {
        /*
         * Test non-passing flatmap
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
        DataSet<String> nonPassingFlatMapDs = ds.
            flatMap(new FlatMapFunction<String, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(String value, Collector<String> out) throws Exception {
                if ( value.contains("bananas") ) {
                  out.collect(value);
                }
              }
            });
       
        nonPassingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "\n";
      }
      case 2: {
        /*
         * Test data duplicating flatmap
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
        DataSet<String> duplicatingFlatMapDs = ds.
            flatMap(new FlatMapFunction<String, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(String value, Collector<String> out) throws Exception {
                  out.collect(value);
                  out.collect(value.toUpperCase());
              }
            });
       
        duplicatingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "Hi\n" + "HI\n" +
            "Hello\n" + "HELLO\n" +
            "Hello world\n" + "HELLO WORLD\n" +
            "Hello world, how are you?\n" + "HELLO WORLD, HOW ARE YOU?\n" +
            "I am fine.\n" + "I AM FINE.\n" +
            "Luke Skywalker\n" + "LUKE SKYWALKER\n" +
            "Random comment\n" + "RANDOM COMMENT\n" +
            "LOL\n" + "LOL\n";
      }
      case 3: {
        /*
         * Test flatmap with varying number of emitted tuples
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> varyingTuplesMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                final int numTuples = value.f0 % 3;
                for ( int i = 0; i < numTuples; i++ ) {
                  out.collect(value);
                }
              }
            });
       
        varyingTuplesMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return  "1,1,Hi\n" +
            "2,2,Hello\n" + "2,2,Hello\n" +
            "4,3,Hello world, how are you?\n" +
            "5,3,I am fine.\n" + "5,3,I am fine.\n" +
            "7,4,Comment#1\n" +
            "8,4,Comment#2\n" + "8,4,Comment#2\n" +
            "10,4,Comment#4\n" +
            "11,5,Comment#5\n" + "11,5,Comment#5\n" +
            "13,5,Comment#7\n" +
            "14,5,Comment#8\n" + "14,5,Comment#8\n" +
            "16,6,Comment#10\n" +
            "17,6,Comment#11\n" + "17,6,Comment#11\n" +
            "19,6,Comment#13\n" +
            "20,6,Comment#14\n" + "20,6,Comment#14\n";
      }
      case 4: {
        /*
         * Test type conversion flatmapper (Custom -> Tuple)
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> typeConversionFlatMapDs = ds.
            flatMap(new FlatMapFunction<CustomType, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;
              private final Tuple3<Integer, Long, String> outTuple =
                  new Tuple3<Integer, Long, String>();
             
              @Override
              public void flatMap(CustomType value, Collector<Tuple3<Integer, Long, String>> out)
                  throws Exception {
                outTuple.setField(value.myInt, 0);
                outTuple.setField(value.myLong, 1);
                outTuple.setField(value.myString, 2);
                out.collect(outTuple);
              }
            });
       
        typeConversionFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return   "1,0,Hi\n" +
            "2,1,Hello\n" +
            "2,2,Hello world\n" +
            "3,3,Hello world, how are you?\n" +
            "3,4,I am fine.\n" +
            "3,5,Luke Skywalker\n" +
            "4,6,Comment#1\n" +
            "4,7,Comment#2\n" +
            "4,8,Comment#3\n" +
            "4,9,Comment#4\n" +
            "5,10,Comment#5\n" +
            "5,11,Comment#6\n" +
            "5,12,Comment#7\n" +
            "5,13,Comment#8\n" +
            "5,14,Comment#9\n" +
            "6,15,Comment#10\n" +
            "6,16,Comment#11\n" +
            "6,17,Comment#12\n" +
            "6,18,Comment#13\n" +
            "6,19,Comment#14\n" +
            "6,20,Comment#15\n";
      }
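The CustomType fixture appears throughout these snippets. The field accesses above (myInt, myLong, myString) and the writeAsText output of later cases ("1,0,Hi" and so on) suggest a POJO of roughly this shape, sketched here as an assumption:

      // Assumed shape of the CollectionDataSets.CustomType fixture: a plain POJO
      // whose toString() produces the "myInt,myLong,myString" lines seen on this page.
      public static class CustomType {
        public int myInt;
        public long myLong;
        public String myString;

        public CustomType() {}

        public CustomType(int i, long l, String s) {
          this.myInt = i;
          this.myLong = l;
          this.myString = s;
        }

        @Override
        public String toString() {
          return myInt + "," + myLong + "," + myString;
        }
      }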
      case 5: {
        /*
         * Test type conversion flatmapper (Tuple -> Basic)
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<String> typeConversionFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<String> out) throws Exception {
                out.collect(value.f2);
              }
            });
       
        typeConversionFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "Hi\n" + "Hello\n" + "Hello world\n" +
            "Hello world, how are you?\n" +
            "I am fine.\n" + "Luke Skywalker\n" +
            "Comment#1\n" + "Comment#2\n" +
            "Comment#3\n" + "Comment#4\n" +
            "Comment#5\n" + "Comment#6\n" +
            "Comment#7\n" + "Comment#8\n" +
            "Comment#9\n" + "Comment#10\n" +
            "Comment#11\n" + "Comment#12\n" +
            "Comment#13\n" + "Comment#14\n" +
            "Comment#15\n";
      }
      case 6: {
        /*
         * Test flatmapper if UDF returns input object
         * multiple times and changes it in between
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> inputObjFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public void flatMap( Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                final int numTuples = value.f0 % 4;
                for ( int i = 0; i < numTuples; i++ ) {
                  value.setField(i, 0);
                  out.collect(value);
                }             
              }
            });
       
        inputObjFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return  "0,1,Hi\n" +
            "0,2,Hello\n" + "1,2,Hello\n" +
            "0,2,Hello world\n" + "1,2,Hello world\n" + "2,2,Hello world\n" +
            "0,3,I am fine.\n" +
            "0,3,Luke Skywalker\n" + "1,3,Luke Skywalker\n" +
            "0,4,Comment#1\n" + "1,4,Comment#1\n" + "2,4,Comment#1\n" +
            "0,4,Comment#3\n" +
            "0,4,Comment#4\n" + "1,4,Comment#4\n" +
            "0,5,Comment#5\n" + "1,5,Comment#5\n" + "2,5,Comment#5\n" +
            "0,5,Comment#7\n" +
            "0,5,Comment#8\n" + "1,5,Comment#8\n" +
            "0,5,Comment#9\n" + "1,5,Comment#9\n" + "2,5,Comment#9\n" +
            "0,6,Comment#11\n" +
            "0,6,Comment#12\n" + "1,6,Comment#12\n" +
            "0,6,Comment#13\n" + "1,6,Comment#13\n" + "2,6,Comment#13\n" +
            "0,6,Comment#15\n";
      }
      case 7: {
        /*
         * Test flatmap with broadcast set
         */
         
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> ints = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> bcFlatMapDs = ds.
            flatMap(new RichFlatMapFunction<Tuple3<Integer,Long,String>, Tuple3<Integer,Long,String>>() {
              private static final long serialVersionUID = 1L;
              private final Tuple3<Integer, Long, String> outTuple =
                  new Tuple3<Integer, Long, String>();
              private Integer f2Replace = 0;
             
              @Override
              public void open(Configuration config) {
                Collection<Integer> ints = this.getRuntimeContext().getBroadcastVariable("ints");
                int sum = 0;
                for(Integer i : ints) {
                  sum += i;
                }
                f2Replace = sum;
              }
             
              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                outTuple.setFields(f2Replace, value.f1, value.f2);
                out.collect(outTuple);
              }
            }).withBroadcastSet(ints, "ints");
        bcFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return   "55,1,Hi\n" +
            "55,2,Hello\n" +
            "55,2,Hello world\n" +
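The broadcast set is wired in with withBroadcastSet(ints, "ints") and read back in open() through getRuntimeContext().getBroadcastVariable("ints"). The constant 55 in the expected output is the sum of the broadcast integers; one fixture consistent with that output (an assumption, the real helper is not shown) is simply the numbers 1 through 10:

  // Plausible stand-in for CollectionDataSets.getIntegerDataSet: any collection
  // summing to 55 matches the expected output; 1..10 is one such choice.
  public static DataSet<Integer> getIntegerDataSet(ExecutionEnvironment env) {
    return env.fromElements(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  }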

   
    final int NUM_ITERS = 4;
    final double expectedFactor = (int) Math.pow(7, NUM_ITERS);
   
    // this is an artificial program, it does not compute anything sensible
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Double>> initialData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0), new Tuple2<Long, Double>(2L, 2.0),
                              new Tuple2<Long, Double>(3L, 3.0), new Tuple2<Long, Double>(4L, 4.0),
                              new Tuple2<Long, Double>(5L, 5.0), new Tuple2<Long, Double>(6L, 6.0));
   
    DataSet<Tuple2<Long, Double>> result = MultipleJoinsWithSolutionSetCompilerTest.constructPlan(initialData, NUM_ITERS);
   
    List<Tuple2<Long, Double>> resultCollector = new ArrayList<Tuple2<Long,Double>>();
    result.output(new LocalCollectionOutputFormat<Tuple2<Long,Double>>(resultCollector));
   
    env.execute();
   
    for (Tuple2<Long, Double> tuple : resultCollector) {
      Assert.assertEquals(expectedFactor * tuple.f0, tuple.f1.doubleValue(), 0.0);
    }
  }
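LocalCollectionOutputFormat is what lets this test assert on results without touching the file system: it funnels the DataSet into an in-memory collection when the program runs in a local environment. A minimal standalone usage sketch (names chosen for illustration):

  // Collect a DataSet into a local list instead of writing files.
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Long> numbers = env.generateSequence(1, 100);

  List<Long> collected = new ArrayList<Long>();
  numbers.output(new LocalCollectionOutputFormat<Long>(collected));

  env.execute();
  // 'collected' now holds the 100 generated values (in no particular order)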

      case 1: {
        /*
         * Reduce on tuples with key field selector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new Tuple3Reduce("B-)"));
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,B-)\n" +
            "15,3,B-)\n" +
            "34,4,B-)\n" +
            "65,5,B-)\n" +
            "111,6,B-)\n";
      }
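Tuple3Reduce is defined elsewhere in the test. The expected output gives away its contract: singleton groups are never reduced (which is why "1,1,Hi" survives unchanged), while larger groups sum the first field, keep the group key, and replace the string. A sketch under those assumptions:

      // Assumed shape of Tuple3Reduce: sum field 0, keep the key in field 1,
      // and overwrite field 2 with the constructor argument ("B-)").
      public static class Tuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
        private final String f2Replace;

        public Tuple3Reduce(String f2Replace) {
          this.f2Replace = f2Replace;
        }

        @Override
        public Tuple3<Integer, Long, String> reduce(
            Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
          return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1, f2Replace);
        }
      }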
      case 2: {
        /*
         * Reduce on tuples with multiple key field selectors
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy(4,0).reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "3,9,0,P-),2\n" +
            "3,6,5,BCD,3\n" +
            "4,17,0,P-),1\n" +
            "4,17,0,P-),2\n" +
            "5,11,10,GHI,1\n" +
            "5,29,0,P-),2\n" +
            "5,25,0,P-),3\n";
      }
      case 3: {
        /*
         * Reduce on tuples with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Long getKey(Tuple3<Integer, Long, String> in) {
                    return in.f1;
                  }
                }).reduce(new Tuple3Reduce("B-)"));
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,B-)\n" +
            "15,3,B-)\n" +
            "34,4,B-)\n" +
            "65,5,B-)\n" +
            "111,6,B-)\n";
       
      }
      case 4: {
        /*
         * Reduce on custom type with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            groupBy(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).reduce(new CustomTypeReduce());
       
        reduceDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,Hi\n" +
            "2,3,Hello!\n" +
            "3,12,Hello!\n" +
            "4,30,Hello!\n" +
            "5,60,Hello!\n" +
            "6,105,Hello!\n";
      }
      case 5: {
        /*
         * All-reduce for tuple
         */

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            reduce(new AllAddingTuple3Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "231,91,Hello World\n";
      }
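The all-reduce folds the entire data set into a single tuple: 231 is the sum of the first fields (1 through 21) and 91 the sum of the second fields. AllAddingTuple3Reduce plausibly looks like this:

      // Assumed shape of AllAddingTuple3Reduce: sum fields 0 and 1 across
      // the whole data set and emit a constant string.
      public static class AllAddingTuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
        @Override
        public Tuple3<Integer, Long, String> reduce(
            Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
          return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1 + v2.f1, "Hello World");
        }
      }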
      case 6: {
        /*
         * All-reduce for custom types
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            reduce(new AllAddingCustomTypeReduce());
       
        reduceDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "91,210,Hello!";
      }
      case 7: {
       
        /*
         * Reduce with broadcast set
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new BCTuple3Reduce()).withBroadcastSet(intDs, "ints");
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,55\n" +
            "15,3,55\n" +
            "34,4,55\n" +
            "65,5,55\n" +
            "111,6,55\n";
      }
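BCTuple3Reduce combines the two previous patterns: a grouped reduce plus a broadcast set. Judging by the output (field 2 becomes "55", the sum of the broadcast integers), a sketch under that assumption:

      // Assumed shape of BCTuple3Reduce: a RichReduceFunction that sums the
      // "ints" broadcast set in open() and writes that sum into field 2.
      public static class BCTuple3Reduce extends RichReduceFunction<Tuple3<Integer, Long, String>> {
        private String f2Replace;

        @Override
        public void open(Configuration config) {
          Collection<Integer> ints = this.getRuntimeContext().getBroadcastVariable("ints");
          int sum = 0;
          for (Integer i : ints) {
            sum += i;
          }
          f2Replace = String.valueOf(sum);
        }

        @Override
        public Tuple3<Integer, Long, String> reduce(
            Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
          return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1, f2Replace);
        }
      }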
      case 8: {
        /*
         * Reduce with UDF that returns the second input object (check mutable object handling)
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new InputReturningTuple3Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,Hi again!\n" +
            "15,3,Hi again!\n" +
            "34,4,Hi again!\n" +
            "65,5,Hi again!\n" +
            "111,6,Hi again!\n";
      }
      case 9: {
        /*
         * Reduce with a Tuple-returning KeySelector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy(
                new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
                  private static final long serialVersionUID = 1L;
   
                  @Override
                  public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                    return new Tuple2<Integer, Long>(t.f0, t.f4);
                  }
                }).reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "3,9,0,P-),2\n" +
            "3,6,5,BCD,3\n" +
            "4,17,0,P-),1\n" +
            "4,17,0,P-),2\n" +
            "5,11,10,GHI,1\n" +
            "5,29,0,P-),2\n" +
            "5,25,0,P-),3\n";
      }
      case 10: {
        /*
         * Case 2 with String-based field expression
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy("f4","f0").reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +

public class BulkIterationWithAllReducerITCase extends JavaProgramTestBase {

  @Override
  protected void testProgram() throws Exception {
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);
   
    IterativeDataSet<Integer> iteration = data.iterate(10);
   
    DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");
   
    final List<Integer> resultList = new ArrayList<Integer>();
    iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));
   
    env.execute();
   
    Assert.assertEquals(8, resultList.get(0).intValue());
  }
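PickOneAllReduce is defined elsewhere; the point of the test is that the broadcast variable "bc" is correctly initialized from the iteration's partial solution in every superstep. A heavily hedged guess, chosen only to be consistent with the asserted result (8, the maximum input element):

  // Speculative sketch of PickOneAllReduce: ignore the regular input and emit
  // the maximum of the broadcast partial solution. In superstep 1 the broadcast
  // holds the initial 1..8; afterwards it holds just {8}, so the result is stable.
  public static class PickOneAllReduce extends RichGroupReduceFunction<Integer, Integer> {
    @Override
    public void reduce(Iterable<Integer> values, Collector<Integer> out) {
      List<Integer> bc = getRuntimeContext().getBroadcastVariable("bc");
      int max = Integer.MIN_VALUE;
      for (Integer i : bc) {
        max = Math.max(max, i);
      }
      out.collect(max);
    }
  }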

      case 0: {
        /*
         * Test hash partition by key field
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(1)
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
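UniqueLongMapper is not shown; its role follows from the partitioning: after partitionByHash(1), all tuples with the same Long key land in the same partition, so emitting each distinct key once per partition produces every value exactly once overall. A sketch under that assumption:

      // Assumed shape of UniqueLongMapper: emit each distinct field-1 value
      // seen within a partition exactly once.
      public static class UniqueLongMapper
          implements MapPartitionFunction<Tuple3<Integer, Long, String>, Long> {

        @Override
        public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Long> out) {
          Set<Long> seen = new HashSet<Long>();
          for (Tuple3<Integer, Long, String> t : values) {
            if (seen.add(t.f1)) {
              out.collect(t.f1);
            }
          }
        }
      }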
      case 1: {
        /*
         * Test hash partition by key selector
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(new KeySelector<Tuple3<Integer,Long,String>, Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Long getKey(Tuple3<Integer, Long, String> value) throws Exception {
                return value.f1;
              }
             
            })
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
      case 2: {
        /*
         * Test forced rebalancing
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // generate some number in parallel
        DataSet<Long> ds = env.generateSequence(1,3000);
        DataSet<Tuple2<Integer, Integer>> uniqLongs = ds
            // introduce some partition skew by filtering
            .filter(new FilterFunction<Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public boolean filter(Long value) throws Exception {
                if (value <= 780) {
                  return false;
                } else {
                  return true;
                }
              }
            })
            // rebalance
            .rebalance()
            // count values in each partition
            .map(new PartitionIndexMapper())
            .groupBy(0)
            .reduce(new ReduceFunction<Tuple2<Integer, Integer>>() {
              private static final long serialVersionUID = 1L;

              public Tuple2<Integer, Integer> reduce(Tuple2<Integer, Integer> v1, Tuple2<Integer, Integer> v2) {
                return new Tuple2<Integer, Integer>(v1.f0, v1.f1+v2.f1);
              }
            })
            // round counts to mitigate runtime scheduling effects (lazy split assignment)
            .map(new MapFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>(){
              private static final long serialVersionUID = 1L;

              @Override
              public Tuple2<Integer, Integer> map(Tuple2<Integer, Integer> value) throws Exception {
                value.f1 = (value.f1 / 10);
                return value;
              }
             
            });
       
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        StringBuilder result = new StringBuilder();
        int numPerPartition = 2220 / env.getDegreeOfParallelism() / 10;
        for (int i = 0; i < env.getDegreeOfParallelism(); i++) {
          result.append('(').append(i).append(',').append(numPerPartition).append(")\n");
        }
        // return expected result
        return result.toString();
      }
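PartitionIndexMapper presumably tags each element with the index of the subtask processing it; the subsequent groupBy(0).reduce then counts elements per partition, and the division by 10 absorbs small scheduling-induced imbalances. A sketch under that assumption:

      // Assumed shape of PartitionIndexMapper: emit (subtask index, 1) per element,
      // so that the following reduce counts elements per parallel partition.
      public static class PartitionIndexMapper
          extends RichMapFunction<Long, Tuple2<Integer, Integer>> {

        @Override
        public Tuple2<Integer, Integer> map(Long value) {
          int partitionIndex = getRuntimeContext().getIndexOfThisSubtask();
          return new Tuple2<Integer, Integer>(partitionIndex, 1);
        }
      }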
      case 3: {
        /*
         * Test hash partition by key field and different DOP
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(3);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(1).setParallelism(4)
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
      case 4: {
        /*
         * Test hash partition with key expression
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(3);
       
        DataSet<POJO> ds = CollectionDataSets.getDuplicatePojoDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash("nestedPojo.longNumber").setParallelism(4)
            .mapPartition(new UniqueNestedPojoLongMapper());
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return   "10000\n" +
            "20000\n" +
            "30000\n";

    compareResultsByLinesInMemory(DATAPOINTS + DATAPOINTS + DATAPOINTS + DATAPOINTS, resultPath);
  }

  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Record> initialInput = env.readFile(new PointInFormat(), this.dataPath).setParallelism(1);
   
    IterativeDataSet<Record> iteration = initialInput.iterate(2);
   
    DataSet<Record> result = iteration.union(iteration).map(new IdentityMapper());
   
    iteration.closeWith(result).write(new PointOutFormat(), this.resultPath);
   
    env.execute();
  }
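The union of the partial solution with itself doubles the data in every superstep, so two iterations produce four copies of the input, which is why the check above concatenates DATAPOINTS four times. IdentityMapper is presumably a pass-through:

  // Assumed shape of IdentityMapper: forward each record unchanged; the
  // doubling comes entirely from iteration.union(iteration).
  public static class IdentityMapper implements MapFunction<Record, Record> {
    @Override
    public Record map(Record value) {
      return value;
    }
  }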

public class BroadcastVarInitializationITCase extends JavaProgramTestBase {
 
  @Override
  protected void testProgram() throws Exception {
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4);
   
    DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);
   
    IterativeDataSet<Integer> iteration = data.iterate(10);
   
    DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");
   
    final List<Integer> resultList = new ArrayList<Integer>();
    iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));
   
    env.execute();
   
    Assert.assertEquals(8, resultList.get(0).intValue());
  }

       
      case 1: {
        /*
         * CoGroup on tuples with key field selector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<Integer, Integer>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5CoGroup());
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,0\n" +
            "2,6\n" +
            "3,24\n" +
            "4,60\n" +
            "5,120\n";
      }
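Tuple5CoGroup is not shown. Both inputs are the same 5-tuple fixture, and the expected second fields (0, 6, 24, 60, 120) are exactly twice the per-key sums of field 2, which suggests the function sums field 2 over both sides; a sketch under that assumption:

      // Assumed shape of Tuple5CoGroup: per key, sum field 2 across both
      // co-grouped inputs (identical inputs here, hence the doubled sums).
      public static class Tuple5CoGroup implements CoGroupFunction<
          Tuple5<Integer, Long, Integer, String, Long>,
          Tuple5<Integer, Long, Integer, String, Long>,
          Tuple2<Integer, Integer>> {

        @Override
        public void coGroup(
            Iterable<Tuple5<Integer, Long, Integer, String, Long>> first,
            Iterable<Tuple5<Integer, Long, Integer, String, Long>> second,
            Collector<Tuple2<Integer, Integer>> out) {
          Integer key = null;
          int sum = 0;
          for (Tuple5<Integer, Long, Integer, String, Long> t : first) {
            key = t.f0;
            sum += t.f2;
          }
          for (Tuple5<Integer, Long, Integer, String, Long> t : second) {
            key = t.f0;
            sum += t.f2;
          }
          out.collect(new Tuple2<Integer, Integer>(key, sum));
        }
      }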
      case 2: {
       
        /*
         * CoGroup on two custom type inputs with key extractors
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2).where(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).equalTo(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).with(new CustomTypeCoGroup());
       
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,test\n" +
            "2,6,test\n" +
            "3,24,test\n" +
            "4,60,test\n" +
            "5,120,test\n" +
            "6,210,test\n";
      }
      case 3: {
       
        /*
         * check correctness of cogroup if UDF returns left input objects
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple3ReturnLeft());
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "2,2,Hello\n" +
            "3,2,Hello world\n" +
            "4,3,Hello world, how are you?\n" +
            "5,3,I am fine.\n";
       
      }
      case 4: {
       
        /*
         * check correctness of cogroup if UDF returns right input objects
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5ReturnRight());
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "3,4,3,Hallo Welt wie gehts?,2\n" +
            "3,5,4,ABC,2\n" +
            "3,6,5,BCD,3\n";
       
      }
      case 5: {
       
        /*
         * Reduce with broadcast set
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Integer, Integer>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5CoGroupBC()).withBroadcastSet(intDs, "ints");
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,55\n" +
            "2,6,55\n" +
            "3,24,55\n" +
            "4,60,55\n" +
            "5,120,55\n";
      }
      case 6: {
       
        /*
         * CoGroup on a tuple input with key field selector and a custom type input with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> coGroupDs = ds.coGroup(ds2).where(2).equalTo(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).with(new MixedCoGroup());
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "0,1,test\n" +
            "1,2,test\n" +
            "2,5,test\n" +
            "3,15,test\n" +
            "4,33,test\n" +
            "5,63,test\n" +
            "6,109,test\n" +
            "7,4,test\n" +
            "8,4,test\n" +
            "9,4,test\n" +
            "10,5,test\n" +
            "11,5,test\n" +
            "12,5,test\n" +
            "13,5,test\n" +
            "14,5,test\n";
           
      }
      case 7: {
       
        /*
         * CoGroup on a tuple input with key field selector and a custom type input with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> coGroupDs = ds2.coGroup(ds).where(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).equalTo(2).with(new MixedCoGroup2());
       
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "0,1,test\n" +
            "1,2,test\n" +
            "2,5,test\n" +
            "3,15,test\n" +
            "4,33,test\n" +
            "5,63,test\n" +
            "6,109,test\n" +
            "7,4,test\n" +
            "8,4,test\n" +
            "9,4,test\n" +
            "10,5,test\n" +
            "11,5,test\n" +
            "12,5,test\n" +
            "13,5,test\n" +
            "14,5,test\n";
       
      }
      case 8: {
        /*
         * CoGroup with multiple key fields
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> coGrouped = ds1.coGroup(ds2).
            where(0,4).equalTo(0,1).with(new Tuple5Tuple3CoGroup());
       
        coGrouped.writeAsCsv(resultPath);
        env.execute();
       
        return "1,1,Hallo\n" +
            "2,2,Hallo Welt\n" +
            "3,2,Hallo Welt wie gehts?\n" +
            "3,2,ABC\n" +
            "5,3,HIJ\n" +
            "5,3,IJK\n";
      }
      case 9: {
        /*
         * CoGroup with multiple key fields
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> coGrouped = ds1.coGroup(ds2).
            where(new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                return new Tuple2<Integer, Long>(t.f0, t.f4);
              }
            }).
            equalTo(new KeySelector<Tuple3<Integer,Long,String>, Tuple2<Integer, Long>>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public Tuple2<Integer, Long> getKey(Tuple3<Integer,Long,String> t) {
                return new Tuple2<Integer, Long>(t.f0, t.f1);
              }
            }).with(new Tuple5Tuple3CoGroup());
       
        coGrouped.writeAsCsv(resultPath);
        env.execute();
       
        return "1,1,Hallo\n" +
            "2,2,Hallo Welt\n" +
            "3,2,Hallo Welt wie gehts?\n" +
            "3,2,ABC\n" +
            "5,3,HIJ\n" +
            "5,3,IJK\n";
      }
      case 10: {
        /*
         * CoGroup on two custom type inputs using expression keys
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2).where("myInt").equalTo("myInt").with(new CustomTypeCoGroup());
       
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,test\n" +
            "2,6,test\n" +
            "3,24,test\n" +
            "4,60,test\n" +
            "5,120,test\n" +
            "6,210,test\n";
      }
      case 11: {
        /*
         * CoGroup on two custom type inputs using expression keys
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2)
            .where("nestedPojo.longNumber").equalTo(6).with(new CoGroupFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>, CustomType>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void coGroup(
                Iterable<POJO> first,
                Iterable<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> second,
                Collector<CustomType> out) throws Exception {
              for(POJO p : first) {
                for(Tuple7<Integer, String, Integer, Integer, Long, String, Long> t: second) {
                  Assert.assertTrue(p.nestedPojo.longNumber == t.f6);
                  out.collect(new CustomType(-1, p.nestedPojo.longNumber, "Flink"));
                }
              }
            }
        });
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "-1,20000,Flink\n" +
            "-1,10000,Flink\n" +
            "-1,30000,Flink\n";
      }
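The expression key "nestedPojo.longNumber" navigates into a nested POJO field; Flink requires POJO types to have a no-argument constructor and public fields (or getters and setters) for this to work. A minimal skeleton of the fixture types, reduced to the fields these snippets actually touch (an assumption, the real fixture is richer):

      // Assumed skeleton of the POJO fixture: only the nested long used as the
      // expression key is sketched here.
      public static class NestedPojo {
        public long longNumber;

        public NestedPojo() {}
      }

      public static class POJO {
        public NestedPojo nestedPojo;

        public POJO() {}
      }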
      case 12: {
        /*
         * CoGroup field-selector (expression keys) + key selector function
         * The key selector is unnecessarily complicated (Tuple1) ;)
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2)
            .where(new KeySelector<POJO, Tuple1<Long>>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Tuple1<Long> getKey(POJO value)
                  throws Exception {
                return new Tuple1<Long>(value.nestedPojo.longNumber);
              }
            }).equalTo(6).with(new CoGroupFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>, CustomType>() {
              private static final long serialVersionUID = 1L;

            @Override
            public void coGroup(
                Iterable<POJO> first,
                Iterable<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> second,
                Collector<CustomType> out) throws Exception {
              for(POJO p : first) {
                for(Tuple7<Integer, String, Integer, Integer, Long, String, Long> t: second) {
                  Assert.assertTrue(p.nestedPojo.longNumber == t.f6);
                  out.collect(new CustomType(-1, p.nestedPojo.longNumber, "Flink"));
                }
              }
            }
        });
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "-1,20000,Flink\n" +
            "-1,10000,Flink\n" +
            "-1,30000,Flink\n";
      }
      case 13: {
        /*
         * CoGroup field-selector (expression keys) + key selector function
         * The key selector is simple here
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2)
            .where(new KeySelector<POJO, Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Long getKey(POJO value)
                  throws Exception {
                return value.nestedPojo.longNumber;
              }
            }).equalTo(6).with(new CoGroupFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>, CustomType>() {
              private static final long serialVersionUID = 1L;

            @Override
            public void coGroup(
                Iterable<POJO> first,
                Iterable<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> second,
                Collector<CustomType> out) throws Exception {
              for(POJO p : first) {
                for(Tuple7<Integer, String, Integer, Integer, Long, String, Long> t: second) {
                  Assert.assertTrue(p.nestedPojo.longNumber == t.f6);
                  out.collect(new CustomType(-1, p.nestedPojo.longNumber, "Flink"));
                }
              }
            }
        });
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "-1,20000,Flink\n" +
            "-1,10000,Flink\n" +
            "-1,30000,Flink\n";
