Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment


  private static class SumMinMaxProgs {

    public static String runProgram(int progId, String resultPath) throws Exception {
      switch(progId) {
        case 1: {
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, Long>> sumDs = ds
              .sum(0)
              .andMax(1)
              .project(0, 1).types(Integer.class, Long.class);

          sumDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "231,6\n";
        }
        case 2: {
        /*
         * Grouped Aggregate
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
              .sum(0)
              .project(1, 0).types(Long.class, Integer.class);

          aggregateDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1\n" +
              "2,5\n" +
              "3,15\n" +
              "4,34\n" +
              "5,65\n" +
              "6,111\n";
        }
        case 3: {
        /*
         * Nested Aggregate
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
              .min(0)
              .min(0)
              .project(0).types(Integer.class);

          aggregateDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1\n";
        }
        default:
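Most of the snippets on this page read their input from Flink's CollectionDataSets test fixtures, which are not reproduced here. The expected results pin the 3-tuple fixture down fairly well: the first fields 1..21 sum to 231, the second field forms groups 1 through 6, and the strings run from "Hi" to "Comment#15". A plausible reconstruction, under those assumptions:

  // Plausible reconstruction of CollectionDataSets.get3TupleDataSet, inferred
  // from the expected results on this page; the real fixture is not shown here.
  public static DataSet<Tuple3<Integer, Long, String>> get3TupleDataSet(ExecutionEnvironment env) {
    List<Tuple3<Integer, Long, String>> data = new ArrayList<Tuple3<Integer, Long, String>>();
    data.add(new Tuple3<Integer, Long, String>(1, 1L, "Hi"));
    data.add(new Tuple3<Integer, Long, String>(2, 2L, "Hello"));
    data.add(new Tuple3<Integer, Long, String>(3, 2L, "Hello world"));
    data.add(new Tuple3<Integer, Long, String>(4, 3L, "Hello world, how are you?"));
    data.add(new Tuple3<Integer, Long, String>(5, 3L, "I am fine."));
    data.add(new Tuple3<Integer, Long, String>(6, 3L, "Luke Skywalker"));
    // ids 7..21 carry "Comment#1" through "Comment#15" in second-field groups of size 4, 5 and 6
    for (int i = 7; i <= 21; i++) {
      long group = i <= 10 ? 4L : (i <= 15 ? 5L : 6L);
      data.add(new Tuple3<Integer, Long, String>(i, group, "Comment#" + (i - 6)));
    }
    return env.fromCollection(data);
  }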


 
  private static class DependencyConnectedComponentsProgram {
   
    public static String runProgram(String resultPath) throws Exception {
     
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DOP);
     
      DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
      DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);
      int keyPosition = 0;
     
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
          initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, keyPosition);
     
      DataSet<Long> candidates = iteration.getWorkset().join(edges).where(0).equalTo(0)
          .with(new FindCandidatesJoin())
          .groupBy(new KeySelector<Long, Long>() {
                        public Long getKey(Long id) { return id; }
                      }).reduceGroup(new RemoveDuplicatesReduce());
     
      DataSet<Tuple2<Long, Long>> candidatesDependencies =
          candidates.join(edges)
          .where(new KeySelector<Long, Long>() {
                        public Long getKey(Long id) { return id; }
                      }).equalTo(new KeySelector<Tuple2<Long, Long>, Long>() {
                        public Long getKey(Tuple2<Long, Long> vertexWithId)
                        { return vertexWithId.f1; }
                      }).with(new FindCandidatesDependenciesJoin());
     
      DataSet<Tuple2<Long, Long>> verticesWithNewComponents =
          candidatesDependencies.join(iteration.getSolutionSet()).where(0).equalTo(0)
          .with(new NeighborWithComponentIDJoin())
          .groupBy(0).reduceGroup(new MinimumReduce());
     
      DataSet<Tuple2<Long, Long>> updatedComponentId =
          verticesWithNewComponents.join(iteration.getSolutionSet()).where(0).equalTo(0)
          .flatMap(new MinimumIdFilter());
     
      iteration.closeWith(updatedComponentId, updatedComponentId).writeAsText(resultPath);
     
      env.execute();
     
      return resultPath;
    }
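The UDFs referenced above (FindCandidatesJoin, RemoveDuplicatesReduce, FindCandidatesDependenciesJoin, NeighborWithComponentIDJoin, MinimumReduce, MinimumIdFilter) are defined elsewhere in the test. As one example of the pattern, MinimumReduce presumably keeps, per vertex, the smallest candidate component id; a minimal sketch under that assumption:

    // Assumed shape of MinimumReduce: for each vertex (grouped on field 0),
    // emit the tuple carrying the smallest component id (field 1).
    public static final class MinimumReduce
        implements GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {

      @Override
      public void reduce(Iterable<Tuple2<Long, Long>> values, Collector<Tuple2<Long, Long>> out) {
        Long vertexId = null;
        long minComponentId = Long.MAX_VALUE;
        for (Tuple2<Long, Long> value : values) {
          vertexId = value.f0;
          minComponentId = Math.min(minComponentId, value.f1);
        }
        out.collect(new Tuple2<Long, Long>(vertexId, minComponentId));
      }
    }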

      case 1: {
        /*
         * Test non-passing flatmap
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
        DataSet<String> nonPassingFlatMapDs = ds.
            flatMap(new FlatMapFunction<String, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(String value, Collector<String> out) throws Exception {
                if ( value.contains("bananas") ) {
                  out.collect(value);
                }
              }
            });
       
        nonPassingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "\n";
      }
      case 2: {
        /*
         * Test data duplicating flatmap
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
        DataSet<String> duplicatingFlatMapDs = ds.
            flatMap(new FlatMapFunction<String, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(String value, Collector<String> out) throws Exception {
                  out.collect(value);
                  out.collect(value.toUpperCase());
              }
            });
       
        duplicatingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "Hi\n" + "HI\n" +
            "Hello\n" + "HELLO\n" +
            "Hello world\n" + "HELLO WORLD\n" +
            "Hello world, how are you?\n" + "HELLO WORLD, HOW ARE YOU?\n" +
            "I am fine.\n" + "I AM FINE.\n" +
            "Luke Skywalker\n" + "LUKE SKYWALKER\n" +
            "Random comment\n" + "RANDOM COMMENT\n" +
            "LOL\n" + "LOL\n";
      }
      case 3: {
        /*
         * Test flatmap with varying number of emitted tuples
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> varyingTuplesMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                final int numTuples = value.f0 % 3;
                for ( int i = 0; i < numTuples; i++ ) {
                  out.collect(value);
                }
              }
            });
       
        varyingTuplesMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return  "1,1,Hi\n" +
            "2,2,Hello\n" + "2,2,Hello\n" +
            "4,3,Hello world, how are you?\n" +
            "5,3,I am fine.\n" + "5,3,I am fine.\n" +
            "7,4,Comment#1\n" +
            "8,4,Comment#2\n" + "8,4,Comment#2\n" +
            "10,4,Comment#4\n" +
            "11,5,Comment#5\n" + "11,5,Comment#5\n" +
            "13,5,Comment#7\n" +
            "14,5,Comment#8\n" + "14,5,Comment#8\n" +
            "16,6,Comment#10\n" +
            "17,6,Comment#11\n" + "17,6,Comment#11\n" +
            "19,6,Comment#13\n" +
            "20,6,Comment#14\n" + "20,6,Comment#14\n";
      }
      case 4: {
        /*
         * Test type conversion flatmapper (Custom -> Tuple)
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> typeConversionFlatMapDs = ds.
            flatMap(new FlatMapFunction<CustomType, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;
              private final Tuple3<Integer, Long, String> outTuple =
                  new Tuple3<Integer, Long, String>();
             
              @Override
              public void flatMap(CustomType value, Collector<Tuple3<Integer, Long, String>> out)
                  throws Exception {
                outTuple.setField(value.myInt, 0);
                outTuple.setField(value.myLong, 1);
                outTuple.setField(value.myString, 2);
                out.collect(outTuple);
              }
            });
       
        typeConversionFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return   "1,0,Hi\n" +
            "2,1,Hello\n" +
            "2,2,Hello world\n" +
            "3,3,Hello world, how are you?\n" +
            "3,4,I am fine.\n" +
            "3,5,Luke Skywalker\n" +
            "4,6,Comment#1\n" +
            "4,7,Comment#2\n" +
            "4,8,Comment#3\n" +
            "4,9,Comment#4\n" +
            "5,10,Comment#5\n" +
            "5,11,Comment#6\n" +
            "5,12,Comment#7\n" +
            "5,13,Comment#8\n" +
            "5,14,Comment#9\n" +
            "6,15,Comment#10\n" +
            "6,16,Comment#11\n" +
            "6,17,Comment#12\n" +
            "6,18,Comment#13\n" +
            "6,19,Comment#14\n" +
            "6,20,Comment#15\n";
      }
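The CustomType fixture appears throughout these snippets. The field accesses above (myInt, myLong, myString) and the writeAsText output of later cases ("1,0,Hi" and so on) suggest a POJO of roughly this shape, sketched here as an assumption:

      // Assumed shape of the CollectionDataSets.CustomType fixture: a plain POJO
      // whose toString() produces the "myInt,myLong,myString" lines seen on this page.
      public static class CustomType {
        public int myInt;
        public long myLong;
        public String myString;

        public CustomType() {}

        public CustomType(int i, long l, String s) {
          this.myInt = i;
          this.myLong = l;
          this.myString = s;
        }

        @Override
        public String toString() {
          return myInt + "," + myLong + "," + myString;
        }
      }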
      case 5: {
        /*
         * Test type conversion flatmapper (Tuple -> Basic)
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<String> typeConversionFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<String> out) throws Exception {
                out.collect(value.f2);
              }
            });
       
        typeConversionFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "Hi\n" + "Hello\n" + "Hello world\n" +
            "Hello world, how are you?\n" +
            "I am fine.\n" + "Luke Skywalker\n" +
            "Comment#1\n" + "Comment#2\n" +
            "Comment#3\n" + "Comment#4\n" +
            "Comment#5\n" + "Comment#6\n" +
            "Comment#7\n" + "Comment#8\n" +
            "Comment#9\n" + "Comment#10\n" +
            "Comment#11\n" + "Comment#12\n" +
            "Comment#13\n" + "Comment#14\n" +
            "Comment#15\n";
      }
      case 6: {
        /*
         * Test flatmapper if UDF returns input object
         * multiple times and changes it in between
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> inputObjFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public void flatMap( Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                final int numTuples = value.f0 % 4;
                for ( int i = 0; i < numTuples; i++ ) {
                  value.setField(i, 0);
                  out.collect(value);
                }             
              }
            });
       
        inputObjFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return  "0,1,Hi\n" +
            "0,2,Hello\n" + "1,2,Hello\n" +
            "0,2,Hello world\n" + "1,2,Hello world\n" + "2,2,Hello world\n" +
            "0,3,I am fine.\n" +
            "0,3,Luke Skywalker\n" + "1,3,Luke Skywalker\n" +
            "0,4,Comment#1\n" + "1,4,Comment#1\n" + "2,4,Comment#1\n" +
            "0,4,Comment#3\n" +
            "0,4,Comment#4\n" + "1,4,Comment#4\n" +
            "0,5,Comment#5\n" + "1,5,Comment#5\n" + "2,5,Comment#5\n" +
            "0,5,Comment#7\n" +
            "0,5,Comment#8\n" + "1,5,Comment#8\n" +
            "0,5,Comment#9\n" + "1,5,Comment#9\n" + "2,5,Comment#9\n" +
            "0,6,Comment#11\n" +
            "0,6,Comment#12\n" + "1,6,Comment#12\n" +
            "0,6,Comment#13\n" + "1,6,Comment#13\n" + "2,6,Comment#13\n" +
            "0,6,Comment#15\n";
      }
      case 7: {
        /*
         * Test flatmap with broadcast set
         */
         
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> ints = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> bcFlatMapDs = ds.
            flatMap(new RichFlatMapFunction<Tuple3<Integer,Long,String>, Tuple3<Integer,Long,String>>() {
              private static final long serialVersionUID = 1L;
              private final Tuple3<Integer, Long, String> outTuple =
                  new Tuple3<Integer, Long, String>();
              private Integer f2Replace = 0;
             
              @Override
              public void open(Configuration config) {
                Collection<Integer> ints = this.getRuntimeContext().getBroadcastVariable("ints");
                int sum = 0;
                for(Integer i : ints) {
                  sum += i;
                }
                f2Replace = sum;
              }
             
              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                outTuple.setFields(f2Replace, value.f1, value.f2);
                out.collect(outTuple);
              }
            }).withBroadcastSet(ints, "ints");
        bcFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return   "55,1,Hi\n" +
            "55,2,Hello\n" +
            "55,2,Hello world\n" +
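The broadcast set is wired in with withBroadcastSet(ints, "ints") and read back in open() through getRuntimeContext().getBroadcastVariable("ints"). The constant 55 in the expected output is the sum of the broadcast integers; one fixture consistent with that output (an assumption, the real helper is not shown) is simply the numbers 1 through 10:

  // Plausible stand-in for CollectionDataSets.getIntegerDataSet: any collection
  // summing to 55 matches the expected output; 1..10 is one such choice.
  public static DataSet<Integer> getIntegerDataSet(ExecutionEnvironment env) {
    return env.fromElements(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  }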

   
    final int NUM_ITERS = 4;
    final double expectedFactor = (int) Math.pow(7, NUM_ITERS);
   
    // this is an artificial program, it does not compute anything sensible
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Double>> initialData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0), new Tuple2<Long, Double>(2L, 2.0),
                              new Tuple2<Long, Double>(3L, 3.0), new Tuple2<Long, Double>(4L, 4.0),
                              new Tuple2<Long, Double>(5L, 5.0), new Tuple2<Long, Double>(6L, 6.0));
   
    DataSet<Tuple2<Long, Double>> result = MultipleJoinsWithSolutionSetCompilerTest.constructPlan(initialData, NUM_ITERS);
   
    List<Tuple2<Long, Double>> resultCollector = new ArrayList<Tuple2<Long,Double>>();
    result.output(new LocalCollectionOutputFormat<Tuple2<Long,Double>>(resultCollector));
   
    env.execute();
   
    for (Tuple2<Long, Double> tuple : resultCollector) {
      Assert.assertEquals(expectedFactor * tuple.f0, tuple.f1.doubleValue(), 0.0);
    }
  }
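LocalCollectionOutputFormat is what lets this test assert on results without touching the file system: it funnels the DataSet into an in-memory collection when the program runs in a local environment. A minimal standalone usage sketch (names chosen for illustration):

  // Collect a DataSet into a local list instead of writing files.
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Long> numbers = env.generateSequence(1, 100);

  List<Long> collected = new ArrayList<Long>();
  numbers.output(new LocalCollectionOutputFormat<Long>(collected));

  env.execute();
  // 'collected' now holds the 100 generated values (in no particular order)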

      case 1: {
        /*
         * Reduce on tuples with key field selector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new Tuple3Reduce("B-)"));
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,B-)\n" +
            "15,3,B-)\n" +
            "34,4,B-)\n" +
            "65,5,B-)\n" +
            "111,6,B-)\n";
      }
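Tuple3Reduce is defined elsewhere in the test. The expected output gives away its contract: singleton groups are never reduced (which is why "1,1,Hi" survives unchanged), while larger groups sum the first field, keep the group key, and replace the string. A sketch under those assumptions:

      // Assumed shape of Tuple3Reduce: sum field 0, keep the key in field 1,
      // and overwrite field 2 with the constructor argument ("B-)").
      public static class Tuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
        private final String f2Replace;

        public Tuple3Reduce(String f2Replace) {
          this.f2Replace = f2Replace;
        }

        @Override
        public Tuple3<Integer, Long, String> reduce(
            Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
          return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1, f2Replace);
        }
      }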
      case 2: {
        /*
         * Reduce on tuples with multiple key field selectors
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy(4,0).reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "3,9,0,P-),2\n" +
            "3,6,5,BCD,3\n" +
            "4,17,0,P-),1\n" +
            "4,17,0,P-),2\n" +
            "5,11,10,GHI,1\n" +
            "5,29,0,P-),2\n" +
            "5,25,0,P-),3\n";
      }
      case 3: {
        /*
         * Reduce on tuples with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Long getKey(Tuple3<Integer, Long, String> in) {
                    return in.f1;
                  }
                }).reduce(new Tuple3Reduce("B-)"));
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,B-)\n" +
            "15,3,B-)\n" +
            "34,4,B-)\n" +
            "65,5,B-)\n" +
            "111,6,B-)\n";
       
      }
      case 4: {
        /*
         * Reduce on custom type with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            groupBy(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).reduce(new CustomTypeReduce());
       
        reduceDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,Hi\n" +
            "2,3,Hello!\n" +
            "3,12,Hello!\n" +
            "4,30,Hello!\n" +
            "5,60,Hello!\n" +
            "6,105,Hello!\n";
      }
      case 5: {
        /*
         * All-reduce for tuple
         */

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            reduce(new AllAddingTuple3Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "231,91,Hello World\n";
      }
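The all-reduce folds the entire data set into a single tuple: 231 is the sum of the first fields (1 through 21) and 91 the sum of the second fields. AllAddingTuple3Reduce plausibly looks like this:

      // Assumed shape of AllAddingTuple3Reduce: sum fields 0 and 1 across
      // the whole data set and emit a constant string.
      public static class AllAddingTuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
        @Override
        public Tuple3<Integer, Long, String> reduce(
            Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
          return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1 + v2.f1, "Hello World");
        }
      }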
      case 6: {
        /*
         * All-reduce for custom types
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            reduce(new AllAddingCustomTypeReduce());
       
        reduceDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "91,210,Hello!";
      }
      case 7: {
       
        /*
         * Reduce with broadcast set
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new BCTuple3Reduce()).withBroadcastSet(intDs, "ints");
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,55\n" +
            "15,3,55\n" +
            "34,4,55\n" +
            "65,5,55\n" +
            "111,6,55\n";
      }
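BCTuple3Reduce combines the two previous patterns: a grouped reduce plus a broadcast set. Judging by the output (field 2 becomes "55", the sum of the broadcast integers), a sketch under that assumption:

      // Assumed shape of BCTuple3Reduce: a RichReduceFunction that sums the
      // "ints" broadcast set in open() and writes that sum into field 2.
      public static class BCTuple3Reduce extends RichReduceFunction<Tuple3<Integer, Long, String>> {
        private String f2Replace;

        @Override
        public void open(Configuration config) {
          Collection<Integer> ints = this.getRuntimeContext().getBroadcastVariable("ints");
          int sum = 0;
          for (Integer i : ints) {
            sum += i;
          }
          f2Replace = String.valueOf(sum);
        }

        @Override
        public Tuple3<Integer, Long, String> reduce(
            Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
          return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1, f2Replace);
        }
      }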
      case 8: {
        /*
         * Reduce with UDF that returns the second input object (check mutable object handling)
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new InputReturningTuple3Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,Hi again!\n" +
            "15,3,Hi again!\n" +
            "34,4,Hi again!\n" +
            "65,5,Hi again!\n" +
            "111,6,Hi again!\n";
      }
      case 9: {
        /*
         * Reduce with a Tuple-returning KeySelector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy(
                new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
                  private static final long serialVersionUID = 1L;
   
                  @Override
                  public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                    return new Tuple2<Integer, Long>(t.f0, t.f4);
                  }
                }).reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "3,9,0,P-),2\n" +
            "3,6,5,BCD,3\n" +
            "4,17,0,P-),1\n" +
            "4,17,0,P-),2\n" +
            "5,11,10,GHI,1\n" +
            "5,29,0,P-),2\n" +
            "5,25,0,P-),3\n";
      }
      case 10: {
        /*
         * Case 2 with String-based field expression
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy("f4","f0").reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +

public class BulkIterationWithAllReducerITCase extends JavaProgramTestBase {

  @Override
  protected void testProgram() throws Exception {
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);
   
    IterativeDataSet<Integer> iteration = data.iterate(10);
   
    DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");
   
    final List<Integer> resultList = new ArrayList<Integer>();
    iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));
   
    env.execute();
   
    Assert.assertEquals(8, resultList.get(0).intValue());
  }
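PickOneAllReduce is defined elsewhere; the point of the test is that the broadcast variable "bc" is correctly initialized from the iteration's partial solution in every superstep. A heavily hedged guess, chosen only to be consistent with the asserted result (8, the maximum input element):

  // Speculative sketch of PickOneAllReduce: ignore the regular input and emit
  // the maximum of the broadcast partial solution. In superstep 1 the broadcast
  // holds the initial 1..8; afterwards it holds just {8}, so the result is stable.
  public static class PickOneAllReduce extends RichGroupReduceFunction<Integer, Integer> {
    @Override
    public void reduce(Iterable<Integer> values, Collector<Integer> out) {
      List<Integer> bc = getRuntimeContext().getBroadcastVariable("bc");
      int max = Integer.MIN_VALUE;
      for (Integer i : bc) {
        max = Math.max(max, i);
      }
      out.collect(max);
    }
  }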

      case 0: {
        /*
         * Test hash partition by key field
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(1)
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
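UniqueLongMapper is not shown; its role follows from the partitioning: after partitionByHash(1), all tuples with the same Long key land in the same partition, so emitting each distinct key once per partition produces every value exactly once overall. A sketch under that assumption:

      // Assumed shape of UniqueLongMapper: emit each distinct field-1 value
      // seen within a partition exactly once.
      public static class UniqueLongMapper
          implements MapPartitionFunction<Tuple3<Integer, Long, String>, Long> {

        @Override
        public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Long> out) {
          Set<Long> seen = new HashSet<Long>();
          for (Tuple3<Integer, Long, String> t : values) {
            if (seen.add(t.f1)) {
              out.collect(t.f1);
            }
          }
        }
      }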
      case 1: {
        /*
         * Test hash partition by key selector
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(new KeySelector<Tuple3<Integer,Long,String>, Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Long getKey(Tuple3<Integer, Long, String> value) throws Exception {
                return value.f1;
              }
             
            })
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
      case 2: {
        /*
         * Test forced rebalancing
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // generate some number in parallel
        DataSet<Long> ds = env.generateSequence(1,3000);
        DataSet<Tuple2<Integer, Integer>> uniqLongs = ds
            // introduce some partition skew by filtering
            .filter(new FilterFunction<Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public boolean filter(Long value) throws Exception {
                if (value <= 780) {
                  return false;
                } else {
                  return true;
                }
              }
            })
            // rebalance
            .rebalance()
            // count values in each partition
            .map(new PartitionIndexMapper())
            .groupBy(0)
            .reduce(new ReduceFunction<Tuple2<Integer, Integer>>() {
              private static final long serialVersionUID = 1L;

              public Tuple2<Integer, Integer> reduce(Tuple2<Integer, Integer> v1, Tuple2<Integer, Integer> v2) {
                return new Tuple2<Integer, Integer>(v1.f0, v1.f1+v2.f1);
              }
            })
            // round counts to mitigate runtime scheduling effects (lazy split assignment)
            .map(new MapFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>(){
              private static final long serialVersionUID = 1L;

              @Override
              public Tuple2<Integer, Integer> map(Tuple2<Integer, Integer> value) throws Exception {
                value.f1 = (value.f1 / 10);
                return value;
              }
             
            });
       
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        StringBuilder result = new StringBuilder();
        int numPerPartition = 2220 / env.getDegreeOfParallelism() / 10;
        for (int i = 0; i < env.getDegreeOfParallelism(); i++) {
          result.append('(').append(i).append(',').append(numPerPartition).append(")\n");
        }
        // return expected result
        return result.toString();
      }
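PartitionIndexMapper presumably tags each element with the index of the subtask processing it; the subsequent groupBy(0).reduce then counts elements per partition, and the division by 10 absorbs small scheduling-induced imbalances. A sketch under that assumption:

      // Assumed shape of PartitionIndexMapper: emit (subtask index, 1) per element,
      // so that the following reduce counts elements per parallel partition.
      public static class PartitionIndexMapper
          extends RichMapFunction<Long, Tuple2<Integer, Integer>> {

        @Override
        public Tuple2<Integer, Integer> map(Long value) {
          int partitionIndex = getRuntimeContext().getIndexOfThisSubtask();
          return new Tuple2<Integer, Integer>(partitionIndex, 1);
        }
      }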
      case 3: {
        /*
         * Test hash partition by key field and different DOP
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(3);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(1).setParallelism(4)
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
      case 4: {
        /*
         * Test hash partition with key expression
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(3);
       
        DataSet<POJO> ds = CollectionDataSets.getDuplicatePojoDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash("nestedPojo.longNumber").setParallelism(4)
            .mapPartition(new UniqueNestedPojoLongMapper());
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return   "10000\n" +
            "20000\n" +
            "30000\n";

    compareResultsByLinesInMemory(DATAPOINTS + DATAPOINTS + DATAPOINTS + DATAPOINTS, resultPath);
  }

  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Record> initialInput = env.readFile(new PointInFormat(), this.dataPath).setParallelism(1);
   
    IterativeDataSet<Record> iteration = initialInput.iterate(2);
   
    DataSet<Record> result = iteration.union(iteration).map(new IdentityMapper());
   
    iteration.closeWith(result).write(new PointOutFormat(), this.resultPath);
   
    env.execute();
  }
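The union of the partial solution with itself doubles the data in every superstep, so two iterations produce four copies of the input, which is why the check above concatenates DATAPOINTS four times. IdentityMapper is presumably a pass-through:

  // Assumed shape of IdentityMapper: forward each record unchanged; the
  // doubling comes entirely from iteration.union(iteration).
  public static class IdentityMapper implements MapFunction<Record, Record> {
    @Override
    public Record map(Record value) {
      return value;
    }
  }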

public class BroadcastVarInitializationITCase extends JavaProgramTestBase {
 
  @Override
  protected void testProgram() throws Exception {
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4);
   
    DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);
   
    IterativeDataSet<Integer> iteration = data.iterate(10);
   
    DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");
   
    final List<Integer> resultList = new ArrayList<Integer>();
    iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));
   
    env.execute();
   
    Assert.assertEquals(8, resultList.get(0).intValue());
  }

       
      case 1: {
        /*
         * CoGroup on tuples with key field selector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<Integer, Integer>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5CoGroup());
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,0\n" +
            "2,6\n" +
            "3,24\n" +
            "4,60\n" +
            "5,120\n";
      }
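Tuple5CoGroup is not shown. Both inputs are the same 5-tuple fixture, and the expected second fields (0, 6, 24, 60, 120) are exactly twice the per-key sums of field 2, which suggests the function sums field 2 over both sides; a sketch under that assumption:

      // Assumed shape of Tuple5CoGroup: per key, sum field 2 across both
      // co-grouped inputs (identical inputs here, hence the doubled sums).
      public static class Tuple5CoGroup implements CoGroupFunction<
          Tuple5<Integer, Long, Integer, String, Long>,
          Tuple5<Integer, Long, Integer, String, Long>,
          Tuple2<Integer, Integer>> {

        @Override
        public void coGroup(
            Iterable<Tuple5<Integer, Long, Integer, String, Long>> first,
            Iterable<Tuple5<Integer, Long, Integer, String, Long>> second,
            Collector<Tuple2<Integer, Integer>> out) {
          Integer key = null;
          int sum = 0;
          for (Tuple5<Integer, Long, Integer, String, Long> t : first) {
            key = t.f0;
            sum += t.f2;
          }
          for (Tuple5<Integer, Long, Integer, String, Long> t : second) {
            key = t.f0;
            sum += t.f2;
          }
          out.collect(new Tuple2<Integer, Integer>(key, sum));
        }
      }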
      case 2: {
       
        /*
         * CoGroup on two custom type inputs with key extractors
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2).where(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).equalTo(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).with(new CustomTypeCoGroup());
       
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,test\n" +
            "2,6,test\n" +
            "3,24,test\n" +
            "4,60,test\n" +
            "5,120,test\n" +
            "6,210,test\n";
      }
      case 3: {
       
        /*
         * check correctness of cogroup if UDF returns left input objects
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple3ReturnLeft());
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "2,2,Hello\n" +
            "3,2,Hello world\n" +
            "4,3,Hello world, how are you?\n" +
            "5,3,I am fine.\n";
       
      }
      case 4: {
       
        /*
         * check correctness of cogroup if UDF returns right input objects
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5ReturnRight());
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "3,4,3,Hallo Welt wie gehts?,2\n" +
            "3,5,4,ABC,2\n" +
            "3,6,5,BCD,3\n";
       
      }
      case 5: {
       
        /*
         * Reduce with broadcast set
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Integer, Integer>> coGroupDs = ds.coGroup(ds2).where(0).equalTo(0).with(new Tuple5CoGroupBC()).withBroadcastSet(intDs, "ints");
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,55\n" +
            "2,6,55\n" +
            "3,24,55\n" +
            "4,60,55\n" +
            "5,120,55\n";
      }
      case 6: {
       
        /*
         * CoGroup on a tuple input with key field selector and a custom type input with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> coGroupDs = ds.coGroup(ds2).where(2).equalTo(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).with(new MixedCoGroup());
       
        coGroupDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "0,1,test\n" +
            "1,2,test\n" +
            "2,5,test\n" +
            "3,15,test\n" +
            "4,33,test\n" +
            "5,63,test\n" +
            "6,109,test\n" +
            "7,4,test\n" +
            "8,4,test\n" +
            "9,4,test\n" +
            "10,5,test\n" +
            "11,5,test\n" +
            "12,5,test\n" +
            "13,5,test\n" +
            "14,5,test\n";
           
      }
      case 7: {
       
        /*
         * CoGroup on a tuple input with key field selector and a custom type input with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> coGroupDs = ds2.coGroup(ds).where(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).equalTo(2).with(new MixedCoGroup2());
       
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "0,1,test\n" +
            "1,2,test\n" +
            "2,5,test\n" +
            "3,15,test\n" +
            "4,33,test\n" +
            "5,63,test\n" +
            "6,109,test\n" +
            "7,4,test\n" +
            "8,4,test\n" +
            "9,4,test\n" +
            "10,5,test\n" +
            "11,5,test\n" +
            "12,5,test\n" +
            "13,5,test\n" +
            "14,5,test\n";
       
      }
      case 8: {
        /*
         * CoGroup with multiple key fields
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> coGrouped = ds1.coGroup(ds2).
            where(0,4).equalTo(0,1).with(new Tuple5Tuple3CoGroup());
       
        coGrouped.writeAsCsv(resultPath);
        env.execute();
       
        return "1,1,Hallo\n" +
            "2,2,Hallo Welt\n" +
            "3,2,Hallo Welt wie gehts?\n" +
            "3,2,ABC\n" +
            "5,3,HIJ\n" +
            "5,3,IJK\n";
      }
      case 9: {
        /*
         * CoGroup with multiple key fields
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> coGrouped = ds1.coGroup(ds2).
            where(new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                return new Tuple2<Integer, Long>(t.f0, t.f4);
              }
            }).
            equalTo(new KeySelector<Tuple3<Integer,Long,String>, Tuple2<Integer, Long>>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public Tuple2<Integer, Long> getKey(Tuple3<Integer,Long,String> t) {
                return new Tuple2<Integer, Long>(t.f0, t.f1);
              }
            }).with(new Tuple5Tuple3CoGroup());
       
        coGrouped.writeAsCsv(resultPath);
        env.execute();
       
        return "1,1,Hallo\n" +
            "2,2,Hallo Welt\n" +
            "3,2,Hallo Welt wie gehts?\n" +
            "3,2,ABC\n" +
            "5,3,HIJ\n" +
            "5,3,IJK\n";
      }
      case 10: {
        /*
         * CoGroup on two custom type inputs using expression keys
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2).where("myInt").equalTo("myInt").with(new CustomTypeCoGroup());
       
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,test\n" +
            "2,6,test\n" +
            "3,24,test\n" +
            "4,60,test\n" +
            "5,120,test\n" +
            "6,210,test\n";
      }
      case 11: {
        /*
         * CoGroup on two custom type inputs using expression keys
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2)
            .where("nestedPojo.longNumber").equalTo(6).with(new CoGroupFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>, CustomType>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void coGroup(
                Iterable<POJO> first,
                Iterable<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> second,
                Collector<CustomType> out) throws Exception {
              for(POJO p : first) {
                for(Tuple7<Integer, String, Integer, Integer, Long, String, Long> t: second) {
                  Assert.assertTrue(p.nestedPojo.longNumber == t.f6);
                  out.collect(new CustomType(-1, p.nestedPojo.longNumber, "Flink"));
                }
              }
            }
        });
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "-1,20000,Flink\n" +
            "-1,10000,Flink\n" +
            "-1,30000,Flink\n";
      }
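The expression key "nestedPojo.longNumber" navigates into a nested POJO field; Flink requires POJO types to have a no-argument constructor and public fields (or getters and setters) for this to work. A minimal skeleton of the fixture types, reduced to the fields these snippets actually touch (an assumption, the real fixture is richer):

      // Assumed skeleton of the POJO fixture: only the nested long used as the
      // expression key is sketched here.
      public static class NestedPojo {
        public long longNumber;

        public NestedPojo() {}
      }

      public static class POJO {
        public NestedPojo nestedPojo;

        public POJO() {}
      }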
      case 12: {
        /*
         * CoGroup field-selector (expression keys) + key selector function
         * The key selector is unnecessarily complicated (Tuple1) ;)
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2)
            .where(new KeySelector<POJO, Tuple1<Long>>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Tuple1<Long> getKey(POJO value)
                  throws Exception {
                return new Tuple1<Long>(value.nestedPojo.longNumber);
              }
            }).equalTo(6).with(new CoGroupFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>, CustomType>() {
              private static final long serialVersionUID = 1L;

            @Override
            public void coGroup(
                Iterable<POJO> first,
                Iterable<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> second,
                Collector<CustomType> out) throws Exception {
              for(POJO p : first) {
                for(Tuple7<Integer, String, Integer, Integer, Long, String, Long> t: second) {
                  Assert.assertTrue(p.nestedPojo.longNumber == t.f6);
                  out.collect(new CustomType(-1, p.nestedPojo.longNumber, "Flink"));
                }
              }
            }
        });
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "-1,20000,Flink\n" +
            "-1,10000,Flink\n" +
            "-1,30000,Flink\n";
      }
      case 13: {
        /*
         * CoGroup field-selector (expression keys) + key selector function
         * The key selector is simple here
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<CustomType> coGroupDs = ds.coGroup(ds2)
            .where(new KeySelector<POJO, Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Long getKey(POJO value)
                  throws Exception {
                return value.nestedPojo.longNumber;
              }
            }).equalTo(6).with(new CoGroupFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>, CustomType>() {
              private static final long serialVersionUID = 1L;

            @Override
            public void coGroup(
                Iterable<POJO> first,
                Iterable<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> second,
                Collector<CustomType> out) throws Exception {
              for(POJO p : first) {
                for(Tuple7<Integer, String, Integer, Integer, Long, String, Long> t: second) {
                  Assert.assertTrue(p.nestedPojo.longNumber == t.f6);
                  out.collect(new CustomType(-1, p.nestedPojo.longNumber, "Flink"));
                }
              }
            }
        });
        coGroupDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "-1,20000,Flink\n" +
            "-1,10000,Flink\n" +
            "-1,30000,Flink\n";
