Examples of ExecutionEnvironment


Examples of org.apache.flink.api.java.ExecutionEnvironment

       
        /*
         * UDF Join on tuples with key field positions
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs =
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .with(new T3T5FlatJoin());
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
       
      }
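
        The T3T5FlatJoin UDF used here (and in several cases below) is not part of the
        excerpt. Judging from the expected "Hi,Hallo"-style output, it pairs the String
        field of each input tuple. A minimal sketch, assuming the standard Flink
        FlatJoinFunction interface (the body is a reconstruction, not the original):

        // Assumed implementation -- reconstructed from the expected output above.
        public static class T3T5FlatJoin implements
            FlatJoinFunction<Tuple3<Integer, Long, String>,
                             Tuple5<Integer, Long, Integer, String, Long>,
                             Tuple2<String, String>> {

          @Override
          public void join(Tuple3<Integer, Long, String> first,
                           Tuple5<Integer, Long, Integer, String, Long> second,
                           Collector<Tuple2<String, String>> out) {
            // Emit (left String field, right String field), e.g. ("Hi", "Hallo").
            out.collect(new Tuple2<String, String>(first.f2, second.f3));
          }
        }

        LeftReturningJoin and RightReturningJoin (cases 6 and 7 below) presumably follow
        the same pattern, forwarding first or second unchanged.
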
      case 2: {
       
        /*
         * UDF Join on tuples with multiple key field positions
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs =
            ds1.join(ds2)
               .where(0,1)
               .equalTo(0,4)
               .with(new T3T5FlatJoin());
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt wie gehts?\n" +
            "Hello world,ABC\n" +
            "I am fine.,HIJ\n" +
            "I am fine.,IJK\n";
       
      }
      case 3: {
       
        /*
         * Default Join on tuples
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<Tuple3<Integer, Long, String>,Tuple5<Integer, Long, Integer, String, Long>>> joinDs =
            ds1.join(ds2)
               .where(0)
               .equalTo(2);
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "(1,1,Hi),(2,2,1,Hallo Welt,2)\n" +
            "(2,2,Hello),(2,3,2,Hallo Welt wie,1)\n" +
            "(3,2,Hello world),(3,4,3,Hallo Welt wie gehts?,2)\n";
     
      }
      case 4: {
       
        /*
         * Join with Huge
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs = ds1.joinWithHuge(ds2)
                              .where(1)
                              .equalTo(1)
                              .with(new T3T5FlatJoin());
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
       
      }
      case 5: {
       
        /*
         * Join with Tiny
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs =
            ds1.joinWithTiny(ds2)
               .where(1)
               .equalTo(1)
               .with(new T3T5FlatJoin());
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt\n";
       
      }
     
      case 6: {
       
        /*
         * Join that returns the left input object
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> joinDs =
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .with(new LeftReturningJoin());
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "2,2,Hello\n" +
            "3,2,Hello world\n";
      }
      case 7: {
       
        /*
         * Join that returns the right input object
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> joinDs =
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .with(new RightReturningJoin());
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "2,2,1,Hallo Welt,2\n";
      }
      case 8: {
       
        /*
         * Join with broadcast set
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.getSmall5TupleDataSet(env);
        DataSet<Tuple3<String, String, Integer>> joinDs =
            ds1.join(ds2)
               .where(1)
               .equalTo(4)
               .with(new T3T5BCJoin())
               .withBroadcastSet(intDs, "ints");
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hi,Hallo,55\n" +
            "Hi,Hallo Welt wie,55\n" +
            "Hello,Hallo Welt,55\n" +
            "Hello world,Hallo Welt,55\n";
      }
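
        T3T5BCJoin is not shown. Since it reads the broadcast set registered above under
        the name "ints", it is presumably a rich function; a sketch assuming Flink's
        RichFlatJoinFunction and broadcast-variable API, with the body reconstructed from
        the expected output (55 = 1 + 2 + ... + 10):

        // Assumed implementation -- the broadcast sum explains the constant 55 above.
        public static class T3T5BCJoin extends
            RichFlatJoinFunction<Tuple3<Integer, Long, String>,
                                 Tuple5<Integer, Long, Integer, String, Long>,
                                 Tuple3<String, String, Integer>> {

          private int broadcastSum;

          @Override
          public void open(Configuration parameters) {
            // Sum the broadcast "ints" data set once per task, before any join() call.
            broadcastSum = 0;
            for (int i : getRuntimeContext().<Integer>getBroadcastVariable("ints")) {
              broadcastSum += i;
            }
          }

          @Override
          public void join(Tuple3<Integer, Long, String> first,
                           Tuple5<Integer, Long, Integer, String, Long> second,
                           Collector<Tuple3<String, String, Integer>> out) {
            out.collect(new Tuple3<String, String, Integer>(first.f2, second.f3, broadcastSum));
          }
        }
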
      case 9: {
     
        /*
         * Join on a tuple input with key field selector and a custom type input with key extractor
         */

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<CustomType> ds1 = CollectionDataSets.getSmallCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs =
            ds1.join(ds2)
               .where(new KeySelector<CustomType, Integer>() {
                    @Override
                    public Integer getKey(CustomType value) {
                      return value.myInt;
                    }
                  }
               )
               .equalTo(0)
               .with(new CustT3Join());

        joinDs.writeAsCsv(resultPath);
        env.execute();

        // return expected result
        return "Hi,Hi\n" +
            "Hello,Hello\n" +
            "Hello world,Hello\n";

        }
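
        CustomType itself is not included in the excerpt. Its shape can be read off the
        key selectors (value.myInt, value.myLong) and the "1,0,Hi"-style CSV output of the
        custom-type cases; the following is a reconstruction, not the original:

        // Assumed shape -- fields and toString() inferred from keys and expected output.
        public static class CustomType {
          public int myInt;       // e.g. 1
          public long myLong;     // e.g. 0
          public String myString; // e.g. "Hi"

          public CustomType() {
            // POJO types need a public no-argument constructor.
          }

          @Override
          public String toString() {
            return myInt + "," + myLong + "," + myString;
          }
        }
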
      case 10: {
       
        /*
         * Project join on a tuple input 1
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple6<String, Long, String, Integer, Long, Long>> joinDs =
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .projectFirst(2,1)
               .projectSecond(3)
               .projectFirst(0)
               .projectSecond(4,1)
               .types(String.class, Long.class, String.class, Integer.class, Long.class, Long.class);
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hi,1,Hallo,1,1,1\n" +
            "Hello,2,Hallo Welt,2,2,2\n" +
            "Hello world,2,Hallo Welt,3,2,2\n";
       
      }
      case 11: {
       
        /*
         * Project join on a tuple input 2
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple6<String, String, Long, Long, Long, Integer>> joinDs =
            ds1.join(ds2)
               .where(1)
               .equalTo(1)
               .projectSecond(3)
               .projectFirst(2,1)
               .projectSecond(4,1)
               .projectFirst(0)
               .types(String.class, String.class, Long.class, Long.class, Long.class, Integer.class);
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hallo,Hi,1,1,1,1\n" +
            "Hallo Welt,Hello,2,2,2,2\n" +
            "Hallo Welt,Hello world,2,2,2,3\n";
      }
       
      case 12: {
       
        /*
         * Join on a tuple input with key field selector and a custom type input with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple2<String, String>> joinDs =
            ds1.join(ds2)
               .where(1).equalTo(new KeySelector<CustomType, Long>() {
                     @Override
                     public Long getKey(CustomType value) {
                       return value.myLong;
                     }
                   })
               .with(new T3CustJoin());
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hi,Hello\n" +
            "Hello,Hello world\n" +
            "Hello world,Hello world\n";
           
      }
     
      case 13: {
       
        /*
         * (Default) Join on two custom type inputs with key extractors
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds1 = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> ds2 = CollectionDataSets.getSmallCustomTypeDataSet(env);
       
        DataSet<Tuple2<CustomType, CustomType>> joinDs =
          ds1.join(ds2)
             .where(
                 new KeySelector<CustomType, Integer>() {
                   @Override
                   public Integer getKey(CustomType value) {
                     return value.myInt;
                   }
                 }
                )
            .equalTo(
                new KeySelector<CustomType, Integer>() {
                     @Override
                     public Integer getKey(CustomType value) {
                       return value.myInt;
                     }
                   }
                );
                                       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,Hi,1,0,Hi\n" +
            "2,1,Hello,2,1,Hello\n" +
            "2,1,Hello,2,2,Hello world\n" +
            "2,2,Hello world,2,1,Hello\n" +
            "2,2,Hello world,2,2,Hello world\n";
 
      }
      case 14: {
        /*
         * UDF Join on tuples with tuple-returning key selectors
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<String, String>> joinDs =
            ds1.join(ds2)
               .where(new KeySelector<Tuple3<Integer,Long,String>, Tuple2<Integer, Long>>() {
                private static final long serialVersionUID = 1L;
               
                @Override
                public Tuple2<Integer, Long> getKey(Tuple3<Integer,Long,String> t) {
                  return new Tuple2<Integer, Long>(t.f0, t.f1);
                }
              })
               .equalTo(new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
                private static final long serialVersionUID = 1L;
               
                @Override
                public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                  return new Tuple2<Integer, Long>(t.f0, t.f4);
                }
              })
               .with(new T3T5FlatJoin());
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "Hi,Hallo\n" +
            "Hello,Hallo Welt\n" +
            "Hello world,Hallo Welt wie gehts?\n" +
            "Hello world,ABC\n" +
            "I am fine.,HIJ\n" +
            "I am fine.,IJK\n";
      }
      /**
       *  Joins with POJOs
       */
      case 15: {
        /*
         * Join nested pojo against tuple (selected using a string)
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs =
            ds1.join(ds2).where("nestedPojo.longNumber").equalTo("f6");
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
      }
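
        The POJO type driving cases 15 through 19 is likewise not shown. Its structure is
        implied by the field expressions used as keys ("number", "str",
        "nestedPojo.longNumber", "nestedTupleWithCustom.f1.myInt", ...); the shape below
        is a reconstruction, not the original:

        // Assumed shape -- reconstructed from the key expressions in cases 15-19.
        public static class POJO {
          public int number;                                        // key "number"
          public String str;                                        // key "str"
          public Tuple2<Integer, CustomType> nestedTupleWithCustom; // keys "nestedTupleWithCustom.*"
          public NestedPojo nestedPojo;                             // key "nestedPojo.longNumber"
        }

        public static class NestedPojo {
          public long longNumber;
        }
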
     
      case 16: {
        /*
         * Join nested pojo against tuple (selected as an integer)
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs =
            ds1.join(ds2).where("nestedPojo.longNumber").equalTo(6); // <--- difference!
       
        joinDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
      }
      case 17: {
        /*
         * selecting multiple fields using expression language
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs =
            ds1.join(ds2).where("nestedPojo.longNumber", "number", "str").equalTo("f6","f0","f1");
       
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
       
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
       
      }
      case 18: {
        /*
         * nested into tuple
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs =
            ds1.join(ds2).where("nestedPojo.longNumber", "number","nestedTupleWithCustom.f0").equalTo("f6","f0","f2");
       
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
       
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
       
      }
      case 19: {
        /*
         * nested into tuple into pojo
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> ds2 = CollectionDataSets.getSmallTuplebasedDataSet(env);
        DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long> >> joinDs =
            ds1.join(ds2).where("nestedTupleWithCustom.f0","nestedTupleWithCustom.f1.myInt","nestedTupleWithCustom.f1.myLong").equalTo("f2","f3","f4");
       
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
       
        // return expected result
        return "1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n" +
             "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n" +
             "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";
       
      }
      case 20: {
        /*
         * Non-POJO test to verify that full-tuple keys are working.
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds1 = CollectionDataSets.getSmallNestedTupleDataSet(env);
        DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds2 = CollectionDataSets.getSmallNestedTupleDataSet(env);
        DataSet<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String> >> joinDs =
            ds1.join(ds2).where(0).equalTo("f0.f0", "f0.f1"); // key is now Tuple2<Integer, Integer>
       
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
       
        // return expected result
        return "((1,1),one),((1,1),one)\n" +
             "((2,2),two),((2,2),two)\n" +
             "((3,3),three),((3,3),three)\n";
       
      }
      case 21: {
        /*
         * Non-POJO test to verify "nested" tuple-element selection.
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds1 = CollectionDataSets.getSmallNestedTupleDataSet(env);
        DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds2 = CollectionDataSets.getSmallNestedTupleDataSet(env);
        DataSet<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String> >> joinDs =
            ds1.join(ds2).where("f0.f0").equalTo("f0.f0"); // key is now Integer from Tuple2<Integer, Integer>
       
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
       
        // return expected result
        return "((1,1),one),((1,1),one)\n" +
             "((2,2),two),((2,2),two)\n" +
             "((3,3),three),((3,3),three)\n";
       
      }
      case 22: {
        /*
         * full pojo with full tuple
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds1 = CollectionDataSets.getSmallPojoDataSet(env);
        DataSet<Tuple7<Long, Integer, Integer, Long, String, Integer, String>> ds2 = CollectionDataSets.getSmallTuplebasedDataSetMatchingPojo(env);
        DataSet<Tuple2<POJO, Tuple7<Long, Integer, Integer, Long, String, Integer, String> >> joinDs =
            ds1.join(ds2).where("*").equalTo("*");
       
        joinDs.writeAsCsv(resultPath);
        env.setDegreeOfParallelism(1);
        env.execute();
       
        // return expected result
        return "1 First (10,100,1000,One) 10000,(10000,10,100,1000,One,1,First)\n"+
            "2 Second (20,200,2000,Two) 20000,(20000,20,200,2000,Two,2,Second)\n"+
            "3 Third (30,300,3000,Three) 30000,(30000,30,300,3000,Three,3,Third)\n";

Examples of org.apache.flink.api.java.ExecutionEnvironment

       
        /*
         * check correctness of groupReduce on tuples with key field selector
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, Long>> reduceDs = ds.
              groupBy(1).reduceGroup(new Tuple3GroupReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1\n" +
              "5,2\n" +
              "15,3\n" +
              "34,4\n" +
              "65,5\n" +
              "111,6\n";
        }
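
        Tuple3GroupReduce is not part of the excerpt. The expected output ("5,2" for the
        two records with f1 == 2, i.e. 2 + 3 = 5) suggests it sums the Integer field per
        group and emits (sum, key); a sketch under that assumption:

        // Assumed implementation -- sums f0 per group, keeps the group key f1.
        public static class Tuple3GroupReduce implements
            GroupReduceFunction<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>> {

          @Override
          public void reduce(Iterable<Tuple3<Integer, Long, String>> values,
                             Collector<Tuple2<Integer, Long>> out) {
            int sum = 0;
            long key = 0;
            for (Tuple3<Integer, Long, String> t : values) {
              sum += t.f0;
              key = t.f1; // identical for every record in the group
            }
            out.collect(new Tuple2<Integer, Long>(sum, key));
          }
        }
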
        case 2: {
       
        /*
         * check correctness of groupReduce on tuples with multiple key field selector
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
          DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
              groupBy(4, 0).reduceGroup(new Tuple5GroupReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1,0,P-),1\n" +
              "2,3,0,P-),1\n" +
              "2,2,0,P-),2\n" +
              "3,9,0,P-),2\n" +
              "3,6,0,P-),3\n" +
              "4,17,0,P-),1\n" +
              "4,17,0,P-),2\n" +
              "5,11,0,P-),1\n" +
              "5,29,0,P-),2\n" +
              "5,25,0,P-),3\n";
        }
        case 3: {
       
        /*
         * check correctness of groupReduce on tuples with key field selector and group sorting
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).sortGroup(2, Order.ASCENDING).reduceGroup(new Tuple3SortedGroupReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1,Hi\n" +
              "5,2,Hello-Hello world\n" +
              "15,3,Hello world, how are you?-I am fine.-Luke Skywalker\n" +
              "34,4,Comment#1-Comment#2-Comment#3-Comment#4\n" +
              "65,5,Comment#5-Comment#6-Comment#7-Comment#8-Comment#9\n" +
              "111,6,Comment#10-Comment#11-Comment#12-Comment#13-Comment#14-Comment#15\n";

        }
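
        Tuple3SortedGroupReduce (also used with descending sort in cases 13 and 20 below)
        presumably extends the summing reducer above by concatenating the String fields in
        iteration order, i.e. in the order established by sortGroup(...). A sketch,
        reconstructed from the "5,2,Hello-Hello world" output:

        // Assumed implementation -- concatenates f2 values in group-sorted order.
        public static class Tuple3SortedGroupReduce implements
            GroupReduceFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>> {

          @Override
          public void reduce(Iterable<Tuple3<Integer, Long, String>> values,
                             Collector<Tuple3<Integer, Long, String>> out) {
            int sum = 0;
            long key = 0;
            StringBuilder concat = new StringBuilder();
            for (Tuple3<Integer, Long, String> t : values) {
              sum += t.f0;
              key = t.f1;
              if (concat.length() > 0) {
                concat.append("-");
              }
              concat.append(t.f2); // records arrive in group-sorted order
            }
            out.collect(new Tuple3<Integer, Long, String>(sum, key, concat.toString()));
          }
        }
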
        case 4: {
        /*
         * check correctness of groupReduce on tuples with key extractor
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, Long>> reduceDs = ds.
              groupBy(new KeySelector<Tuple3<Integer, Long, String>, Long>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Long getKey(Tuple3<Integer, Long, String> in) {
                  return in.f1;
                }
              }).reduceGroup(new Tuple3GroupReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1\n" +
              "5,2\n" +
              "15,3\n" +
              "34,4\n" +
              "65,5\n" +
              "111,6\n";

        }
        case 5: {
       
        /*
         * check correctness of groupReduce on custom type with key extractor
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
          DataSet<CustomType> reduceDs = ds.
              groupBy(new KeySelector<CustomType, Integer>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Integer getKey(CustomType in) {
                  return in.myInt;
                }
              }).reduceGroup(new CustomTypeGroupReduce());

          reduceDs.writeAsText(resultPath);
          env.execute();

          // return expected result
          return "1,0,Hello!\n" +
              "2,3,Hello!\n" +
              "3,12,Hello!\n" +
              "4,30,Hello!\n" +
              "5,60,Hello!\n" +
              "6,105,Hello!\n";
        }
        case 6: {
       
        /*
         * check correctness of all-groupreduce for tuples
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.reduceGroup(new AllAddingTuple3GroupReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "231,91,Hello World\n";
        }
        case 7: {
        /*
         * check correctness of all-groupreduce for custom types
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
          DataSet<CustomType> reduceDs = ds.reduceGroup(new AllAddingCustomTypeGroupReduce());

          reduceDs.writeAsText(resultPath);
          env.execute();

          // return expected result
          return "91,210,Hello!";
        }
        case 8: {
       
        /*
         * check correctness of groupReduce with broadcast set
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).reduceGroup(new BCTuple3GroupReduce()).withBroadcastSet(intDs, "ints");

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1,55\n" +
              "5,2,55\n" +
              "15,3,55\n" +
              "34,4,55\n" +
              "65,5,55\n" +
              "111,6,55\n";
        }
        case 9: {
       
        /*
         * check correctness of groupReduce if UDF returns input objects multiple times and changes them in between
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).reduceGroup(new InputReturningTuple3GroupReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "11,1,Hi!\n" +
              "21,1,Hi again!\n" +
              "12,2,Hi!\n" +
              "22,2,Hi again!\n" +
              "13,2,Hi!\n" +
              "23,2,Hi again!\n";
        }
        case 10: {
       
        /*
         * check correctness of groupReduce on custom type with key extractor and combine
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
          DataSet<CustomType> reduceDs = ds.
              groupBy(new KeySelector<CustomType, Integer>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Integer getKey(CustomType in) {
                  return in.myInt;
                }
              }).reduceGroup(new CustomTypeGroupReduceWithCombine());

          reduceDs.writeAsText(resultPath);
          env.execute();

          // return expected result
          if (collectionExecution) {
            return null;

          } else {
            return "1,0,test1\n" +
                "2,3,test2\n" +
                "3,12,test3\n" +
                "4,30,test4\n" +
                "5,60,test5\n" +
                "6,105,test6\n";
          }
        }
        case 11: {
       
        /*
         * check correctness of groupReduce on tuples with combine
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(2); // important because it determines how often the combiner is called

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, String>> reduceDs = ds.
              groupBy(1).reduceGroup(new Tuple3GroupReduceWithCombine());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          if (collectionExecution) {
            return null;

          } else {
            return "1,test1\n" +
                "5,test2\n" +
                "15,test3\n" +
                "34,test4\n" +
                "65,test5\n" +
                "111,test6\n";
          }
        }
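
        Cases 10 and 11 (and case 12 below) return null under collection execution
        because the collection-based runtime never invokes a combiner, so output that
        depends on the combine phase (the "test..." strings here) cannot be checked
        there. The *WithCombine reducers are not shown; in the DataSet API of this
        vintage a group reduce is typically made combinable by extending
        RichGroupReduceFunction, marking it @RichGroupReduceFunction.Combinable, and
        overriding combine(). A sketch of Tuple3GroupReduceWithCombine reconstructed from
        the "5,test2"-style output (an assumption, not the original):

        // Assumed implementation -- combine() pre-aggregates on the input type,
        // reduce() finishes on the (possibly pre-combined) partial results.
        @RichGroupReduceFunction.Combinable
        public static class Tuple3GroupReduceWithCombine extends
            RichGroupReduceFunction<Tuple3<Integer, Long, String>, Tuple2<Integer, String>> {

          @Override
          public void combine(Iterable<Tuple3<Integer, Long, String>> values,
                              Collector<Tuple3<Integer, Long, String>> out) {
            // Pre-aggregate: sum f0, tag the group with "test" + key.
            Tuple3<Integer, Long, String> result = new Tuple3<Integer, Long, String>(0, 0L, "");
            for (Tuple3<Integer, Long, String> t : values) {
              result.f0 += t.f0;
              result.f1 = t.f1;
              result.f2 = "test" + t.f1;
            }
            out.collect(result);
          }

          @Override
          public void reduce(Iterable<Tuple3<Integer, Long, String>> values,
                             Collector<Tuple2<Integer, String>> out) {
            int sum = 0;
            String s = "";
            for (Tuple3<Integer, Long, String> t : values) {
              sum += t.f0;
              s = t.f2;
            }
            out.collect(new Tuple2<Integer, String>(sum, s));
          }
        }
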
        // all-groupreduce with combine
        case 12: {
       
        /*
         * check correctness of all-groupreduce for tuples with combine
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env)
              .map(new IdentityMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);

          Configuration cfg = new Configuration();
          cfg.setString(PactCompiler.HINT_SHIP_STRATEGY, PactCompiler.HINT_SHIP_STRATEGY_REPARTITION);
          DataSet<Tuple2<Integer, String>> reduceDs = ds.reduceGroup(new Tuple3AllGroupReduceWithCombine())
              .withParameters(cfg);

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          if (collectionExecution) {
            return null;
          } else {
            return "322,testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";
          }
        }
        case 13: {
       
        /*
         * check correctness of groupReduce with descending group sort
         */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).sortGroup(2, Order.DESCENDING).reduceGroup(new Tuple3SortedGroupReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1,Hi\n" +
              "5,2,Hello world-Hello\n" +
              "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n" +
              "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n" +
              "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n" +
              "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";

        }
        case 14: {
          /*
           * check correctness of groupReduce on tuples with tuple-returning key selector
           */

            final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

            DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
            DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
                groupBy(
                    new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
                      private static final long serialVersionUID = 1L;
       
                      @Override
                      public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                        return new Tuple2<Integer, Long>(t.f0, t.f4);
                      }
                    }).reduceGroup(new Tuple5GroupReduce());

            reduceDs.writeAsCsv(resultPath);
            env.execute();

            // return expected result
            return "1,1,0,P-),1\n" +
                "2,3,0,P-),1\n" +
                "2,2,0,P-),2\n" +
                "3,9,0,P-),2\n" +
                "3,6,0,P-),3\n" +
                "4,17,0,P-),1\n" +
                "4,17,0,P-),2\n" +
                "5,11,0,P-),1\n" +
                "5,29,0,P-),2\n" +
                "5,25,0,P-),3\n";
        }
        case 15: {
          /*
           * check that input of combiner is also sorted for combinable groupReduce with group sorting
           */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).sortGroup(0, Order.ASCENDING).reduceGroup(new OrderCheckingCombinableReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1,Hi\n" +
              "2,2,Hello\n" +
              "4,3,Hello world, how are you?\n" +
              "7,4,Comment#1\n" +
              "11,5,Comment#5\n" +
              "16,6,Comment#10\n";
         
        }
        case 16: {
          /*
           * Deep nesting test
           * + null value in pojo
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
         
          DataSet<CrazyNested> ds = CollectionDataSets.getCrazyNestedDataSet(env);
          DataSet<Tuple2<String, Integer>> reduceDs = ds.groupBy("nest_Lvl1.nest_Lvl2.nest_Lvl3.nest_Lvl4.f1nal")
              .reduceGroup(new GroupReduceFunction<CollectionDataSets.CrazyNested, Tuple2<String, Integer>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void reduce(Iterable<CrazyNested> values,
                    Collector<Tuple2<String, Integer>> out)
                    throws Exception {
                  int c = 0; String n = null;
                  for(CrazyNested v : values) {
                    c++; // count the records in this group
                    n = v.nest_Lvl1.nest_Lvl2.nest_Lvl3.nest_Lvl4.f1nal;
                  }
                  out.collect(new Tuple2<String, Integer>(n,c));
                }});
         
          reduceDs.writeAsCsv(resultPath);
          env.execute();
         
          // return expected result
          return "aa,1\nbb,2\ncc,3\n";
        }
        case 17: {
          /*
           * Test Pojo extending from tuple WITH custom fields
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
         
          DataSet<FromTupleWithCTor> ds = CollectionDataSets.getPojoExtendingFromTuple(env);
          DataSet<Integer> reduceDs = ds.groupBy("special", "f2")
              .reduceGroup(new GroupReduceFunction<FromTupleWithCTor, Integer>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void reduce(Iterable<FromTupleWithCTor> values,
                    Collector<Integer> out)
                    throws Exception {
                  int c = 0;
                  for (FromTupleWithCTor v : values) {
                    c++;
                  }
                  out.collect(c);
                }});
         
          reduceDs.writeAsText(resultPath);
          env.execute();
         
          // return expected result
          return "3\n2\n";
        }
        case 18: {
          /*
           * Test Pojo containing a Writable and Tuples
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
         
          DataSet<PojoContainingTupleAndWritable> ds = CollectionDataSets.getPojoContainingTupleAndWritable(env);
          DataSet<Integer> reduceDs = ds.groupBy("hadoopFan", "theTuple.*") // full tuple selection
              .reduceGroup(new GroupReduceFunction<PojoContainingTupleAndWritable, Integer>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void reduce(Iterable<PojoContainingTupleAndWritable> values,
                    Collector<Integer> out)
                    throws Exception {
                  int c = 0;
                  for(PojoContainingTupleAndWritable v : values) {
                    c++;
                  }
                  out.collect(c);
                }});
         
          reduceDs.writeAsText(resultPath);
          env.execute();
         
          // return expected result
          return "1\n5\n";
        }
        case 19: {
          /*
           * Test Tuple containing pojos and regular fields
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
         
          DataSet<Tuple3<Integer,CrazyNested, POJO>> ds = CollectionDataSets.getTupleContainingPojos(env);
          DataSet<Integer> reduceDs = ds.groupBy("f0", "f1.*") // nested full tuple selection
              .reduceGroup(new GroupReduceFunction<Tuple3<Integer,CrazyNested, POJO>, Integer>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void reduce(Iterable<Tuple3<Integer,CrazyNested, POJO>> values,
                    Collector<Integer> out)
                    throws Exception {
                  int c = 0;
                  for(Tuple3<Integer,CrazyNested, POJO> v : values) {
                    c++;
                  }
                  out.collect(c);
                }});
         
          reduceDs.writeAsText(resultPath);
          env.execute();
         
          // return expected result
          return "3\n1\n";
        }
        case 20: {
          /*
           * Test string-based definition on group sort, based on test:
           * check correctness of groupReduce with descending group sort
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
              groupBy(1).sortGroup("f2", Order.DESCENDING).reduceGroup(new Tuple3SortedGroupReduce());

          reduceDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1,Hi\n" +
              "5,2,Hello world-Hello\n" +
              "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n" +
              "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n" +
              "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n" +
              "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";

        }
        case 21: {
          /*
           * Test int-based definition on group sort, for (full) nested Tuple
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
          DataSet<String> reduceDs = ds.groupBy("f1").sortGroup(0, Order.DESCENDING).reduceGroup(new NestedTupleReducer());
          reduceDs.writeAsText(resultPath);
          env.execute();

          // return expected result
          return "a--(2,1)-(1,3)-(1,2)-\n" +
              "b--(2,2)-\n"+
              "c--(4,9)-(3,6)-(3,3)-\n";
        }
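
        NestedTupleReducer is not included; the "a--(2,1)-(1,3)-(1,2)-" output suggests
        it prints the group key once, then each nested tuple in iteration order. A sketch
        under that assumption:

        // Assumed implementation -- reconstructed from the expected output format.
        public static class NestedTupleReducer implements
            GroupReduceFunction<Tuple2<Tuple2<Integer, Integer>, String>, String> {

          @Override
          public void reduce(Iterable<Tuple2<Tuple2<Integer, Integer>, String>> values,
                             Collector<String> out) {
            boolean first = true;
            StringBuilder concat = new StringBuilder();
            for (Tuple2<Tuple2<Integer, Integer>, String> value : values) {
              if (first) {
                concat.append(value.f1).append("--"); // the String grouping key, e.g. "a"
                first = false;
              }
              concat.append(value.f0).append("-");    // the nested Tuple2, e.g. "(2,1)"
            }
            out.collect(concat.toString());
          }
        }
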
        case 22: {
          /*
           * Test int-based definition on group sort, for (partial) nested Tuple ASC
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
          // f0.f0 is first integer
          DataSet<String> reduceDs = ds.groupBy("f1")
              .sortGroup("f0.f0", Order.ASCENDING)
              .sortGroup("f0.f1", Order.ASCENDING)
              .reduceGroup(new NestedTupleReducer());
          reduceDs.writeAsText(resultPath);
          env.execute();
         
          // return expected result
          return "a--(1,2)-(1,3)-(2,1)-\n" +
              "b--(2,2)-\n"+
              "c--(3,3)-(3,6)-(4,9)-\n";
        }
        case 23: {
          /*
           * Test string-based definition on group sort, for (partial) nested Tuple DESC
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
          // f0.f0 is first integer
          DataSet<String> reduceDs = ds.groupBy("f1").sortGroup("f0.f0", Order.DESCENDING).reduceGroup(new NestedTupleReducer());
          reduceDs.writeAsText(resultPath);
          env.execute();
         
          // return expected result
          return "a--(2,1)-(1,3)-(1,2)-\n" +
              "b--(2,2)-\n"+
              "c--(4,9)-(3,3)-(3,6)-\n";
        }
        case 24: {
          /*
           * Test string-based definition on group sort, for two grouping keys
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
          // f0.f0 is first integer
          DataSet<String> reduceDs = ds.groupBy("f1").sortGroup("f0.f0", Order.DESCENDING).sortGroup("f0.f1", Order.DESCENDING).reduceGroup(new NestedTupleReducer());
          reduceDs.writeAsText(resultPath);
          env.execute();
         
          // return expected result
          return "a--(2,1)-(1,3)-(1,2)-\n" +
              "b--(2,2)-\n"+
              "c--(4,9)-(3,6)-(3,3)-\n";
        }
        case 25: {
          /*
           * Test string-based definition on group sort, for two grouping keys with Pojos
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<PojoContainingTupleAndWritable> ds = CollectionDataSets.getGroupSortedPojoContainingTupleAndWritable(env);
          // sort within each group on both tuple fields, descending
          DataSet<String> reduceDs = ds.groupBy("hadoopFan").sortGroup("theTuple.f0", Order.DESCENDING).sortGroup("theTuple.f1", Order.DESCENDING)
              .reduceGroup(new GroupReduceFunction<CollectionDataSets.PojoContainingTupleAndWritable, String>() {
                @Override
                public void reduce(
                    Iterable<PojoContainingTupleAndWritable> values,
                    Collector<String> out) throws Exception {
                  boolean once = false;
                  StringBuilder concat = new StringBuilder();
                  for(PojoContainingTupleAndWritable value : values) {
                    if(!once) {
                      concat.append(value.hadoopFan.get());
                      concat.append("---");
                      once = true;
                    }
                    concat.append(value.theTuple);
                    concat.append("-");
                  }
                  out.collect(concat.toString());
                }
          });
          reduceDs.writeAsText(resultPath);
          env.execute();
         
          // return expected result
          return "1---(10,100)-\n" +
              "2---(30,600)-(30,400)-(30,200)-(20,201)-(20,200)-\n";
        }
        case 26: {
          /*
           * Test grouping with pojo containing multiple pojos (was a bug)
           */
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
          env.setDegreeOfParallelism(1);

          DataSet<CollectionDataSets.PojoWithMultiplePojos> ds = CollectionDataSets.getPojoWithMultiplePojos(env);
          DataSet<String> reduceDs = ds.groupBy("p2.a2")
              .reduceGroup(new GroupReduceFunction<CollectionDataSets.PojoWithMultiplePojos, String>() {
                @Override
                public void reduce(
                    Iterable<CollectionDataSets.PojoWithMultiplePojos> values,
                    Collector<String> out) throws Exception {
                  StringBuilder concat = new StringBuilder();
                  for(CollectionDataSets.PojoWithMultiplePojos value : values) {
                    concat.append(value.p2.a2);
                  }
                  out.collect(concat.toString());
                }
              });
          reduceDs.writeAsText(resultPath);
          env.execute();

          // return expected result
          return "b\nccc\nee\n";
        }
       

Examples of org.apache.flink.api.java.ExecutionEnvironment

  private static class SumMinMaxProgs {

    public static String runProgram(int progId, String resultPath) throws Exception {
      switch(progId) {
        case 1: {
          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Integer, Long>> sumDs = ds
              .sum(0)
              .andMax(1)
              .project(0, 1).types(Integer.class, Long.class);

          sumDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "231,6\n";
        }
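
          sum(0) and andMax(1) chain two aggregations into a single operator: per group
          (here the whole DataSet) the result holds the sum of field 0 (231) and the
          maximum of field 1 (6). The shorthand is presumably equivalent to the explicit
          aggregate form of the same API (using the Aggregations enum):

          // Presumed equivalent spelled-out form of ds.sum(0).andMax(1):
          DataSet<Tuple3<Integer, Long, String>> aggregated = ds
              .aggregate(Aggregations.SUM, 0) // 231 = sum of all f0 values
              .and(Aggregations.MAX, 1);      // 6   = max of all f1 values
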
        case 2: {
        /*
         * Grouped Aggregate
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
              .sum(0)
              .project(1, 0).types(Long.class, Integer.class);

          aggregateDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1,1\n" +
              "2,5\n" +
              "3,15\n" +
              "4,34\n" +
              "5,65\n" +
              "6,111\n";
        }
        case 3: {
        /*
         * Nested Aggregate
         */

          final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

          DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
          DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
              .min(0)
              .min(0)
              .project(0).types(Integer.class);

          aggregateDs.writeAsCsv(resultPath);
          env.execute();

          // return expected result
          return "1\n";
        }
        default:

Examples of org.apache.flink.api.java.ExecutionEnvironment

 
  private static class DependencyConnectedComponentsProgram {
   
    public static String runProgram(String resultPath) throws Exception {
     
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DOP);
     
      DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
      DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);
      int keyPosition = 0;
     
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
          initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, keyPosition);
     
      DataSet<Long> candidates = iteration.getWorkset().join(edges).where(0).equalTo(0)
          .with(new FindCandidatesJoin())
          .groupBy(new KeySelector<Long, Long>() {
                        public Long getKey(Long id) { return id; }
                      }).reduceGroup(new RemoveDuplicatesReduce());
     
      DataSet<Tuple2<Long, Long>> candidatesDependencies =
          candidates.join(edges)
          .where(new KeySelector<Long, Long>() {
                        public Long getKey(Long id) { return id; }
                      }).equalTo(new KeySelector<Tuple2<Long, Long>, Long>() {
                        public Long getKey(Tuple2<Long, Long> vertexWithId)
                        { return vertexWithId.f1; }
                      }).with(new FindCandidatesDependenciesJoin());
     
      DataSet<Tuple2<Long, Long>> verticesWithNewComponents =
          candidatesDependencies.join(iteration.getSolutionSet()).where(0).equalTo(0)
          .with(new NeighborWithComponentIDJoin())
          .groupBy(0).reduceGroup(new MinimumReduce());
     
      DataSet<Tuple2<Long, Long>> updatedComponentId =
          verticesWithNewComponents.join(iteration.getSolutionSet()).where(0).equalTo(0)
          .flatMap(new MinimumIdFilter());
     
      iteration.closeWith(updatedComponentId, updatedComponentId).writeAsText(resultPath);
     
      env.execute();
     
      return resultPath;
    }
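
      closeWith(updatedComponentId, updatedComponentId) feeds the same DataSet back as
      both the solution-set delta and the next workset, so the iteration terminates once
      no component IDs change (empty workset) or MAX_ITERATIONS is reached. The helper
      UDFs are not shown; as one example, MinimumReduce presumably keeps the smallest
      candidate component ID per vertex. A sketch under that assumption:

      // Assumed implementation -- emits the minimum component ID (f1) per vertex (f0).
      public static class MinimumReduce implements
          GroupReduceFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {

        @Override
        public void reduce(Iterable<Tuple2<Long, Long>> values,
                           Collector<Tuple2<Long, Long>> out) {
          Long vertexId = null;
          long minComponent = Long.MAX_VALUE;
          for (Tuple2<Long, Long> value : values) {
            vertexId = value.f0; // grouping key: the vertex ID
            minComponent = Math.min(minComponent, value.f1);
          }
          out.collect(new Tuple2<Long, Long>(vertexId, minComponent));
        }
      }
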

Examples of org.apache.flink.api.java.ExecutionEnvironment

      case 1: {
        /*
         * Test non-passing flatmap
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
        DataSet<String> nonPassingFlatMapDs = ds.
            flatMap(new FlatMapFunction<String, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(String value, Collector<String> out) throws Exception {
                if ( value.contains("bananas") ) {
                  out.collect(value);
                }
              }
            });
       
        nonPassingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "\n";
      }
      case 2: {
        /*
         * Test data duplicating flatmap
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<String> ds = CollectionDataSets.getStringDataSet(env);
        DataSet<String> duplicatingFlatMapDs = ds.
            flatMap(new FlatMapFunction<String, String>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(String value, Collector<String> out) throws Exception {
                  out.collect(value);
                  out.collect(value.toUpperCase());
              }
            });
       
        duplicatingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "Hi\n" + "HI\n" +
            "Hello\n" + "HELLO\n" +
            "Hello world\n" + "HELLO WORLD\n" +
            "Hello world, how are you?\n" + "HELLO WORLD, HOW ARE YOU?\n" +
            "I am fine.\n" + "I AM FINE.\n" +
            "Luke Skywalker\n" + "LUKE SKYWALKER\n" +
            "Random comment\n" + "RANDOM COMMENT\n" +
            "LOL\n" + "LOL\n";
      }
      case 3: {
        /*
         * Test flatmap with varying number of emitted tuples
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> varyingTuplesMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;

              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                final int numTuples = value.f0 % 3;
                for ( int i = 0; i < numTuples; i++ ) {
                  out.collect(value);
                }
              }
            });
       
        varyingTuplesMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return  "1,1,Hi\n" +
            "2,2,Hello\n" + "2,2,Hello\n" +
            "4,3,Hello world, how are you?\n" +
            "5,3,I am fine.\n" + "5,3,I am fine.\n" +
            "7,4,Comment#1\n" +
            "8,4,Comment#2\n" + "8,4,Comment#2\n" +
            "10,4,Comment#4\n" +
            "11,5,Comment#5\n" + "11,5,Comment#5\n" +
            "13,5,Comment#7\n" +
            "14,5,Comment#8\n" + "14,5,Comment#8\n" +
            "16,6,Comment#10\n" +
            "17,6,Comment#11\n" + "17,6,Comment#11\n" +
            "19,6,Comment#13\n" +
            "20,6,Comment#14\n" + "20,6,Comment#14\n";
      }
      case 4: {
        /*
         * Test type conversion flatmapper (Custom -> Tuple)
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> typeConversionFlatMapDs = ds.
            flatMap(new FlatMapFunction<CustomType, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;
              private final Tuple3<Integer, Long, String> outTuple =
                  new Tuple3<Integer, Long, String>();
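              // The output tuple is deliberately reused across flatMap() calls to avoid
              // per-record allocations; its fields are overwritten before each collect().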
             
              @Override
              public void flatMap(CustomType value, Collector<Tuple3<Integer, Long, String>> out)
                  throws Exception {
                outTuple.setField(value.myInt, 0);
                outTuple.setField(value.myLong, 1);
                outTuple.setField(value.myString, 2);
                out.collect(outTuple);
              }
            });
       
        typeConversionFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return   "1,0,Hi\n" +
            "2,1,Hello\n" +
            "2,2,Hello world\n" +
            "3,3,Hello world, how are you?\n" +
            "3,4,I am fine.\n" +
            "3,5,Luke Skywalker\n" +
            "4,6,Comment#1\n" +
            "4,7,Comment#2\n" +
            "4,8,Comment#3\n" +
            "4,9,Comment#4\n" +
            "5,10,Comment#5\n" +
            "5,11,Comment#6\n" +
            "5,12,Comment#7\n" +
            "5,13,Comment#8\n" +
            "5,14,Comment#9\n" +
            "6,15,Comment#10\n" +
            "6,16,Comment#11\n" +
            "6,17,Comment#12\n" +
            "6,18,Comment#13\n" +
            "6,19,Comment#14\n" +
            "6,20,Comment#15\n";
      }
      case 5: {
        /*
         * Test type conversion flatmapper (Tuple -> Basic)
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<String> typeConversionFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<String> out) throws Exception {
                out.collect(value.f2);
              }
            });
       
        typeConversionFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "Hi\n" + "Hello\n" + "Hello world\n" +
            "Hello world, how are you?\n" +
            "I am fine.\n" + "Luke Skywalker\n" +
            "Comment#1\n" "Comment#2\n" +
            "Comment#3\n" "Comment#4\n" +
            "Comment#5\n" "Comment#6\n" +
            "Comment#7\n" + "Comment#8\n" +
            "Comment#9\n" "Comment#10\n" +
            "Comment#11\n" + "Comment#12\n" +
            "Comment#13\n" + "Comment#14\n" +
            "Comment#15\n";
      }
      case 6: {
        /*
         * Test flatmapper whose UDF returns the input object
         * multiple times and modifies it in between
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> inputObjFlatMapDs = ds.
            flatMap(new FlatMapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() {
              private static final long serialVersionUID = 1L;
             
              @Override
              public void flatMap( Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                final int numTuples = value.f0 % 4;
                for ( int i = 0; i < numTuples; i++ ) {
                  value.setField(i, 0);
                  out.collect(value);
                }             
              }
            });
       
        inputObjFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return  "0,1,Hi\n" +
            "0,2,Hello\n" + "1,2,Hello\n" +
            "0,2,Hello world\n" + "1,2,Hello world\n" + "2,2,Hello world\n" +
            "0,3,I am fine.\n" +
            "0,3,Luke Skywalker\n" + "1,3,Luke Skywalker\n" +
            "0,4,Comment#1\n" + "1,4,Comment#1\n" + "2,4,Comment#1\n" +
            "0,4,Comment#3\n" +
            "0,4,Comment#4\n" + "1,4,Comment#4\n" +
            "0,5,Comment#5\n" + "1,5,Comment#5\n" + "2,5,Comment#5\n" +
            "0,5,Comment#7\n" +
            "0,5,Comment#8\n" + "1,5,Comment#8\n" +
            "0,5,Comment#9\n" + "1,5,Comment#9\n" + "2,5,Comment#9\n" +
            "0,6,Comment#11\n" +
            "0,6,Comment#12\n" + "1,6,Comment#12\n" +
            "0,6,Comment#13\n" + "1,6,Comment#13\n" + "2,6,Comment#13\n" +
            "0,6,Comment#15\n";
      }
      case 7: {
        /*
         * Test flatmap with broadcast set
         */
         
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> ints = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> bcFlatMapDs = ds.
            flatMap(new RichFlatMapFunction<Tuple3<Integer,Long,String>, Tuple3<Integer,Long,String>>() {
              private static final long serialVersionUID = 1L;
              private final Tuple3<Integer, Long, String> outTuple =
                  new Tuple3<Integer, Long, String>();
              private Integer f2Replace = 0;
             
              @Override
              public void open(Configuration config) {
                Collection<Integer> ints = this.getRuntimeContext().getBroadcastVariable("ints");
                int sum = 0;
                for(Integer i : ints) {
                  sum += i;
                }
                f2Replace = sum;
              }
             
              @Override
              public void flatMap(Tuple3<Integer, Long, String> value,
                  Collector<Tuple3<Integer, Long, String>> out) throws Exception {
                outTuple.setFields(f2Replace, value.f1, value.f2);
                out.collect(outTuple);
              }
            }).withBroadcastSet(ints, "ints");
        bcFlatMapDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return   "55,1,Hi\n" +
            "55,2,Hello\n" +
            "55,2,Hello world\n" +

Examples of org.apache.flink.api.java.ExecutionEnvironment

   
    final int NUM_ITERS = 4;
    final double expectedFactor = Math.pow(7, NUM_ITERS);
   
    // this is an artificial program; it does not compute anything meaningful
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    @SuppressWarnings("unchecked")
    DataSet<Tuple2<Long, Double>> initialData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0), new Tuple2<Long, Double>(2L, 2.0),
                              new Tuple2<Long, Double>(3L, 3.0), new Tuple2<Long, Double>(4L, 4.0),
                              new Tuple2<Long, Double>(5L, 5.0), new Tuple2<Long, Double>(6L, 6.0));
   
    DataSet<Tuple2<Long, Double>> result = MultipleJoinsWithSolutionSetCompilerTest.constructPlan(initialData, NUM_ITERS);
   
    List<Tuple2<Long, Double>> resultCollector = new ArrayList<Tuple2<Long,Double>>();
    result.output(new LocalCollectionOutputFormat<Tuple2<Long,Double>>(resultCollector));
   
    env.execute();
   
    for (Tuple2<Long, Double> tuple : resultCollector) {
      Assert.assertEquals(expectedFactor * tuple.f0, tuple.f1.doubleValue(), 0.0);
    }
  }
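
// MultipleJoinsWithSolutionSetCompilerTest.constructPlan is not shown here; the
// real plan chains several joins against an iteration's solution set. As a purely
// hypothetical stand-in that produces the numbers the assertion checks (each of
// the NUM_ITERS rounds multiplies f1 by 7), a bulk-iteration sketch could be:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.IterativeDataSet;
import org.apache.flink.api.java.tuple.Tuple2;

public static DataSet<Tuple2<Long, Double>> constructPlan(
    DataSet<Tuple2<Long, Double>> input, int numIters) {
  IterativeDataSet<Tuple2<Long, Double>> iteration = input.iterate(numIters);
  DataSet<Tuple2<Long, Double>> timesSeven = iteration
      .map(new MapFunction<Tuple2<Long, Double>, Tuple2<Long, Double>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<Long, Double> map(Tuple2<Long, Double> value) {
          // multiply the Double field by 7 once per iteration round
          return new Tuple2<Long, Double>(value.f0, value.f1 * 7.0);
        }
      });
  return iteration.closeWith(timesSeven);
}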

Examples of org.apache.flink.api.java.ExecutionEnvironment

      case 1: {
        /*
         * Reduce on tuples with key field selector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new Tuple3Reduce("B-)"));
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,B-)\n" +
            "15,3,B-)\n" +
            "34,4,B-)\n" +
            "65,5,B-)\n" +
            "111,6,B-)\n";
      }
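
// Tuple3Reduce lives in the shared test utilities and is not part of this
// snippet. A minimal sketch consistent with the expected output above (f0 summed
// per group, the key f1 kept, f2 replaced by the constructor argument; singleton
// groups such as "1,1,Hi" never reach reduce() and stay unchanged):

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;

public static class Tuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
  private static final long serialVersionUID = 1L;
  private final String f2Replace;

  public Tuple3Reduce(String f2Replace) {
    this.f2Replace = f2Replace;
  }

  @Override
  public Tuple3<Integer, Long, String> reduce(
      Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
    // sum the Integer field, keep the grouping key, replace the String field
    return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1, f2Replace);
  }
}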
      case 2: {
        /*
         * Reduce on tuples with multiple key field selectors
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy(4,0).reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "3,9,0,P-),2\n" +
            "3,6,5,BCD,3\n" +
            "4,17,0,P-),1\n" +
            "4,17,0,P-),2\n" +
            "5,11,10,GHI,1\n" +
            "5,29,0,P-),2\n" +
            "5,25,0,P-),3\n";
      }
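
// Tuple5Reduce is likewise not shown. A sketch inferred from the expected
// results: f1 is summed per (f4, f0) group, f2 is zeroed, f3 is replaced with
// "P-)", and the key fields f0/f4 are kept; singleton groups pass through.

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple5;

public static class Tuple5Reduce
    implements ReduceFunction<Tuple5<Integer, Long, Integer, String, Long>> {
  private static final long serialVersionUID = 1L;

  @Override
  public Tuple5<Integer, Long, Integer, String, Long> reduce(
      Tuple5<Integer, Long, Integer, String, Long> v1,
      Tuple5<Integer, Long, Integer, String, Long> v2) {
    return new Tuple5<Integer, Long, Integer, String, Long>(
        v1.f0, v1.f1 + v2.f1, 0, "P-)", v1.f4);
  }
}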
      case 3: {
        /*
         * Reduce on tuples with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Long getKey(Tuple3<Integer, Long, String> in) {
                    return in.f1;
                  }
                }).reduce(new Tuple3Reduce("B-)"));
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,B-)\n" +
            "15,3,B-)\n" +
            "34,4,B-)\n" +
            "65,5,B-)\n" +
            "111,6,B-)\n";
       
      }
      case 4: {
        /*
         * Reduce on custom type with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            groupBy(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                }).reduce(new CustomTypeReduce());
       
        reduceDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "1,0,Hi\n" +
            "2,3,Hello!\n" +
            "3,12,Hello!\n" +
            "4,30,Hello!\n" +
            "5,60,Hello!\n" +
            "6,105,Hello!\n";
      }
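
// CustomTypeReduce is not shown in this snippet. A minimal sketch consistent
// with the expected output (myLong summed per myInt group, myString replaced),
// assuming the CustomType POJO with public myInt/myLong/myString fields used above:

import org.apache.flink.api.common.functions.ReduceFunction;

public static class CustomTypeReduce implements ReduceFunction<CustomType> {
  private static final long serialVersionUID = 1L;

  @Override
  public CustomType reduce(CustomType v1, CustomType v2) {
    CustomType out = new CustomType();
    out.myInt = v1.myInt;                // grouping key
    out.myLong = v1.myLong + v2.myLong;  // summed
    out.myString = "Hello!";             // replaced
    return out;
  }
}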
      case 5: {
        /*
         * All-reduce for tuple
         */

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            reduce(new AllAddingTuple3Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "231,91,Hello World\n";
      }
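
// AllAddingTuple3Reduce, inferred from the expected result "231,91,Hello World":
// both numeric fields are summed across the whole (ungrouped) data set and the
// String field is replaced. A minimal sketch:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;

public static class AllAddingTuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
  private static final long serialVersionUID = 1L;

  @Override
  public Tuple3<Integer, Long, String> reduce(
      Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
    return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1 + v2.f1, "Hello World");
  }
}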
      case 6: {
        /*
         * All-reduce for custom types
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<CustomType> reduceDs = ds.
            reduce(new AllAddingCustomTypeReduce());
       
        reduceDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "91,210,Hello!";
      }
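
// AllAddingCustomTypeReduce is the custom-type counterpart; a sketch inferred
// from the expected result "91,210,Hello!" (myInt and myLong both summed over
// the whole data set), again assuming the CustomType POJO used above:

import org.apache.flink.api.common.functions.ReduceFunction;

public static class AllAddingCustomTypeReduce implements ReduceFunction<CustomType> {
  private static final long serialVersionUID = 1L;

  @Override
  public CustomType reduce(CustomType v1, CustomType v2) {
    CustomType out = new CustomType();
    out.myInt = v1.myInt + v2.myInt;
    out.myLong = v1.myLong + v2.myLong;
    out.myString = "Hello!";
    return out;
  }
}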
      case 7: {
       
        /*
         * Reduce with broadcast set
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Integer> intDs = CollectionDataSets.getIntegerDataSet(env);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new BCTuple3Reduce()).withBroadcastSet(intDs, "ints");
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,55\n" +
            "15,3,55\n" +
            "34,4,55\n" +
            "65,5,55\n" +
            "111,6,55\n";
      }
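
// BCTuple3Reduce is not shown. A sketch inferred from the expected results:
// f0 is summed, the key f1 is kept, and f2 is replaced with the sum of the
// broadcast "ints" set (1 + 2 + ... + 10 = 55), read in open():

import java.util.Collection;
import org.apache.flink.api.common.functions.RichReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;

public static class BCTuple3Reduce extends RichReduceFunction<Tuple3<Integer, Long, String>> {
  private static final long serialVersionUID = 1L;
  private String bcSum;

  @Override
  public void open(Configuration config) {
    Collection<Integer> ints = getRuntimeContext().getBroadcastVariable("ints");
    int sum = 0;
    for (Integer i : ints) {
      sum += i;
    }
    bcSum = String.valueOf(sum);
  }

  @Override
  public Tuple3<Integer, Long, String> reduce(
      Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
    return new Tuple3<Integer, Long, String>(v1.f0 + v2.f0, v1.f1, bcSum);
  }
}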
      case 8: {
        /*
         * Reduce with UDF that returns the second input object (check mutable object handling)
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.
            groupBy(1).reduce(new InputReturningTuple3Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "5,2,Hi again!\n" +
            "15,3,Hi again!\n" +
            "34,4,Hi again!\n" +
            "65,5,Hi again!\n" +
            "111,6,Hi again!\n";
      }
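
// InputReturningTuple3Reduce accumulates into and returns the *second* input
// object, which is exactly the mutable-object handling this case checks. A
// sketch consistent with the expected output:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;

public static class InputReturningTuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
  private static final long serialVersionUID = 1L;

  @Override
  public Tuple3<Integer, Long, String> reduce(
      Tuple3<Integer, Long, String> v1, Tuple3<Integer, Long, String> v2) {
    v2.f0 = v1.f0 + v2.f0;     // accumulate into the second input
    v2.f2 = "Hi again!";
    return v2;                 // return the (mutated) input object
  }
}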
      case 9: {
        /*
         * Reduce with a Tuple-returning KeySelector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy(
                new KeySelector<Tuple5<Integer,Long,Integer,String,Long>, Tuple2<Integer, Long>>() {
                  private static final long serialVersionUID = 1L;
   
                  @Override
                  public Tuple2<Integer, Long> getKey(Tuple5<Integer,Long,Integer,String,Long> t) {
                    return new Tuple2<Integer, Long>(t.f0, t.f4);
                  }
                }).reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +
            "3,9,0,P-),2\n" +
            "3,6,5,BCD,3\n" +
            "4,17,0,P-),1\n" +
            "4,17,0,P-),2\n" +
            "5,11,10,GHI,1\n" +
            "5,29,0,P-),2\n" +
            "5,25,0,P-),3\n";
      }
      case 10: {
        /*
         * Case 2 with String-based field expression
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> reduceDs = ds.
            groupBy("f4","f0").reduce(new Tuple5Reduce());
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,0,Hallo,1\n" +
            "2,3,2,Hallo Welt wie,1\n" +
            "2,2,1,Hallo Welt,2\n" +

Examples of org.apache.flink.api.java.ExecutionEnvironment

public class BulkIterationWithAllReducerITCase extends JavaProgramTestBase {

  @Override
  protected void testProgram() throws Exception {
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);
   
    IterativeDataSet<Integer> iteration = data.iterate(10);
   
    DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");
   
    final List<Integer> resultList = new ArrayList<Integer>();
    iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));
   
    env.execute();
   
    Assert.assertEquals(8, resultList.get(0).intValue());
  }
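
// PickOneAllReduce is not shown in this snippet. A heavily hedged sketch of what
// it presumably does, inferred from the assertion (the maximum, 8, wins after 10
// rounds): compare the group against one value broadcast from the previous
// partial solution and emit a larger element if one exists.

import java.util.Collection;
import org.apache.flink.api.common.functions.RichGroupReduceFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

public static class PickOneAllReduce extends RichGroupReduceFunction<Integer, Integer> {
  private static final long serialVersionUID = 1L;
  private Integer current;

  @Override
  public void open(Configuration config) {
    // the broadcast set is the previous partial solution; pick one element of it
    Collection<Integer> bc = getRuntimeContext().getBroadcastVariable("bc");
    current = bc.iterator().next();
  }

  @Override
  public void reduce(Iterable<Integer> values, Collector<Integer> out) {
    for (Integer v : values) {
      if (v > current) {
        out.collect(v);     // found an element that improves on the current one
        return;
      }
    }
    out.collect(current);   // no larger element: the maximum has been reached
  }
}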

Examples of org.apache.flink.api.java.ExecutionEnvironment

      case 0: {
        /*
         * Test hash partition by key field
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(1)
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
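
// UniqueLongMapper is not shown. A sketch inferred from the expected results:
// it emits each distinct Long key (f1) once per partition. Because
// partitionByHash(1) routes all tuples with the same f1 to one partition,
// the global output is exactly 1..6.

import java.util.HashSet;
import java.util.Set;
import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.Collector;

public static class UniqueLongMapper
    implements MapPartitionFunction<Tuple3<Integer, Long, String>, Long> {
  private static final long serialVersionUID = 1L;

  @Override
  public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Long> out) {
    Set<Long> seen = new HashSet<Long>();
    for (Tuple3<Integer, Long, String> t : values) {
      if (seen.add(t.f1)) {
        out.collect(t.f1);   // emit each key only once per partition
      }
    }
  }
}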
      case 1: {
        /*
         * Test hash partition by key selector
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(new KeySelector<Tuple3<Integer,Long,String>, Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public Long getKey(Tuple3<Integer, Long, String> value) throws Exception {
                return value.f1;
              }
             
            })
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
      case 2: {
        /*
         * Test forced rebalancing
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // generate some numbers in parallel
        DataSet<Long> ds = env.generateSequence(1,3000);
        DataSet<Tuple2<Integer, Integer>> uniqLongs = ds
            // introduce some partition skew by filtering
            .filter(new FilterFunction<Long>() {
              private static final long serialVersionUID = 1L;

              @Override
              public boolean filter(Long value) throws Exception {
                return value > 780;
              }
            })
            // rebalance
            .rebalance()
            // count values in each partition
            .map(new PartitionIndexMapper())
            .groupBy(0)
            .reduce(new ReduceFunction<Tuple2<Integer, Integer>>() {
              private static final long serialVersionUID = 1L;

              public Tuple2<Integer, Integer> reduce(Tuple2<Integer, Integer> v1, Tuple2<Integer, Integer> v2) {
                return new Tuple2<Integer, Integer>(v1.f0, v1.f1+v2.f1);
              }
            })
            // round counts to mitigate runtime scheduling effects (lazy split assignment)
            .map(new MapFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>(){
              private static final long serialVersionUID = 1L;

              @Override
              public Tuple2<Integer, Integer> map(Tuple2<Integer, Integer> value) throws Exception {
                value.f1 = (value.f1 / 10);
                return value;
              }
             
            });
       
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        StringBuilder result = new StringBuilder();
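        // 3000 generated values minus the 780 filtered out leave 2220 elements,
        // which rebalance() spreads evenly across all parallel partitions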
        int numPerPartition = 2220 / env.getDegreeOfParallelism() / 10;
        for (int i = 0; i < env.getDegreeOfParallelism(); i++) {
          result.append('(').append(i).append(',').append(numPerPartition).append(")\n");
        }
        // return expected result
        return result.toString();
      }
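
// PartitionIndexMapper is not shown here; a sketch consistent with the counting
// logic above: tag each element with the index of the processing subtask so the
// following groupBy(0).reduce(...) can count the elements per partition.

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

public static class PartitionIndexMapper extends RichMapFunction<Long, Tuple2<Integer, Integer>> {
  private static final long serialVersionUID = 1L;

  @Override
  public Tuple2<Integer, Integer> map(Long value) {
    int partitionIndex = getRuntimeContext().getIndexOfThisSubtask();
    return new Tuple2<Integer, Integer>(partitionIndex, 1);
  }
}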
      case 3: {
        /*
         * Test hash partition by key field and different DOP
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(3);
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash(1).setParallelism(4)
            .mapPartition(new UniqueLongMapper());
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return   "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
      }
      case 4: {
        /*
         * Test hash partition with key expression
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(3);
       
        DataSet<POJO> ds = CollectionDataSets.getDuplicatePojoDataSet(env);
        DataSet<Long> uniqLongs = ds
            .partitionByHash("nestedPojo.longNumber").setParallelism(4)
            .mapPartition(new UniqueNestedPojoLongMapper());
        uniqLongs.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return   "10000\n" +
            "20000\n" +
            "30000\n";

Examples of org.apache.flink.api.java.ExecutionEnvironment

    compareResultsByLinesInMemory(DATAPOINTS + DATAPOINTS + DATAPOINTS + DATAPOINTS, resultPath);
  }

  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Record> initialInput = env.readFile(new PointInFormat(), this.dataPath).setParallelism(1);
   
    IterativeDataSet<Record> iteration = initialInput.iterate(2);
   
    DataSet<Record> result = iteration.union(iteration).map(new IdentityMapper());
   
    iteration.closeWith(result).write(new PointOutFormat(), this.resultPath);
   
    env.execute();
  }
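
// PointInFormat, PointOutFormat and IdentityMapper are defined elsewhere in the
// test. IdentityMapper is presumably a plain pass-through map, used here only to
// give the union a successor operator inside the iteration, e.g.:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.types.Record;

public static class IdentityMapper implements MapFunction<Record, Record> {
  private static final long serialVersionUID = 1L;

  @Override
  public Record map(Record value) {
    return value;
  }
}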