Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment
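
The snippets below come from Flink test programs built on the Java DataSet API. As a baseline, here is a minimal, self-contained sketch of the usual life cycle: obtain an ExecutionEnvironment, build a DataSet, attach a sink, and call execute(). The output path and job name are arbitrary examples.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class MinimalEnvironmentExample {

  public static void main(String[] args) throws Exception {
    // local environment when run from the IDE, remote when submitted to a cluster
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // build a small DataSet and apply a transformation
    DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);
    DataSet<Integer> doubled = numbers.map(new MapFunction<Integer, Integer>() {
      @Override
      public Integer map(Integer value) {
        return value * 2;
      }
    });

    // attach a sink; the path is an arbitrary example
    doubled.writeAsText("/tmp/doubled-numbers");

    // nothing runs until execute() is called
    env.execute("minimal ExecutionEnvironment example");
  }
}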




  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<String, Integer, String>> input = env.readCsvFile(inputPath)
      .fieldDelimiter('|')
      .types(String.class, Integer.class, String.class);

    //output the data with AvroOutputFormat for specific user type
    DataSet<User> specificUser = input.map(new ConvertToUser());
    specificUser.write(new AvroOutputFormat<User>(User.class), outputPath1);

    //output the data with AvroOutputFormat for reflect user type
    DataSet<ReflectiveUser> reflectiveUser = specificUser.map(new ConvertToReflective());
    reflectiveUser.write(new AvroOutputFormat<ReflectiveUser>(ReflectiveUser.class), outputPath2);

    env.execute();
  }
View Full Code Here
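
The test above writes with AvroOutputFormat through DataSet.write(outputFormat, path); the ConvertToUser/ConvertToReflective mappers and the Avro-generated User classes are not shown here. A minimal sketch of the same write(...) pattern using the built-in TextOutputFormat, so it compiles without any Avro-generated classes; the output path is an arbitrary example.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextOutputFormat;
import org.apache.flink.core.fs.Path;

public class WriteWithOutputFormatExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> lines = env.fromElements("alice", "bob", "carol");

    // DataSet.write(FileOutputFormat, path) is used the same way with AvroOutputFormat above;
    // the path passed to write() determines where the files are written
    String outputPath = "/tmp/names-out";
    lines.write(new TextOutputFormat<String>(new Path(outputPath)), outputPath);

    env.execute();
  }
}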


    try {
      InputFormat<MyAvroType, ?> format = new AvroInputFormat<MyAvroType>(new Path("file:///ignore/this/file"), MyAvroType.class);

      TypeInformation<?> typeInfoDirect = TypeExtractor.getInputFormatTypes(format);

      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      DataSet<MyAvroType> input = env.createInput(format);
      TypeInformation<?> typeInfoDataSet = input.getType();


      Assert.assertTrue(typeInfoDirect instanceof PojoTypeInfo);
      Assert.assertTrue(typeInfoDataSet instanceof PojoTypeInfo);
View Full Code Here
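
The assertions above check that the type extracted from an AvroInputFormat is a PojoTypeInfo. As a related sketch without the Avro dependency (the Person class and its fields are made up), the same POJO analysis can be observed with TypeExtractor.getForClass and DataSet.getType().

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.typeutils.PojoTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;

public class PojoTypeInfoExample {

  // a simple POJO: public no-argument constructor and public fields
  public static class Person {
    public String name;
    public int age;

    public Person() {}

    public Person(String name, int age) {
      this.name = name;
      this.age = age;
    }
  }

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // type information derived directly from the class
    TypeInformation<Person> direct = TypeExtractor.getForClass(Person.class);

    // type information derived from a DataSet of that class
    DataSet<Person> people = env.fromElements(new Person("Ada", 36));
    TypeInformation<?> fromDataSet = people.getType();

    // both are analyzed as POJO types
    System.out.println(direct instanceof PojoTypeInfo);       // true
    System.out.println(fromDataSet instanceof PojoTypeInfo);  // true
  }
}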

public class JDBCExample {

  public static void main(String[] args) throws Exception {
    prepareTestDb();

    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5> source
        = environment.createInput(JDBCInputFormat.buildJDBCInputFormat()
            .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
            .setDBUrl("jdbc:derby:memory:ebookshop")
            .setQuery("select * from books")
            .finish(),
            new TupleTypeInfo(Tuple5.class, INT_TYPE_INFO, STRING_TYPE_INFO, STRING_TYPE_INFO, DOUBLE_TYPE_INFO, INT_TYPE_INFO)
        );

    source.output(JDBCOutputFormat.buildJDBCOutputFormat()
        .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
        .setDBUrl("jdbc:derby:memory:ebookshop")
        .setQuery("insert into newbooks (id,title,author,price,qty) values (?,?,?,?,?)")
        .finish());
    environment.execute();
  }
View Full Code Here
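
The example above uses raw Tuple5 and TupleTypeInfo types. The same source can be written with explicit generics so the row type is checked at compile time; a sketch assuming the legacy flink-jdbc batch connector and the in-memory Derby database used above, with an arbitrary example output path.

import static org.apache.flink.api.common.typeinfo.BasicTypeInfo.DOUBLE_TYPE_INFO;
import static org.apache.flink.api.common.typeinfo.BasicTypeInfo.INT_TYPE_INFO;
import static org.apache.flink.api.common.typeinfo.BasicTypeInfo.STRING_TYPE_INFO;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;

public class TypedJdbcSourceExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // declare the row type once and reuse it for the source
    TupleTypeInfo<Tuple5<Integer, String, String, Double, Integer>> rowType =
        new TupleTypeInfo<Tuple5<Integer, String, String, Double, Integer>>(
            INT_TYPE_INFO, STRING_TYPE_INFO, STRING_TYPE_INFO, DOUBLE_TYPE_INFO, INT_TYPE_INFO);

    DataSet<Tuple5<Integer, String, String, Double, Integer>> books = env.createInput(
        JDBCInputFormat.buildJDBCInputFormat()
            .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
            .setDBUrl("jdbc:derby:memory:ebookshop")
            .setQuery("select * from books")
            .finish(),
        rowType);

    books.writeAsText("/tmp/books-out");  // example output path
    env.execute();
  }
}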

    private static final long componentId = 1L;
    private static long [] aggr_value = new long [MAX_ITERATIONS];

    public static String runProgram(String resultPath) throws Exception {

      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DOP);

      DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
      DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

      IterativeDataSet<Tuple2<Long, Long>> iteration =
          initialSolutionSet.iterate(MAX_ITERATIONS);

      // register the aggregator
      iteration.registerAggregator(ELEMENTS_IN_COMPONENT, new LongSumAggregatorWithParameter(componentId));

      DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.join(edges).where(0).equalTo(0)
          .with(new NeighborWithComponentIDJoin())
          .groupBy(0).reduceGroup(new MinimumReduce());

      DataSet<Tuple2<Long, Long>> updatedComponentId =
          verticesWithNewComponents.join(iteration).where(0).equalTo(0)
          .flatMap(new MinimumIdFilter());

      iteration.closeWith(updatedComponentId).writeAsText(resultPath);

      env.execute();

      return resultPath;
    }
View Full Code Here
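
The program registers a LongSumAggregatorWithParameter under ELEMENTS_IN_COMPONENT, but the functions that feed the aggregator are not shown. A minimal sketch, with a made-up aggregator name and logic, of how a rich function inside an iteration obtains and updates a registered LongSumAggregator through its iteration runtime context.

import org.apache.flink.api.common.aggregators.LongSumAggregator;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;
import org.apache.flink.configuration.Configuration;

public class IterationAggregatorExample {

  private static final String NEGATIVE_COUNT = "example.negative.count";

  // counts negative elements per superstep while decrementing each element
  public static class CountNegativesMap extends RichMapFunction<Integer, Integer> {

    private LongSumAggregator aggregator;

    @Override
    public void open(Configuration parameters) {
      // look up the aggregator registered on the enclosing iteration
      aggregator = getIterationRuntimeContext().getIterationAggregator(NEGATIVE_COUNT);
    }

    @Override
    public Integer map(Integer value) {
      if (value < 0) {
        aggregator.aggregate(1L);
      }
      return value - 1;
    }
  }

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    IterativeDataSet<Integer> iteration = env.fromElements(1, 2, 3, 4, 5).iterate(5);
    iteration.registerAggregator(NEGATIVE_COUNT, new LongSumAggregator());

    DataSet<Integer> next = iteration.map(new CountNegativesMap());
    iteration.closeWith(next).writeAsText("/tmp/aggregator-out");  // example path

    env.execute();
  }
}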

    private static final String UPDATED_ELEMENTS = "updated.elements.aggr";
    private static final long convergence_threshold = 3; // the iteration stops if fewer than this many elements change value

    public static String runProgram(String resultPath) throws Exception {

      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DOP);

      DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
      DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

      IterativeDataSet<Tuple2<Long, Long>> iteration =
          initialSolutionSet.iterate(MAX_ITERATIONS);

      // register the convergence criterion
      iteration.registerAggregationConvergenceCriterion(UPDATED_ELEMENTS,
          new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergence_threshold));

      DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.join(edges).where(0).equalTo(0)
          .with(new NeighborWithComponentIDJoin())
          .groupBy(0).reduceGroup(new MinimumReduce());

      DataSet<Tuple2<Long, Long>> updatedComponentId =
          verticesWithNewComponents.join(iteration).where(0).equalTo(0)
          .flatMap(new MinimumIdFilter());

      iteration.closeWith(updatedComponentId).writeAsText(resultPath);

      env.execute();

      return resultPath;
    }
View Full Code Here
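
The UpdatedElementsConvergenceCriterion used above is not shown. A minimal sketch of what such a criterion can look like; the class name and exact semantics are assumptions based on the threshold comment: it receives the aggregated LongValue after each superstep and reports convergence once the count falls below the threshold.

import org.apache.flink.api.common.aggregators.ConvergenceCriterion;
import org.apache.flink.types.LongValue;

// converged (i.e. stop iterating) once the aggregated number of updated elements
// in the last superstep drops below the configured threshold
public class UpdatedElementsBelowThreshold implements ConvergenceCriterion<LongValue> {

  private final long threshold;

  public UpdatedElementsBelowThreshold(long threshold) {
    this.threshold = threshold;
  }

  @Override
  public boolean isConverged(int iteration, LongValue updatedElements) {
    return updatedElements.getValue() < threshold;
  }
}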

      case 1: {
        /*
         * Test aggregator without parameter for iterate
         */

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);

        DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
        IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

        // register aggregator
        LongSumAggregator aggr = new LongSumAggregator();
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        // register convergence criterion
        iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());
       
        DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
        iteration.closeWith(updatedDs).writeAsText(resultPath);
        env.execute();

        // return expected result
        return "-3\n" + "-2\n" + "-2\n" + "-1\n" + "-1\n"
             + "-1\n" + "0\n" + "0\n" + "0\n" + "0\n"
             + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
      }
      case 2: {
        /*
         * Test aggregator with parameter for iterate
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);

        DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
        IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

        // register aggregator
        LongSumAggregatorWithParameter aggr = new LongSumAggregatorWithParameter(0);
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        // register convergence criterion
        iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());
       
        DataSet<Integer> updatedDs = iteration.map(new SubtractOneMapWithParam());
        iteration.closeWith(updatedDs).writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "-3\n" + "-2\n" + "-2\n" + "-1\n" + "-1\n"
             + "-1\n" + "0\n" + "0\n" + "0\n" + "0\n"
             + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
      }
      case 3: {
        /*
         * Test convergence criterion with parameter for iterate
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);

        DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
        IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

        // register aggregator
        LongSumAggregator aggr = new LongSumAggregator();
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        // register convergence criterion
        iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterionWithParam(3));
       
        DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
        iteration.closeWith(updatedDs).writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "-3\n" + "-2\n" + "-2\n" + "-1\n" + "-1\n"
             + "-1\n" + "0\n" + "0\n" + "0\n" + "0\n"
             + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
      }
      case 4: {
        /*
         * Test aggregator without parameter for iterateDelta
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);
       
        DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());
           
        DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
            initialSolutionSet, MAX_ITERATIONS, 0);

        // register aggregator
        LongSumAggregator aggr = new LongSumAggregator();
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());
       
        DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());
       
        DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
        DataSet<Integer> result = iterationRes.map(new ProjectSecondMapper());
        result.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return "1\n" + "2\n" + "2\n" + "3\n" + "3\n"
             + "3\n" + "4\n" + "4\n" + "4\n" + "4\n"
             + "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
       
      }
      case 5: {
        /*
         * Test aggregator with parameter for iterateDelta
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);
       
        DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());
           
        DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
            initialSolutionSet, MAX_ITERATIONS, 0);

        // register aggregator
        LongSumAggregator aggr = new LongSumAggregatorWithParameter(4);
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());
       
        DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());
       
        DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
        DataSet<Integer> result = iterationRes.map(new ProjectSecondMapper());
        result.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return "1\n" + "2\n" + "2\n" + "3\n" + "3\n"
             + "3\n" + "4\n" + "4\n" + "4\n" + "4\n"
             + "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
View Full Code Here
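
Cases 4 and 5 rely on helper functions (TupleMakerMap, AggregateMapDelta, UpdateFilter, ProjectSecondMapper) that are not shown. A self-contained sketch of the same iterateDelta / getWorkset / closeWith shape with made-up logic: each round decrements a counter field, and only still-positive elements stay in the workset.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class MinimalDeltaIterationExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initial = env.fromElements(
        new Tuple2<Long, Long>(1L, 3L), new Tuple2<Long, Long>(2L, 1L));

    // solution set and initial workset are the same data set; the key is field 0
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        initial.iterateDelta(initial, 10, 0);

    // delta: decrement the counter field of every workset element
    DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
        .map(new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
          @Override
          public Tuple2<Long, Long> map(Tuple2<Long, Long> v) {
            return new Tuple2<Long, Long>(v.f0, v.f1 - 1);
          }
        });

    // next workset: only elements whose counter is still positive keep iterating
    DataSet<Tuple2<Long, Long>> nextWorkset = delta
        .filter(new FilterFunction<Tuple2<Long, Long>>() {
          @Override
          public boolean filter(Tuple2<Long, Long> v) {
            return v.f1 > 0;
          }
        });

    // the first argument updates the solution set, the second becomes the next workset
    iteration.closeWith(delta, nextWorkset).writeAsText("/tmp/delta-out");  // example path
    env.execute();
  }
}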

   
  @Test
  public void testMultiSolutionSetJoinPlan() {
    try {
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, Double>> inputData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0));
      DataSet<Tuple2<Long, Double>> result = constructPlan(inputData, 10);
     
      // add two sinks, to test the case of branching after an iteration
      result.print();
      result.print();
   
      Plan p = env.createProgramPlan();
     
      OptimizedPlan optPlan = compileNoStats(p);
     
      OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
     
View Full Code Here
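
constructPlan and the optimizer helpers (compileNoStats, getOptimizerPlanNodeResolver) come from the surrounding test infrastructure. Outside of a test, env.createProgramPlan() can be used in the same way to build a plan without executing it; a minimal sketch, assuming the legacy DataSet API in which a sink must be attached before a plan can be created. The path and job name are arbitrary.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class CreateProgramPlanExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Integer> data = env.fromElements(1, 2, 3);
    data.writeAsText("/tmp/plan-out");  // at least one sink is required

    // builds the plan without running it; execute() would both build and run it
    Plan plan = env.createProgramPlan("program plan example");
    System.out.println("Created plan for job: " + plan.getJobName());
  }
}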

public class PageRankCompilerTest extends CompilerTestBase{
 
  @Test
  public void testPageRank() {
    try {
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      // get input data
      DataSet<Long> pagesInput = env.fromElements(1L);
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, Long>> linksInput = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
     
      // assign initial rank to pages
      DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.
          map(new RankAssigner((1.0d / 10)));
     
      // build adjacency list from link input
      DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
          linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());
     
      // set iterative data set
      IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(10);
     
      Configuration cfg = new Configuration();
      cfg.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
     
      DataSet<Tuple2<Long, Double>> newRanks = iteration
          // join pages with outgoing edges and distribute rank
          .join(adjacencyListInput).where(0).equalTo(0).withParameters(cfg)
          .flatMap(new JoinVertexWithEdgesMatch())
          // collect and sum ranks
          .groupBy(0).aggregate(SUM, 1)
          // apply dampening factor
          .map(new Dampener(0.85, 10));
     
      DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
          newRanks,
          newRanks.join(iteration).where(0).equalTo(0)
          // termination condition
          .filter(new EpsilonFilter()));
 
      finalPageRanks.print();
 
      // get the plan and compile it
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sinkPlanNode = (SinkPlanNode) op.getDataSinks().iterator().next();
      BulkIterationPlanNode iterPlanNode = (BulkIterationPlanNode) sinkPlanNode.getInput().getSource();
     
View Full Code Here
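
The iteration above is closed with a second data set as termination criterion: the loop stops as soon as that data set becomes empty, here once no rank changes by more than the epsilon checked in EpsilonFilter. A self-contained sketch of the two-argument closeWith with made-up numbers and an arbitrary output path.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class TerminationCriterionExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // halve a value per superstep, at most 50 times
    IterativeDataSet<Double> iteration = env.fromElements(128.0).iterate(50);

    DataSet<Double> halved = iteration.map(new MapFunction<Double, Double>() {
      @Override
      public Double map(Double value) {
        return value / 2.0;
      }
    });

    // termination criterion: keep iterating only while some value is still above 1.0;
    // once this data set is empty the iteration stops, even before 50 supersteps
    DataSet<Double> stillLarge = halved.filter(new FilterFunction<Double>() {
      @Override
      public boolean filter(Double value) {
        return value > 1.0;
      }
    });

    iteration.closeWith(halved, stillLarge).writeAsText("/tmp/termination-out");  // example path
    env.execute();
  }
}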

      case 1: {
        /*
         * Test non-passing mapper
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> nonPassingFlatMapDs = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new NonPassingMapper()));
       
        nonPassingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "\n";
      }
      case 2: {
        /*
         * Test data duplicating mapper
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> duplicatingFlatMapDs = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new DuplicatingMapper()));
       
        duplicatingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(1,Hi)\n" + "(1,HI)\n" +
        "(2,Hello)\n" + "(2,HELLO)\n" +
        "(3,Hello world)\n" + "(3,HELLO WORLD)\n" +
        "(4,Hello world, how are you?)\n" + "(4,HELLO WORLD, HOW ARE YOU?)\n" +
        "(5,I am fine.)\n" + "(5,I AM FINE.)\n" +
        "(6,Luke Skywalker)\n" + "(6,LUKE SKYWALKER)\n" +
        "(7,Comment#1)\n" + "(7,COMMENT#1)\n" +
        "(8,Comment#2)\n" + "(8,COMMENT#2)\n" +
        "(9,Comment#3)\n" + "(9,COMMENT#3)\n" +
        "(10,Comment#4)\n" + "(10,COMMENT#4)\n" +
        "(11,Comment#5)\n" + "(11,COMMENT#5)\n" +
        "(12,Comment#6)\n" + "(12,COMMENT#6)\n" +
        "(13,Comment#7)\n" + "(13,COMMENT#7)\n" +
        "(14,Comment#8)\n" + "(14,COMMENT#8)\n" +
        "(15,Comment#9)\n" + "(15,COMMENT#9)\n" +
        "(16,Comment#10)\n" + "(16,COMMENT#10)\n" +
        "(17,Comment#11)\n" + "(17,COMMENT#11)\n" +
        "(18,Comment#12)\n" + "(18,COMMENT#12)\n" +
        "(19,Comment#13)\n" + "(19,COMMENT#13)\n" +
        "(20,Comment#14)\n" + "(20,COMMENT#14)\n" +
        "(21,Comment#15)\n" + "(21,COMMENT#15)\n";
      }
      case 3: {
        // Mapper configured via JobConf
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.filterPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> hellos = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));
       
        hellos.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(2,Hello)\n" +
        "(3,Hello world)\n" +
        "(4,Hello world, how are you?)\n";
View Full Code Here
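
HadoopMapFunction wraps a standard org.apache.hadoop.mapred.Mapper; the mappers used above (NonPassingMapper, DuplicatingMapper, ConfigurableMapper) are not shown. A sketch of a mapper that could be wrapped the same way; the class name and filter logic are made up, loosely mirroring the JobConf-driven case 3.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// forwards only records whose text starts with a prefix taken from the JobConf
public class PrefixFilterMapper extends MapReduceBase
    implements Mapper<IntWritable, Text, IntWritable, Text> {

  private String prefix = "";

  @Override
  public void configure(JobConf conf) {
    prefix = conf.get("my.filterPrefix", "");
  }

  @Override
  public void map(IntWritable key, Text value,
      OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
    if (value.toString().startsWith(prefix)) {
      out.collect(key, value);
    }
  }
}

// usage, mirroring case 3 above:
//   JobConf conf = new JobConf();
//   conf.set("my.filterPrefix", "Hello");
//   ds.flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(
//       new PrefixFilterMapper(), conf));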

      switch(progId) {
      case 1: {
        /*
         * Test standard grouping
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() / 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new CommentCntReducer()));
       
        commentCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(0,0)\n"+
            "(1,3)\n" +
            "(2,5)\n" +
            "(3,5)\n" +
            "(4,2)\n";
      }
      case 2: {
        /*
         * Test ungrouped Hadoop reducer
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
           
        DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));
       
        commentCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(42,15)\n";
      }
      case 3: {
        /*
         * Test configuration via JobConf
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.cntPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() % 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> helloCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                new ConfigurableCntReducer(), conf));
       
        helloCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(0,0)\n"+
            "(1,0)\n" +
            "(2,1)\n" +
View Full Code Here
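
Likewise, HadoopReduceFunction wraps an org.apache.hadoop.mapred.Reducer; CommentCntReducer, AllCommentCntReducer and ConfigurableCntReducer are not shown above. A sketch of a reducer that could be wrapped the same way; the class name and counting logic are made up, loosely mirroring the prefix-count idea of case 3.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// per key, counts how many values start with a prefix taken from the JobConf
public class PrefixCountReducer extends MapReduceBase
    implements Reducer<IntWritable, Text, IntWritable, IntWritable> {

  private String prefix = "";

  @Override
  public void configure(JobConf conf) {
    prefix = conf.get("my.cntPrefix", "");
  }

  @Override
  public void reduce(IntWritable key, Iterator<Text> values,
      OutputCollector<IntWritable, IntWritable> out, Reporter reporter) throws IOException {
    int count = 0;
    while (values.hasNext()) {
      if (values.next().toString().startsWith(prefix)) {
        count++;
      }
    }
    out.collect(key, new IntWritable(count));
  }
}

// usage, mirroring case 3 above:
//   ds.groupBy(0).reduceGroup(
//       new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
//           new PrefixCountReducer(), conf));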
