Package org.apache.flink.api.java

Examples of org.apache.flink.api.java.ExecutionEnvironment
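
The snippets below come from Flink test programs built on the Java DataSet API. As a baseline, here is a minimal, self-contained sketch of the usual life cycle: obtain an ExecutionEnvironment, build a DataSet, attach a sink, and call execute(). The output path and job name are arbitrary examples.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class MinimalEnvironmentExample {

  public static void main(String[] args) throws Exception {
    // local environment when run from the IDE, remote when submitted to a cluster
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // build a small DataSet and apply a transformation
    DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);
    DataSet<Integer> doubled = numbers.map(new MapFunction<Integer, Integer>() {
      @Override
      public Integer map(Integer value) {
        return value * 2;
      }
    });

    // attach a sink; the path is an arbitrary example
    doubled.writeAsText("/tmp/doubled-numbers");

    // nothing runs until execute() is called
    env.execute("minimal ExecutionEnvironment example");
  }
}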




  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<String, Integer, String>> input = env.readCsvFile(inputPath)
      .fieldDelimiter('|')
      .types(String.class, Integer.class, String.class);

    //output the data with AvroOutputFormat for specific user type
    DataSet<User> specificUser = input.map(new ConvertToUser());
    specificUser.write(new AvroOutputFormat<User>(User.class), outputPath1);

    //output the data with AvroOutputFormat for reflect user type
    DataSet<ReflectiveUser> reflectiveUser = specificUser.map(new ConvertToReflective());
    reflectiveUser.write(new AvroOutputFormat<ReflectiveUser>(ReflectiveUser.class), outputPath2);

    env.execute();
  }
View Full Code Here
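
The test above writes with AvroOutputFormat through DataSet.write(outputFormat, path); the ConvertToUser/ConvertToReflective mappers and the Avro-generated User classes are not shown here. A minimal sketch of the same write(...) pattern using the built-in TextOutputFormat, so it compiles without any Avro-generated classes; the output path is an arbitrary example.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.TextOutputFormat;
import org.apache.flink.core.fs.Path;

public class WriteWithOutputFormatExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> lines = env.fromElements("alice", "bob", "carol");

    // DataSet.write(FileOutputFormat, path) is used the same way with AvroOutputFormat above;
    // the path passed to write() determines where the files are written
    String outputPath = "/tmp/names-out";
    lines.write(new TextOutputFormat<String>(new Path(outputPath)), outputPath);

    env.execute();
  }
}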


    try {
      InputFormat<MyAvroType, ?> format = new AvroInputFormat<MyAvroType>(new Path("file:///ignore/this/file"), MyAvroType.class);

      TypeInformation<?> typeInfoDirect = TypeExtractor.getInputFormatTypes(format);

      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      DataSet<MyAvroType> input = env.createInput(format);
      TypeInformation<?> typeInfoDataSet = input.getType();


      Assert.assertTrue(typeInfoDirect instanceof PojoTypeInfo);
      Assert.assertTrue(typeInfoDataSet instanceof PojoTypeInfo);
View Full Code Here
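
The assertions above check that the type extracted from an AvroInputFormat is a PojoTypeInfo. As a related sketch without the Avro dependency (the Person class and its fields are made up), the same POJO analysis can be observed with TypeExtractor.getForClass and DataSet.getType().

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.typeutils.PojoTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;

public class PojoTypeInfoExample {

  // a simple POJO: public no-argument constructor and public fields
  public static class Person {
    public String name;
    public int age;

    public Person() {}

    public Person(String name, int age) {
      this.name = name;
      this.age = age;
    }
  }

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // type information derived directly from the class
    TypeInformation<Person> direct = TypeExtractor.getForClass(Person.class);

    // type information derived from a DataSet of that class
    DataSet<Person> people = env.fromElements(new Person("Ada", 36));
    TypeInformation<?> fromDataSet = people.getType();

    // both are analyzed as POJO types
    System.out.println(direct instanceof PojoTypeInfo);       // true
    System.out.println(fromDataSet instanceof PojoTypeInfo);  // true
  }
}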

public class JDBCExample {

  public static void main(String[] args) throws Exception {
    prepareTestDb();

    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5> source
        = environment.createInput(JDBCInputFormat.buildJDBCInputFormat()
            .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
            .setDBUrl("jdbc:derby:memory:ebookshop")
            .setQuery("select * from books")
            .finish(),
            new TupleTypeInfo(Tuple5.class, INT_TYPE_INFO, STRING_TYPE_INFO, STRING_TYPE_INFO, DOUBLE_TYPE_INFO, INT_TYPE_INFO)
        );

    source.output(JDBCOutputFormat.buildJDBCOutputFormat()
        .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
        .setDBUrl("jdbc:derby:memory:ebookshop")
        .setQuery("insert into newbooks (id,title,author,price,qty) values (?,?,?,?,?)")
        .finish());
    environment.execute();
  }
View Full Code Here
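
The example above uses raw Tuple5 and TupleTypeInfo types. The same source can be written with explicit generics so the row type is checked at compile time; a sketch assuming the legacy flink-jdbc batch connector and the in-memory Derby database used above, with an arbitrary example output path.

import static org.apache.flink.api.common.typeinfo.BasicTypeInfo.DOUBLE_TYPE_INFO;
import static org.apache.flink.api.common.typeinfo.BasicTypeInfo.INT_TYPE_INFO;
import static org.apache.flink.api.common.typeinfo.BasicTypeInfo.STRING_TYPE_INFO;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;

public class TypedJdbcSourceExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // declare the row type once and reuse it for the source
    TupleTypeInfo<Tuple5<Integer, String, String, Double, Integer>> rowType =
        new TupleTypeInfo<Tuple5<Integer, String, String, Double, Integer>>(
            INT_TYPE_INFO, STRING_TYPE_INFO, STRING_TYPE_INFO, DOUBLE_TYPE_INFO, INT_TYPE_INFO);

    DataSet<Tuple5<Integer, String, String, Double, Integer>> books = env.createInput(
        JDBCInputFormat.buildJDBCInputFormat()
            .setDrivername("org.apache.derby.jdbc.EmbeddedDriver")
            .setDBUrl("jdbc:derby:memory:ebookshop")
            .setQuery("select * from books")
            .finish(),
        rowType);

    books.writeAsText("/tmp/books-out");  // example output path
    env.execute();
  }
}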

    private static final long componentId = 1L;
    private static long [] aggr_value = new long [MAX_ITERATIONS];

    public static String runProgram(String resultPath) throws Exception {

      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DOP);

      DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
      DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

      IterativeDataSet<Tuple2<Long, Long>> iteration =
          initialSolutionSet.iterate(MAX_ITERATIONS);

      // register the aggregator
      iteration.registerAggregator(ELEMENTS_IN_COMPONENT, new LongSumAggregatorWithParameter(componentId));

      DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.join(edges).where(0).equalTo(0)
          .with(new NeighborWithComponentIDJoin())
          .groupBy(0).reduceGroup(new MinimumReduce());

      DataSet<Tuple2<Long, Long>> updatedComponentId =
          verticesWithNewComponents.join(iteration).where(0).equalTo(0)
          .flatMap(new MinimumIdFilter());

      iteration.closeWith(updatedComponentId).writeAsText(resultPath);

      env.execute();

      return resultPath;
    }
View Full Code Here
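
The program registers a LongSumAggregatorWithParameter under ELEMENTS_IN_COMPONENT, but the functions that feed the aggregator are not shown. A minimal sketch, with a made-up aggregator name and logic, of how a rich function inside an iteration obtains and updates a registered LongSumAggregator through its iteration runtime context.

import org.apache.flink.api.common.aggregators.LongSumAggregator;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;
import org.apache.flink.configuration.Configuration;

public class IterationAggregatorExample {

  private static final String NEGATIVE_COUNT = "example.negative.count";

  // counts negative elements per superstep while decrementing each element
  public static class CountNegativesMap extends RichMapFunction<Integer, Integer> {

    private LongSumAggregator aggregator;

    @Override
    public void open(Configuration parameters) {
      // look up the aggregator registered on the enclosing iteration
      aggregator = getIterationRuntimeContext().getIterationAggregator(NEGATIVE_COUNT);
    }

    @Override
    public Integer map(Integer value) {
      if (value < 0) {
        aggregator.aggregate(1L);
      }
      return value - 1;
    }
  }

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    IterativeDataSet<Integer> iteration = env.fromElements(1, 2, 3, 4, 5).iterate(5);
    iteration.registerAggregator(NEGATIVE_COUNT, new LongSumAggregator());

    DataSet<Integer> next = iteration.map(new CountNegativesMap());
    iteration.closeWith(next).writeAsText("/tmp/aggregator-out");  // example path

    env.execute();
  }
}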

    private static final String UPDATED_ELEMENTS = "updated.elements.aggr";
    private static final long convergence_threshold = 3; // the iteration stops if fewer than this many elements change value

    public static String runProgram(String resultPath) throws Exception {

      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DOP);

      DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
      DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

      IterativeDataSet<Tuple2<Long, Long>> iteration =
          initialSolutionSet.iterate(MAX_ITERATIONS);

      // register the convergence criterion
      iteration.registerAggregationConvergenceCriterion(UPDATED_ELEMENTS,
          new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergence_threshold));

      DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.join(edges).where(0).equalTo(0)
          .with(new NeighborWithComponentIDJoin())
          .groupBy(0).reduceGroup(new MinimumReduce());

      DataSet<Tuple2<Long, Long>> updatedComponentId =
          verticesWithNewComponents.join(iteration).where(0).equalTo(0)
          .flatMap(new MinimumIdFilter());

      iteration.closeWith(updatedComponentId).writeAsText(resultPath);

      env.execute();

      return resultPath;
    }
View Full Code Here
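
The UpdatedElementsConvergenceCriterion used above is not shown. A minimal sketch of what such a criterion can look like; the class name and exact semantics are assumptions based on the threshold comment: it receives the aggregated LongValue after each superstep and reports convergence once the count falls below the threshold.

import org.apache.flink.api.common.aggregators.ConvergenceCriterion;
import org.apache.flink.types.LongValue;

// converged (i.e. stop iterating) once the aggregated number of updated elements
// in the last superstep drops below the configured threshold
public class UpdatedElementsBelowThreshold implements ConvergenceCriterion<LongValue> {

  private final long threshold;

  public UpdatedElementsBelowThreshold(long threshold) {
    this.threshold = threshold;
  }

  @Override
  public boolean isConverged(int iteration, LongValue updatedElements) {
    return updatedElements.getValue() < threshold;
  }
}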

      case 1: {
        /*
         * Test aggregator without parameter for iterate
         */

        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);

        DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
        IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

        // register aggregator
        LongSumAggregator aggr = new LongSumAggregator();
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        // register convergence criterion
        iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());
       
        DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
        iteration.closeWith(updatedDs).writeAsText(resultPath);
        env.execute();

        // return expected result
        return "-3\n" + "-2\n" + "-2\n" + "-1\n" + "-1\n"
             + "-1\n" + "0\n" + "0\n" + "0\n" + "0\n"
             + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
      }
      case 2: {
        /*
         * Test aggregator with parameter for iterate
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);

        DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
        IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

        // register aggregator
        LongSumAggregatorWithParameter aggr = new LongSumAggregatorWithParameter(0);
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        // register convergence criterion
        iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());
       
        DataSet<Integer> updatedDs = iteration.map(new SubtractOneMapWithParam());
        iteration.closeWith(updatedDs).writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "-3\n" + "-2\n" + "-2\n" + "-1\n" + "-1\n"
             + "-1\n" + "0\n" + "0\n" + "0\n" + "0\n"
             + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
      }
      case 3: {
        /*
         * Test convergence criterion with parameter for iterate
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);

        DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
        IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

        // register aggregator
        LongSumAggregator aggr = new LongSumAggregator();
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        // register convergence criterion
        iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterionWithParam(3));
       
        DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
        iteration.closeWith(updatedDs).writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "-3\n" + "-2\n" + "-2\n" + "-1\n" + "-1\n"
             + "-1\n" + "0\n" + "0\n" + "0\n" + "0\n"
             + "1\n" + "1\n" + "1\n" + "1\n" + "1\n";
      }
      case 4: {
        /*
         * Test aggregator without parameter for iterateDelta
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);
       
        DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());
           
        DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
            initialSolutionSet, MAX_ITERATIONS, 0);

        // register aggregator
        LongSumAggregator aggr = new LongSumAggregator();
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());
       
        DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());
       
        DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
        DataSet<Integer> result = iterationRes.map(new ProjectSecondMapper());
        result.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return "1\n" + "2\n" + "2\n" + "3\n" + "3\n"
             + "3\n" + "4\n" + "4\n" + "4\n" + "4\n"
             + "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
       
      }
      case 5: {
        /*
         * Test aggregator with parameter for iterateDelta
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setDegreeOfParallelism(DOP);
       
        DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());
           
        DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
            initialSolutionSet, MAX_ITERATIONS, 0);

        // register aggregator
        LongSumAggregator aggr = new LongSumAggregatorWithParameter(4);
        iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);
       
        DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());
       
        DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
            .where(0).equalTo(0).flatMap(new UpdateFilter());
       
        DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
        DataSet<Integer> result = iterationRes.map(new ProjectSecondMapper());
        result.writeAsText(resultPath);
       
        env.execute();
       
        // return expected result
        return "1\n" + "2\n" + "2\n" + "3\n" + "3\n"
             + "3\n" + "4\n" + "4\n" + "4\n" + "4\n"
             + "5\n" + "5\n" + "5\n" + "5\n" + "5\n";
View Full Code Here
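
Cases 4 and 5 rely on helper functions (TupleMakerMap, AggregateMapDelta, UpdateFilter, ProjectSecondMapper) that are not shown. A self-contained sketch of the same iterateDelta / getWorkset / closeWith shape with made-up logic: each round decrements a counter field, and only still-positive elements stay in the workset.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class MinimalDeltaIterationExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initial = env.fromElements(
        new Tuple2<Long, Long>(1L, 3L), new Tuple2<Long, Long>(2L, 1L));

    // solution set and initial workset are the same data set; the key is field 0
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        initial.iterateDelta(initial, 10, 0);

    // delta: decrement the counter field of every workset element
    DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
        .map(new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
          @Override
          public Tuple2<Long, Long> map(Tuple2<Long, Long> v) {
            return new Tuple2<Long, Long>(v.f0, v.f1 - 1);
          }
        });

    // next workset: only elements whose counter is still positive keep iterating
    DataSet<Tuple2<Long, Long>> nextWorkset = delta
        .filter(new FilterFunction<Tuple2<Long, Long>>() {
          @Override
          public boolean filter(Tuple2<Long, Long> v) {
            return v.f1 > 0;
          }
        });

    // the first argument updates the solution set, the second becomes the next workset
    iteration.closeWith(delta, nextWorkset).writeAsText("/tmp/delta-out");  // example path
    env.execute();
  }
}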

   
  @Test
  public void testMultiSolutionSetJoinPlan() {
    try {
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, Double>> inputData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0));
      DataSet<Tuple2<Long, Double>> result = constructPlan(inputData, 10);
     
      // add two sinks, to test the case of branching after an iteration
      result.print();
      result.print();
   
      Plan p = env.createProgramPlan();
     
      OptimizedPlan optPlan = compileNoStats(p);
     
      OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
     
View Full Code Here
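
constructPlan and the optimizer helpers (compileNoStats, getOptimizerPlanNodeResolver) come from the surrounding test infrastructure. Outside of a test, env.createProgramPlan() can be used in the same way to build a plan without executing it; a minimal sketch, assuming the legacy DataSet API in which a sink must be attached before a plan can be created. The path and job name are arbitrary.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class CreateProgramPlanExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Integer> data = env.fromElements(1, 2, 3);
    data.writeAsText("/tmp/plan-out");  // at least one sink is required

    // builds the plan without running it; execute() would both build and run it
    Plan plan = env.createProgramPlan("program plan example");
    System.out.println("Created plan for job: " + plan.getJobName());
  }
}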

public class PageRankCompilerTest extends CompilerTestBase{
 
  @Test
  public void testPageRank() {
    try {
      final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      // get input data
      DataSet<Long> pagesInput = env.fromElements(1L);
      @SuppressWarnings("unchecked")
      DataSet<Tuple2<Long, Long>> linksInput = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
     
      // assign initial rank to pages
      DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.
          map(new RankAssigner((1.0d / 10)));
     
      // build adjacency list from link input
      DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
          linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());
     
      // set iterative data set
      IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(10);
     
      Configuration cfg = new Configuration();
      cfg.setString(PactCompiler.HINT_LOCAL_STRATEGY, PactCompiler.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
     
      DataSet<Tuple2<Long, Double>> newRanks = iteration
          // join pages with outgoing edges and distribute rank
          .join(adjacencyListInput).where(0).equalTo(0).withParameters(cfg)
          .flatMap(new JoinVertexWithEdgesMatch())
          // collect and sum ranks
          .groupBy(0).aggregate(SUM, 1)
          // apply dampening factor
          .map(new Dampener(0.85, 10));
     
      DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
          newRanks,
          newRanks.join(iteration).where(0).equalTo(0)
          // termination condition
          .filter(new EpsilonFilter()));
 
      finalPageRanks.print();
 
      // get the plan and compile it
      Plan p = env.createProgramPlan();
      OptimizedPlan op = compileNoStats(p);
     
      SinkPlanNode sinkPlanNode = (SinkPlanNode) op.getDataSinks().iterator().next();
      BulkIterationPlanNode iterPlanNode = (BulkIterationPlanNode) sinkPlanNode.getInput().getSource();
     
View Full Code Here
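
The iteration above is closed with a second data set as termination criterion: the loop stops as soon as that data set becomes empty, here once no rank changes by more than the epsilon checked in EpsilonFilter. A self-contained sketch of the two-argument closeWith with made-up numbers and an arbitrary output path.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class TerminationCriterionExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // halve a value per superstep, at most 50 times
    IterativeDataSet<Double> iteration = env.fromElements(128.0).iterate(50);

    DataSet<Double> halved = iteration.map(new MapFunction<Double, Double>() {
      @Override
      public Double map(Double value) {
        return value / 2.0;
      }
    });

    // termination criterion: keep iterating only while some value is still above 1.0;
    // once this data set is empty the iteration stops, even before 50 supersteps
    DataSet<Double> stillLarge = halved.filter(new FilterFunction<Double>() {
      @Override
      public boolean filter(Double value) {
        return value > 1.0;
      }
    });

    iteration.closeWith(halved, stillLarge).writeAsText("/tmp/termination-out");  // example path
    env.execute();
  }
}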

      case 1: {
        /*
         * Test non-passing mapper
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> nonPassingFlatMapDs = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new NonPassingMapper()));
       
        nonPassingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "\n";
      }
      case 2: {
        /*
         * Test data duplicating mapper
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> duplicatingFlatMapDs = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new DuplicatingMapper()));
       
        duplicatingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(1,Hi)\n" + "(1,HI)\n" +
        "(2,Hello)\n" + "(2,HELLO)\n" +
        "(3,Hello world)\n" + "(3,HELLO WORLD)\n" +
        "(4,Hello world, how are you?)\n" + "(4,HELLO WORLD, HOW ARE YOU?)\n" +
        "(5,I am fine.)\n" + "(5,I AM FINE.)\n" +
        "(6,Luke Skywalker)\n" + "(6,LUKE SKYWALKER)\n" +
        "(7,Comment#1)\n" + "(7,COMMENT#1)\n" +
        "(8,Comment#2)\n" + "(8,COMMENT#2)\n" +
        "(9,Comment#3)\n" + "(9,COMMENT#3)\n" +
        "(10,Comment#4)\n" + "(10,COMMENT#4)\n" +
        "(11,Comment#5)\n" + "(11,COMMENT#5)\n" +
        "(12,Comment#6)\n" + "(12,COMMENT#6)\n" +
        "(13,Comment#7)\n" + "(13,COMMENT#7)\n" +
        "(14,Comment#8)\n" + "(14,COMMENT#8)\n" +
        "(15,Comment#9)\n" + "(15,COMMENT#9)\n" +
        "(16,Comment#10)\n" + "(16,COMMENT#10)\n" +
        "(17,Comment#11)\n" + "(17,COMMENT#11)\n" +
        "(18,Comment#12)\n" + "(18,COMMENT#12)\n" +
        "(19,Comment#13)\n" + "(19,COMMENT#13)\n" +
        "(20,Comment#14)\n" + "(20,COMMENT#14)\n" +
        "(21,Comment#15)\n" + "(21,COMMENT#15)\n";
      }
      case 3: {
        // Mapper configured via JobConf
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.filterPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> hellos = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));
       
        hellos.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(2,Hello)\n" +
        "(3,Hello world)\n" +
        "(4,Hello world, how are you?)\n";
View Full Code Here
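
HadoopMapFunction wraps a standard org.apache.hadoop.mapred.Mapper; the mappers used above (NonPassingMapper, DuplicatingMapper, ConfigurableMapper) are not shown. A sketch of a mapper that could be wrapped the same way; the class name and filter logic are made up, loosely mirroring the JobConf-driven case 3.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// forwards only records whose text starts with a prefix taken from the JobConf
public class PrefixFilterMapper extends MapReduceBase
    implements Mapper<IntWritable, Text, IntWritable, Text> {

  private String prefix = "";

  @Override
  public void configure(JobConf conf) {
    prefix = conf.get("my.filterPrefix", "");
  }

  @Override
  public void map(IntWritable key, Text value,
      OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
    if (value.toString().startsWith(prefix)) {
      out.collect(key, value);
    }
  }
}

// usage, mirroring case 3 above:
//   JobConf conf = new JobConf();
//   conf.set("my.filterPrefix", "Hello");
//   ds.flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(
//       new PrefixFilterMapper(), conf));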

      switch(progId) {
      case 1: {
        /*
         * Test standard grouping
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() / 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new CommentCntReducer()));
       
        commentCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(0,0)\n"+
            "(1,3)\n" +
            "(2,5)\n" +
            "(3,5)\n" +
            "(4,2)\n";
      }
      case 2: {
        /*
         * Test ungrouped Hadoop reducer
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
           
        DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));
       
        commentCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(42,15)\n";
      }
      case 3: {
        /*
         * Test configuration via JobConf
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.cntPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() % 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> helloCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                new ConfigurableCntReducer(), conf));
       
        helloCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return   "(0,0)\n"+
            "(1,0)\n" +
            "(2,1)\n" +
View Full Code Here
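
Likewise, HadoopReduceFunction wraps an org.apache.hadoop.mapred.Reducer; CommentCntReducer, AllCommentCntReducer and ConfigurableCntReducer are not shown above. A sketch of a reducer that could be wrapped the same way; the class name and counting logic are made up, loosely mirroring the prefix-count idea of case 3.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// per key, counts how many values start with a prefix taken from the JobConf
public class PrefixCountReducer extends MapReduceBase
    implements Reducer<IntWritable, Text, IntWritable, IntWritable> {

  private String prefix = "";

  @Override
  public void configure(JobConf conf) {
    prefix = conf.get("my.cntPrefix", "");
  }

  @Override
  public void reduce(IntWritable key, Iterator<Text> values,
      OutputCollector<IntWritable, IntWritable> out, Reporter reporter) throws IOException {
    int count = 0;
    while (values.hasNext()) {
      if (values.next().toString().startsWith(prefix)) {
        count++;
      }
    }
    out.collect(key, new IntWritable(count));
  }
}

// usage, mirroring case 3 above:
//   ds.groupBy(0).reduceGroup(
//       new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
//           new PrefixCountReducer(), conf));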
