Examples of ExecutionEnvironment

Examples of org.apache.flink.api.java.ExecutionEnvironment

      switch(progId) {
      case 1: {
        /*
         * Test non-passing mapper
         */
   
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> nonPassingFlatMapDs = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new NonPassingMapper()));
       
        nonPassingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "\n";
      }
      case 2: {
        /*
         * Test data duplicating mapper
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> duplicatingFlatMapDs = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new DuplicatingMapper()));
       
        duplicatingFlatMapDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(1,Hi)\n" + "(1,HI)\n" +
        "(2,Hello)\n" + "(2,HELLO)\n" +
        "(3,Hello world)\n" + "(3,HELLO WORLD)\n" +
        "(4,Hello world, how are you?)\n" + "(4,HELLO WORLD, HOW ARE YOU?)\n" +
        "(5,I am fine.)\n" + "(5,I AM FINE.)\n" +
        "(6,Luke Skywalker)\n" + "(6,LUKE SKYWALKER)\n" +
        "(7,Comment#1)\n" + "(7,COMMENT#1)\n" +
        "(8,Comment#2)\n" + "(8,COMMENT#2)\n" +
        "(9,Comment#3)\n" + "(9,COMMENT#3)\n" +
        "(10,Comment#4)\n" + "(10,COMMENT#4)\n" +
        "(11,Comment#5)\n" + "(11,COMMENT#5)\n" +
        "(12,Comment#6)\n" + "(12,COMMENT#6)\n" +
        "(13,Comment#7)\n" + "(13,COMMENT#7)\n" +
        "(14,Comment#8)\n" + "(14,COMMENT#8)\n" +
        "(15,Comment#9)\n" + "(15,COMMENT#9)\n" +
        "(16,Comment#10)\n" + "(16,COMMENT#10)\n" +
        "(17,Comment#11)\n" + "(17,COMMENT#11)\n" +
        "(18,Comment#12)\n" + "(18,COMMENT#12)\n" +
        "(19,Comment#13)\n" + "(19,COMMENT#13)\n" +
        "(20,Comment#14)\n" + "(20,COMMENT#14)\n" +
        "(21,Comment#15)\n" + "(21,COMMENT#15)\n";
      }
      case 3: {
        /*
         * Test mapper configured via JobConf
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.filterPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
        DataSet<Tuple2<IntWritable, Text>> hellos = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));
       
        hellos.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(2,Hello)\n" +
        "(3,Hello world)\n" +
        "(4,Hello world, how are you?)\n";
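
The wrapped mapper classes (NonPassingMapper, DuplicatingMapper, ConfigurableMapper) are not shown in the excerpt. A HadoopMapFunction wraps an implementation of Hadoop's mapred-API Mapper; NonPassingMapper emits nothing (hence the empty expected result of case 1), while ConfigurableMapper reads its filter prefix from the JobConf. A sketch of ConfigurableMapper, reconstructed from the expected output of case 3 (the class body is an assumption, not part of the excerpt):

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Forwards only records whose value starts with the prefix configured in the JobConf.
public class ConfigurableMapper implements Mapper<IntWritable, Text, IntWritable, Text> {

  private String filterPrefix;

  @Override
  public void configure(JobConf conf) {
    // reads the key that case 3 sets via conf.set("my.filterPrefix", "Hello")
    this.filterPrefix = conf.get("my.filterPrefix");
  }

  @Override
  public void map(IntWritable key, Text value, OutputCollector<IntWritable, Text> out, Reporter reporter)
      throws IOException {
    if (value.toString().startsWith(filterPrefix)) {
      out.collect(key, value);
    }
  }

  @Override
  public void close() throws IOException { }
}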

Examples of org.apache.flink.api.java.ExecutionEnvironment

      switch(progId) {
      case 1: {
        /*
         * Test standard grouping
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() / 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new CommentCntReducer()));
       
        commentCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,0)\n" +
            "(1,3)\n" +
            "(2,5)\n" +
            "(3,5)\n" +
            "(4,2)\n";
      }
      case 2: {
        /*
         * Test ungrouped Hadoop reducer
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
           
        DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));
       
        commentCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(42,15)\n";
      }
      case 3: {
        /*
         * Test configuration via JobConf
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.cntPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() % 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> helloCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                new ConfigurableCntReducer(), conf));
       
        helloCnts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,0)\n" +
            "(1,0)\n" +
            "(2,1)\n" +
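
HadoopReduceFunction analogously wraps Hadoop's mapred-API Reducer. The CommentCntReducer used in case 1 is not shown; a sketch consistent with the expected per-group counts (the body is a reconstruction, not taken from the excerpt):

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Counts, per key group, the values that start with "Comment".
public class CommentCntReducer implements Reducer<IntWritable, Text, IntWritable, IntWritable> {

  @Override
  public void reduce(IntWritable key, Iterator<Text> values,
      OutputCollector<IntWritable, IntWritable> out, Reporter reporter) throws IOException {
    int cnt = 0;
    while (values.hasNext()) {
      if (values.next().toString().startsWith("Comment")) {
        cnt++;
      }
    }
    out.collect(key, new IntWritable(cnt));
  }

  @Override
  public void configure(JobConf conf) { }

  @Override
  public void close() throws IOException { }
}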

Examples of org.apache.flink.api.java.ExecutionEnvironment

      switch(progId) {
      case 1: {
        /*
         * Test standard counting with combiner
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = new IntWritable(v.f0.get() / 6);
                outT.f1 = new IntWritable(1);
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new SumReducer()));
       
        counts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,5)\n" +
            "(1,6)\n" +
            "(2,6)\n" +
            "(3,4)\n";
      }
      case 2: {
        /*
         * Test ungrouped Hadoop reducer
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = new IntWritable(0);
                outT.f1 = v.f0;
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new SumReducer()));
       
        sum.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,231)\n";
      }
      case 3: {
        /* Test combiner */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, IntWritable>>() {
              private static final long serialVersionUID = 1L;
              Tuple2<IntWritable,IntWritable> outT = new Tuple2<IntWritable,IntWritable>();
              @Override
              public Tuple2<IntWritable, IntWritable> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                outT.f0 = v.f0;
                outT.f1 = new IntWritable(1);
                return outT;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                new SumReducer(), new KeyChangingReducer()));
       
        counts.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,5)\n" +
            "(1,6)\n" +
            "(2,5)\n" +
            "(3,5)\n";
      }
      case 4: {
        /*
         * Test configuration via JobConf
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        JobConf conf = new JobConf();
        conf.set("my.cntPrefix", "Hello");
       
        DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<IntWritable, Text>>() {
              private static final long serialVersionUID = 1L;
              @Override
              public Tuple2<IntWritable, Text> map(Tuple2<IntWritable, Text> v)
                  throws Exception {
                v.f0 = new IntWritable(v.f0.get() % 5);
                return v;
              }
            });
           
        DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                new ConfigurableCntReducer(), conf));
       
        hellos.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "(0,0)\n" +
            "(1,0)\n" +
            "(2,1)\n" +
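
HadoopReduceCombineFunction takes two mapred-API Reducer instances, the first used as the final reducer and the second as the combiner. The SumReducer passed for both roles above is not shown; a sketch consistent with the expected sums (the body is a reconstruction):

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Sums all values of a group; because summing is associative and commutative,
// the same class can serve as combiner and as final reducer.
public class SumReducer implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

  @Override
  public void reduce(IntWritable key, Iterator<IntWritable> values,
      OutputCollector<IntWritable, IntWritable> out, Reporter reporter) throws IOException {
    int sum = 0;
    while (values.hasNext()) {
      sum += values.next().get();
    }
    out.collect(key, new IntWritable(sum));
  }

  @Override
  public void configure(JobConf conf) { }

  @Override
  public void close() throws IOException { }
}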

Examples of org.apache.flink.api.java.ExecutionEnvironment

public class SpargelCompilerTest extends CompilerTestBase {

  @Test
  public void testSpargelCompiler() {
    try {
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
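
The connected-components building blocks (IdAssigner, CCUpdater, CCMessager) are referenced but not shown. Sketches consistent with the classic Spargel connected-components example, assuming the org.apache.flink.spargel.java API of this era (the bodies are reconstructions):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.spargel.java.MessageIterator;
import org.apache.flink.spargel.java.MessagingFunction;
import org.apache.flink.spargel.java.VertexUpdateFunction;
import org.apache.flink.types.NullValue;

// Initializes each vertex with its own id as the component id.
public class IdAssigner implements MapFunction<Long, Tuple2<Long, Long>> {
  @Override
  public Tuple2<Long, Long> map(Long vertexId) {
    return new Tuple2<Long, Long>(vertexId, vertexId);
  }
}

// Adopts the smallest component id seen among the incoming messages.
public class CCUpdater extends VertexUpdateFunction<Long, Long, Long> {
  @Override
  public void updateVertex(Long vertexKey, Long vertexValue, MessageIterator<Long> inMessages) {
    long min = vertexValue;
    for (long msg : inMessages) {
      min = Math.min(min, msg);
    }
    if (min < vertexValue) {
      setNewVertexValue(min);
    }
  }
}

// Propagates the current component id to all neighbors.
public class CCMessager extends MessagingFunction<Long, Long, Long, NullValue> {
  @Override
  public void sendMessages(Long vertexId, Long componentId) {
    sendMessageToAllNeighbors(componentId);
  }
}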

Examples of org.apache.flink.api.java.ExecutionEnvironment

  public void testSpargelCompilerWithBroadcastVariable() {
    try {
      final String BC_VAR_NAME = "borat variable";
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(DEFAULT_PARALLELISM);
      // compose test program
      {
        DataSet<Long> bcVar = env.fromElements(1L);
       
        DataSet<Long> vertexIds = env.generateSequence(1, 2);
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
       
        DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
       
        VertexCentricIteration<Long, Long, Long, ?> vcIter = VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100);
        vcIter.addBroadcastSetForMessagingFunction(BC_VAR_NAME, bcVar);
        vcIter.addBroadcastSetForUpdateFunction(BC_VAR_NAME, bcVar);
       
        DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(vcIter);
       
        result.print();
      }
     
      Plan p = env.createProgramPlan("Spargel Connected Components");
      OptimizedPlan op = compileNoStats(p);
     
      // check the sink
      SinkPlanNode sink = op.getDataSinks().iterator().next();
      assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
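
The test registers the same broadcast set under BC_VAR_NAME for both the messaging and the update function, but the consuming side is not shown. A hypothetical access pattern, assuming the Spargel base classes expose a getBroadcastSet(String) accessor and a per-superstep preSuperstep() hook (both are assumptions about the API, not taken from the excerpt):

import java.util.Collection;

import org.apache.flink.spargel.java.MessageIterator;
import org.apache.flink.spargel.java.VertexUpdateFunction;

// Hypothetical variant of CCUpdater that reads the broadcast set once per superstep.
public class CCUpdaterWithBroadcast extends VertexUpdateFunction<Long, Long, Long> {

  private Collection<Long> bcData;

  @Override
  public void preSuperstep() {
    // the name must match the one passed to addBroadcastSetForUpdateFunction(...)
    this.bcData = getBroadcastSet("borat variable");
  }

  @Override
  public void updateVertex(Long vertexKey, Long vertexValue, MessageIterator<Long> inMessages) {
    // use bcData to parameterize the usual connected-components update
  }
}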

Examples of org.apache.flink.api.java.ExecutionEnvironment

  @Test
  public void testDeltaIterationNotDependingOnSolutionSet() {
    try {
      final List<Tuple2<Long, Long>> result = new ArrayList<Tuple2<Long,Long>>();
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
      env.setDegreeOfParallelism(1);
     
      DataSet<Tuple2<Long, Long>> input = env.generateSequence(0, 9).map(new Duplicator<Long>());
     
      DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 5, 1);
     
      iteration.closeWith(iteration.getWorkset(), iteration.getWorkset().map(new TestMapper()))
        .output(new LocalCollectionOutputFormat<Tuple2<Long,Long>>(result));
     
      env.execute();
     
      boolean[] present = new boolean[50];
      for (Tuple2<Long, Long> t : result) {
        present[t.f0.intValue()] = true;
      }
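
Duplicator and TestMapper are referenced but not shown. A minimal Duplicator consistent with its use on generateSequence(0, 9) (the body is an assumption) turns each value into an identical pair, producing both the initial solution set and workset of the delta iteration:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Turns each value into a (value, value) pair.
public class Duplicator<T> implements MapFunction<T, Tuple2<T, T>> {
  @Override
  public Tuple2<T, T> map(T value) {
    return new Tuple2<T, T>(value, value);
  }
}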

Examples of org.apache.flink.api.java.ExecutionEnvironment

    }
   
    final String inputPath = args[0];
    final String outputPath = args[1];
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(1);
   
    // Set up the Hadoop Input Format
    Job job = Job.getInstance();
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, job);
    TextInputFormat.addInputPath(job, new Path(inputPath));
   
    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
   
    // Tokenize the line and convert from Writable "Text" to String for better handling
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
   
    // Sum up the words
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
   
    // Convert String back to Writable "Text" for use with Hadoop Output Format
    DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
   
    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), job);
    // set the separator under both keys, since this test runs with both the hadoop1 and hadoop2 profiles
    hadoopOutputFormat.getConfiguration().set("mapreduce.output.textoutputformat.separator", " ");
    hadoopOutputFormat.getConfiguration().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(job, new Path(outputPath));
   
    // Output & Execute
    hadoopResult.output(hadoopOutputFormat);
    env.execute("Word Count");
  }
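
Tokenizer and HadoopDatatypeMapper are referenced but not shown; sketches matching the signatures used above, following the usual word-count pattern (the bodies are assumptions):

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

// Splits each input line into lower-cased words and emits (word, 1) pairs.
public class Tokenizer implements FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
  @Override
  public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
    for (String token : value.f1.toString().toLowerCase().split("\\W+")) {
      if (token.length() > 0) {
        out.collect(new Tuple2<String, Integer>(token, 1));
      }
    }
  }
}

// Converts the String/Integer result back to Hadoop Writable types for the output format.
public class HadoopDatatypeMapper implements MapFunction<Tuple2<String, Integer>, Tuple2<Text, IntWritable>> {
  @Override
  public Tuple2<Text, IntWritable> map(Tuple2<String, Integer> value) {
    return new Tuple2<Text, IntWritable>(new Text(value.f0), new IntWritable(value.f1));
  }
}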

Examples of org.apache.flink.api.java.ExecutionEnvironment

      final int NUM_ITERATIONS = 13;
     
      final int ITERATION_DOP = 77;
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      DataSet<Long> bcMessaging = env.fromElements(1L);
      DataSet<Long> bcUpdate = env.fromElements(1L);
     
      DataSet<Tuple2<String, Double>> result;
     
      // ------------ construct the test program ------------------
      {
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<String, Double>("abc", 3.44));
 
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<String, String>("a", "c"));
       
       
        VertexCentricIteration<String, Double, Long, ?> vertexIteration =
            VertexCentricIteration.withPlainEdges(edges, new UpdateFunction(), new MessageFunctionNoEdgeValue(), NUM_ITERATIONS);
        vertexIteration.addBroadcastSetForMessagingFunction(BC_SET_MESSAGES_NAME, bcMessaging);
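
UpdateFunction and MessageFunctionNoEdgeValue are not shown. Minimal bodies consistent with the type parameters above (String keys, Double vertex values, Long messages) could look like the following; the real test functions may differ, since this test only checks plan translation:

import org.apache.flink.spargel.java.MessageIterator;
import org.apache.flink.spargel.java.MessagingFunction;
import org.apache.flink.spargel.java.VertexUpdateFunction;
import org.apache.flink.types.NullValue;

public class UpdateFunction extends VertexUpdateFunction<String, Double, Long> {
  @Override
  public void updateVertex(String vertexKey, Double vertexValue, MessageIterator<Long> inMessages) {
    setNewVertexValue(vertexValue); // keep the value unchanged
  }
}

public class MessageFunctionNoEdgeValue extends MessagingFunction<String, Double, Long, NullValue> {
  @Override
  public void sendMessages(String vertexKey, Double vertexValue) {
    sendMessageToAllNeighbors(vertexValue.longValue()); // message type is Long
  }
}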

Examples of org.apache.flink.api.java.ExecutionEnvironment

    resultPath = getTempFilePath("results");
  }
 
  @Override
  protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
    DataSet<String> edgeString = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));
   
    DataSet<Tuple2<Long, Long>> edges = edgeString.map(new EdgeParser());
   
    DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());
    DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));
   
    result.writeAsCsv(resultPath, "\n", " ");
    env.execute("Spargel Connected Components");
  }
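
EdgeParser is not shown in the excerpt; a sketch that parses the space-separated edge lines produced by ConnectedComponentsData (the separator is an assumption, consistent with the space-separated CSV output used above):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Parses one "source target" line into an edge tuple.
public class EdgeParser implements MapFunction<String, Tuple2<Long, Long>> {
  @Override
  public Tuple2<Long, Long> map(String value) {
    String[] parts = value.split(" ");
    return new Tuple2<Long, Long>(Long.parseLong(parts[0]), Long.parseLong(parts[1]));
  }
}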

Examples of org.apache.flink.api.java.ExecutionEnvironment

      final int NUM_ITERATIONS = 13;
     
      final int ITERATION_DOP = 77;
     
     
      ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
     
      DataSet<Long> bcVar = env.fromElements(1L);
     
      DataSet<Tuple2<String, Double>> result;
     
      // ------------ construct the test program ------------------
      {
       
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<String, Double>("abc", 3.44));
 
        @SuppressWarnings("unchecked")
        DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<String, String>("a", "c"));
       
       
        VertexCentricIteration<String, Double, Long, ?> vertexIteration =
            VertexCentricIteration.withPlainEdges(edges, new UpdateFunction(), new MessageFunctionNoEdgeValue(), NUM_ITERATIONS);
        vertexIteration.addBroadcastSetForMessagingFunction(BC_SET_MESSAGES_NAME, bcVar);