Examples of ExecutionEnvironment


Examples of org.apache.flink.api.java.ExecutionEnvironment
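All of the Flink snippets below obtain their environment through ExecutionEnvironment.getExecutionEnvironment(), which returns a local environment when the program is started standalone and the cluster's environment when it is submitted as a packaged job. A minimal sketch of the factory methods (class name, "jobmanager-host", port, and jar path are placeholders):

    import org.apache.flink.api.java.ExecutionEnvironment;

    public class EnvironmentFactories {
      public static void main(String[] args) {
        // Chooses automatically: local when run standalone (e.g. from the IDE),
        // remote when the program is submitted to a cluster as a packaged job.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Explicit alternatives:
        ExecutionEnvironment local = ExecutionEnvironment.createLocalEnvironment();
        ExecutionEnvironment remote = ExecutionEnvironment.createRemoteEnvironment(
            "jobmanager-host", 6123, "/path/to/program.jar");
      }
    }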

  // Method opening reconstructed: the fragment reads program arguments and ends with env.execute().
  public static void main(String[] args) throws Exception {
    final String inputPath = args[0];
    final String outputPath = args[1];
   
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    // Set up the Hadoop Input Format
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
    TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));
   
    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
   
    DataSet<Tuple2<Text, LongWritable>> words =
        text.flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()))
          .groupBy(0).reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(new Counter(), new Counter()));
   
    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat =
        new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), new JobConf());
    hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(outputPath));
   
    // Output & Execute
    words.output(hadoopOutputFormat).setParallelism(1);
    env.execute("Hadoop Compat WordCount");
  }
View Full Code Here
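The snippet references a Tokenizer and a Counter that are not shown. A minimal sketch (as static nested classes) of what they could look like, assuming the classic Hadoop mapred-API word-count implementations that HadoopMapFunction and HadoopReduceCombineFunction expect:

    import java.io.IOException;
    import java.util.Iterator;
    import java.util.StringTokenizer;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;

    // Emits (word, 1) for every token of the input line.
    public static final class Tokenizer extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

      @Override
      public void map(LongWritable key, Text value,
          OutputCollector<Text, LongWritable> out, Reporter reporter) throws IOException {
        StringTokenizer tokenizer = new StringTokenizer(value.toString().toLowerCase());
        while (tokenizer.hasMoreTokens()) {
          out.collect(new Text(tokenizer.nextToken()), new LongWritable(1L));
        }
      }
    }

    // Sums the counts per word; passed twice above, so it serves as both reducer and combiner.
    public static final class Counter extends MapReduceBase
        implements Reducer<Text, LongWritable, Text, LongWritable> {

      @Override
      public void reduce(Text key, Iterator<LongWritable> values,
          OutputCollector<Text, LongWritable> out, Reporter reporter) throws IOException {
        long sum = 0;
        while (values.hasNext()) {
          sum += values.next().get();
        }
        out.collect(key, new LongWritable(sum));
      }
    }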

Examples of org.apache.flink.api.java.ExecutionEnvironment

    final String pointsPath = args[0];
    final String centersPath = args[1];
    final String outputPath = args[2];
    final int numIterations = Integer.parseInt(args[3]);


    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setDegreeOfParallelism(4);

    // get input data
    DataSet<Point> points = env.readCsvFile(pointsPath)
        .fieldDelimiter('|')
        .includeFields(true, true)
        .types(Double.class, Double.class)
        .map(new TuplePointConverter());

    DataSet<Centroid> centroids = env.readCsvFile(centersPath)
        .fieldDelimiter('|')
        .includeFields(true, true, true)
        .types(Integer.class, Double.class, Double.class)
        .map(new TupleCentroidConverter());

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);

    DataSet<Centroid> newCentroids = points
      // compute closest centroid for each point
      .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
      // count and sum point coordinates for each centroid
      .map(new CountAppender())
      // !test if key expressions are working!
      .groupBy("field0").reduce(new CentroidAccumulator())
      // compute new centroids from point counts and coordinate sums
      .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    DataSet<Tuple2<Integer, Point>> clusteredPoints = points
        // assign points to final clusters
        .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

    // emit result
    clusteredPoints.writeAsCsv(outputPath, "\n", " ");

    return env.createProgramPlan();
  }
View Full Code Here
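The KMeans snippet relies on the broadcast set attached via withBroadcastSet(loop, "centroids"). A sketch of how SelectNearestCenter could consume it, assuming the Point and Centroid types of Flink's KMeans example (Centroid extends Point, carries an id, and Point offers euclideanDistance):

    import java.util.Collection;

    import org.apache.flink.api.common.functions.RichMapFunction;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.configuration.Configuration;

    public static final class SelectNearestCenter extends RichMapFunction<Point, Tuple2<Integer, Point>> {
      private Collection<Centroid> centroids;

      @Override
      public void open(Configuration parameters) throws Exception {
        // Reads the data set attached via withBroadcastSet(..., "centroids");
        // inside the iteration this is the current state of the iterative set.
        this.centroids = getRuntimeContext().getBroadcastVariable("centroids");
      }

      @Override
      public Tuple2<Integer, Point> map(Point p) throws Exception {
        double minDistance = Double.MAX_VALUE;
        int closestCentroidId = -1;
        for (Centroid centroid : centroids) {
          double distance = p.euclideanDistance(centroid);
          if (distance < minDistance) {
            minDistance = distance;
            closestCentroidId = centroid.id;
          }
        }
        return new Tuple2<Integer, Point>(closestCentroidId, p);
      }
    }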

Examples of org.apache.flink.api.java.ExecutionEnvironment

      switch(progId) {
      case 1: {
        /*
         * Union of two identical data sets
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
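        // union has bag semantics and keeps duplicates, so unioning a data set
        // with itself doubles every record.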
        DataSet<Tuple3<Integer, Long, String>> unionDs = ds.union(CollectionDataSets.get3TupleDataSet(env));
       
        unionDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING;
      }
      case 2: {
        /*
         * Union of five identical data sets, built with multiple unions
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> unionDs = ds.union(CollectionDataSets.get3TupleDataSet(env))
            .union(CollectionDataSets.get3TupleDataSet(env))
            .union(CollectionDataSets.get3TupleDataSet(env))
            .union(CollectionDataSets.get3TupleDataSet(env));
       
        unionDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING;
      }
      case 3: {
        /*
         * Test on union with empty dataset
         */
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        // Don't know how to create an empty result other than by filtering everything out
        DataSet<Tuple3<Integer, Long, String>> empty = CollectionDataSets.get3TupleDataSet(env).
            filter(new RichFilterFunction<Tuple3<Integer,Long,String>>() {
              private static final long serialVersionUID = 1L;

              @Override
              public boolean filter(Tuple3<Integer, Long, String> value) throws Exception {
                return false;
              }
            });
       
        DataSet<Tuple3<Integer, Long, String>> unionDs = CollectionDataSets.get3TupleDataSet(env)
          .union(empty);
     
        unionDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return FULL_TUPLE_3_STRING;       
      }
      default:
View Full Code Here

Examples of org.apache.flink.api.java.ExecutionEnvironment

  @Override
  protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> text = env.readTextFile(textPath);

    DataSet<WC> counts = text
        .flatMap(new Tokenizer())
        .groupBy("word")
        .reduce(new ReduceFunction<WC>() {
          private static final long serialVersionUID = 1L;

          public WC reduce(WC value1, WC value2) {
            return new WC(value1.word, value1.count + value2.count);
          }
        });

    counts.writeAsText(resultPath);

    env.execute("WordCount with custom data types example");
  }
View Full Code Here
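The custom data type WC and the Tokenizer are not shown. A plausible minimal sketch; for groupBy("word") to work, WC must satisfy Flink's POJO rules (a public no-argument constructor and public or accessor-exposed fields):

    import org.apache.flink.api.common.functions.FlatMapFunction;
    import org.apache.flink.util.Collector;

    // Word-count POJO; the public fields make "word" usable as a grouping key expression.
    public static class WC {
      public String word;
      public int count;

      public WC() {} // required by Flink's POJO type extraction

      public WC(String word, int count) {
        this.word = word;
        this.count = count;
      }

      @Override
      public String toString() {
        return word + " " + count;
      }
    }

    // Splits lines into words and emits an initial count of 1 per word.
    public static final class Tokenizer implements FlatMapFunction<String, WC> {
      @Override
      public void flatMap(String value, Collector<WC> out) {
        for (String token : value.toLowerCase().split("\\W+")) {
          if (token.length() > 0) {
            out.collect(new WC(token, 1));
          }
        }
      }
    }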

Examples of org.apache.flink.api.java.ExecutionEnvironment

      case 1: {
        /*
         * Full Aggregate
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple2<Integer, Long>> aggregateDs = ds
            .aggregate(Aggregations.SUM, 0)
            .and(Aggregations.MAX, 1)
            .project(0, 1).types(Integer.class, Long.class);
       
        aggregateDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "231,6\n";
      }
      case 2: {
        /*
         * Grouped Aggregate
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
        DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.SUM, 0)
            .project(1, 0).types(Long.class, Integer.class);
       
        aggregateDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1\n" +
        "2,5\n" +
        "3,15\n" +
        "4,34\n" +
        "5,65\n" +
        "6,111\n";
      }
      case 3: {
        /*
         * Nested Aggregate
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
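        // Chained aggregates run one after another: the first MIN is computed per
        // group, the second MIN then reduces those results to the global minimum.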
        DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
            .aggregate(Aggregations.MIN, 0)
            .aggregate(Aggregations.MIN, 0)
            .project(0).types(Integer.class);
       
        aggregateDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1\n";
      }
      default:
View Full Code Here

Examples of org.apache.flink.api.java.ExecutionEnvironment

      case 1: {
        /*
         * check correctness of distinct on tuples with key field selector
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> distinctDs = ds.union(ds).distinct(0, 1, 2);
       
        distinctDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "2,2,Hello\n" +
            "3,2,Hello world\n";
      }
      case 2: {
       
        /*
          * check correctness of distinct on tuples with a key field selector that selects only some of the fields
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long,  Integer, String, Long>> ds = CollectionDataSets.getSmall5TupleDataSet(env);
        DataSet<Tuple1<Integer>> distinctDs = ds.union(ds).distinct(0).project(0).types(Integer.class);
       
        distinctDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1\n" +
            "2\n";
      }
      case 3: {
       
        /*
         * check correctness of distinct on tuples with key extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long,  Integer, String, Long>> ds = CollectionDataSets.getSmall5TupleDataSet(env);
        DataSet<Tuple1<Integer>> reduceDs = ds.union(ds)
            .distinct(new KeySelector<Tuple5<Integer, Long,  Integer, String, Long>, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(Tuple5<Integer, Long,  Integer, String, Long> in) {
                    return in.f0;
                  }
                }).project(0).types(Integer.class);
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1\n" +
            "2\n";
               
      }
      case 4: {
       
        /*
         * check correctness of distinct on custom type with type extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
        DataSet<Tuple1<Integer>> reduceDs = ds
            .distinct(new KeySelector<CustomType, Integer>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Integer getKey(CustomType in) {
                    return in.myInt;
                  }
                })
            .map(new RichMapFunction<CustomType, Tuple1<Integer>>() {
              @Override
              public Tuple1<Integer> map(CustomType value) throws Exception {
                return new Tuple1<Integer>(value.myInt);
              }
            });
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1\n" +
            "2\n" +
            "3\n" +
            "4\n" +
            "5\n" +
            "6\n";
       
      }
      case 5: {
       
        /*
         * check correctness of distinct on tuples
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
        DataSet<Tuple3<Integer, Long, String>> distinctDs = ds.union(ds).distinct();
       
        distinctDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1,Hi\n" +
            "2,2,Hello\n" +
            "3,2,Hello world\n";
      }
      case 6: {
       
        /*
         * check correctness of distinct on custom type with tuple-returning type extractor
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
        DataSet<Tuple2<Integer, Long>> reduceDs = ds
            .distinct(new KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>>() {
                  private static final long serialVersionUID = 1L;
                  @Override
                  public Tuple2<Integer,Long> getKey(Tuple5<Integer, Long, Integer, String, Long> t) {
                    return new Tuple2<Integer, Long>(t.f0, t.f4);
                  }
                })
            .project(0,4).types(Integer.class, Long.class);
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1,1\n" +
            "2,1\n" +
            "2,2\n" +
            "3,2\n" +
            "3,3\n" +
            "4,1\n" +
            "4,2\n" +
            "5,1\n" +
            "5,2\n" +
            "5,3\n";
      }
      case 7: {
       
        /*
         * check correctness of distinct on tuples with field expressions
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<Tuple5<Integer, Long,  Integer, String, Long>> ds = CollectionDataSets.getSmall5TupleDataSet(env);
        DataSet<Tuple1<Integer>> reduceDs = ds.union(ds)
            .distinct("f0").project(0).types(Integer.class);
       
        reduceDs.writeAsCsv(resultPath);
        env.execute();
       
        // return expected result
        return "1\n" +
            "2\n";
               
      }
      case 8: {
       
        /*
         * check correctness of distinct on Pojos
         */
       
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
       
        DataSet<POJO> ds = CollectionDataSets.getDuplicatePojoDataSet(env);
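        // A dot-path field expression selects the nested POJO field as the distinct key.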
        DataSet<Integer> reduceDs = ds.distinct("nestedPojo.longNumber").map(new MapFunction<CollectionDataSets.POJO, Integer>() {
          @Override
          public Integer map(POJO value) throws Exception {
            return (int) value.nestedPojo.longNumber;
          }
        });
       
        reduceDs.writeAsText(resultPath);
        env.execute();
       
        // return expected result
        return "10000\n20000\n30000\n";
               
      }
View Full Code Here

Examples of org.apache.flink.api.java.ExecutionEnvironment

  private List<Tuple2<Long, Long>> result = new ArrayList<Tuple2<Long, Long>>();
 
  @Override
  protected void testProgram() throws Exception {
   
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
   
    DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20).map(new Dupl());
       
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 20, 0);
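    // closeWith takes (solution set delta, next workset). Feeding the unchanged
    // workset back as both means the workset never becomes empty, so the delta
    // iteration runs for the full 20 iterations.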
    iter.closeWith(iter.getWorkset(), iter.getWorkset())
      .output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(result));
   
    env.execute();
  }
View Full Code Here

Examples of org.apache.flink.api.java.ExecutionEnvironment

  @Override
  protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> text = env.fromElements(WordCountData.TEXT);
    DataSet<Tuple2<String, Integer>> words = text.flatMap(new WordCount.Tokenizer());
    DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);

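    // LocalCollectionOutputFormat gathers the result into a Java collection on the
    // client; this only works when the job executes in the same JVM (local mode).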
    result.output(new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected));
    env.execute("Word Count Collection");
  }
View Full Code Here

Examples of org.hibernate.junit.functional.ExecutionEnvironment

   @Override
   protected void prepareTest() throws Exception
   {
      log.info( "Building second node locally managed execution env" );
      secondNodeEnvironment = new ExecutionEnvironment( new SecondNodeSettings() );
      secondNodeEnvironment.initialize();
     
      super.prepareTest();
   }
View Full Code Here

Examples of org.hibernate.testing.junit.functional.ExecutionEnvironment

   @Override
   protected void prepareTest() throws Exception {
      log.info("Building second node locally managed execution env");
      secondNodeEnvironment = new ExecutionEnvironment(new SecondNodeSettings());
      secondNodeEnvironment.initialize();
      super.prepareTest();
   }
View Full Code Here