Package org.apache.flink.streaming.examples.wordcount

Source Code of org.apache.flink.streaming.examples.wordcount.WordCount

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.streaming.examples.wordcount;

import java.util.StringTokenizer;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.examples.java.wordcount.util.WordCountData;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
* Implements the "WordCount" program that computes a simple word occurrence
* histogram over text files in a streaming fashion.
*
* <p>
* The input is a plain text file with lines separated by newline characters.
*
* <p>
* Usage: <code>WordCount &lt;text path&gt; &lt;result path&gt;</code><br>
* If no parameters are provided, the program is run with default data from
* {@link WordCountData}.
*
* <p>
* This example shows how to:
* <ul>
* <li>write a simple Flink Streaming program.
* <li>use Tuple data types.
* <li>write and use user-defined functions.
* </ul>
*
*/
public class WordCount {

  // *************************************************************************
  // PROGRAM
  // *************************************************************************

  public static void main(String[] args) throws Exception {

    if (!parseParameters(args)) {
      return;
    }

    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataStream<String> text = getTextDataStream(env);

    DataStream<Tuple2<String, Integer>> counts =
        // split up the lines in pairs (2-tuples) containing: (word,1)
        text.flatMap(new Tokenizer())
        // group by the tuple field "0" and sum up tuple field "1"
        .groupBy(0)
        .sum(1);

    // emit result
    if (fileOutput) {
      counts.writeAsText(outputPath, 1);
    } else {
      counts.print();
    }

    // execute program
    env.execute("Streaming WordCount");
  }

  // *************************************************************************
  // USER FUNCTIONS
  // *************************************************************************

  /**
   * Implements the string tokenizer that splits sentences into words as a
   * user-defined FlatMapFunction. The function takes a line (String) and
   * splits it into multiple pairs in the form of "(word,1)" (Tuple2<String,
   * Integer>).
   */
  public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
    private static final long serialVersionUID = 1L;

    @Override
    public void flatMap(String inTuple, Collector<Tuple2<String, Integer>> out)
        throws Exception {
      // tokenize the line
      StringTokenizer tokenizer = new StringTokenizer(inTuple);

      // emit the pairs
      while (tokenizer.hasMoreTokens()) {
        out.collect(new Tuple2<String, Integer>(tokenizer.nextToken(), 1));
      }
    }
  }

  // *************************************************************************
  // UTIL METHODS
  // *************************************************************************

  private static boolean fileOutput = false;
  private static String textPath;
  private static String outputPath;

  private static boolean parseParameters(String[] args) {

    if (args.length > 0) {
      // parse input arguments
      fileOutput = true;
      if (args.length == 2) {
        textPath = args[0];
        outputPath = args[1];
      } else {
        System.err.println("Usage: WordCount <text path> <result path>");
        return false;
      }
    } else {
      System.out.println("Executing WordCount example with built-in default data.");
      System.out.println("  Provide parameters to read input data from a file.");
      System.out.println("  Usage: WordCount <text path> <result path>");
    }
    return true;
  }

  private static DataStream<String> getTextDataStream(StreamExecutionEnvironment env) {
    if (fileOutput) {
      // read the text file from given input path
      return env.readTextFile(textPath);
    } else {
      // get default test text data
      return env.fromElements(WordCountData.WORDS);
    }
  }
}
TOP

Related Classes of org.apache.flink.streaming.examples.wordcount.WordCount

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.