Package org.apache.spark.streaming.api.java

Examples of org.apache.spark.streaming.api.java.JavaStreamingContext
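Every example below follows the same life cycle: construct a JavaStreamingContext with a batch interval, attach one or more input DStreams, declare the transformations, then start the context and wait for termination. A minimal sketch of that skeleton (the master, app name, and batch interval are placeholders, not values taken from the examples below):

    import org.apache.spark.SparkConf;
    import org.apache.spark.streaming.Duration;
    import org.apache.spark.streaming.api.java.JavaStreamingContext;

    public class StreamingSkeleton {
      public static void main(String[] args) {
        // Placeholder master and app name; in practice these usually come
        // from spark-submit rather than being hard-coded.
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("StreamingSkeleton");

        // A new batch of input data is formed every 1000 ms.
        JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(1000));

        // ... define input DStreams and transformations here ...

        ssc.start();            // begin receiving and processing data
        ssc.awaitTermination(); // block until stop() is called or an error occurs
      }
    }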



    StreamingExamples.setStreamingLogLevels();

    // Create the context
    JavaStreamingContext ssc = new JavaStreamingContext(args[0], "QueueStream", new Duration(1000),
            System.getenv("SPARK_HOME"), JavaStreamingContext.jarOfClass(JavaQueueStream.class));

    // Create the queue through which RDDs can be pushed into a QueueInputDStream
    Queue<JavaRDD<Integer>> rddQueue = new LinkedList<JavaRDD<Integer>>();

    // Create and push some RDDs into the queue
    List<Integer> list = Lists.newArrayList();
    for (int i = 0; i < 1000; i++) {
      list.add(i);
    }

    for (int i = 0; i < 30; i++) {
      rddQueue.add(ssc.sparkContext().parallelize(list));
    }

    // Create the QueueInputDStream and use it to do some processing
    JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue);
    JavaPairDStream<Integer, Integer> mappedStream = inputStream.mapToPair(
        new PairFunction<Integer, Integer, Integer>() {
          @Override
          public Tuple2<Integer, Integer> call(Integer i) {
            return new Tuple2<Integer, Integer>(i % 10, 1);
          }
        });
    JavaPairDStream<Integer, Integer> reducedStream = mappedStream.reduceByKey(
      new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
    });

    reducedStream.print();
    ssc.start();
    ssc.awaitTermination();
  }
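
With Java 8 lambdas the anonymous-class pipeline above shrinks to a few lines. A sketch reusing the inputStream variable from this example (assumes Java 8 and a Spark version whose function types are single-method interfaces, i.e. 1.0+):

    // Same pairing and reduction as above, written as lambdas.
    JavaPairDStream<Integer, Integer> reducedStream = inputStream
        .mapToPair(i -> new Tuple2<>(i % 10, 1))
        .reduceByKey((i1, i2) -> i1 + i2);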

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 2 second batch interval
    JavaStreamingContext jssc = new JavaStreamingContext(args[0], "KafkaWordCount",
            new Duration(2000), System.getenv("SPARK_HOME"),
            JavaStreamingContext.jarOfClass(JavaKafkaWordCount.class));

    int numThreads = Integer.parseInt(args[4]);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[3].split(",");
    for (String topic: topics) {
      topicMap.put(topic, numThreads);
    }

    JavaPairDStream<String, String> messages = KafkaUtils.createStream(jssc, args[1], args[2], topicMap);

    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
      @Override
      public String call(Tuple2<String, String> tuple2) {
        return tuple2._2();
      }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
        return Lists.newArrayList(SPACE.split(x));
      }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
      });

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
  }
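
Reading the indices off the code above, this older variant expects five positional arguments: args[0] is the Spark master, args[1] the ZooKeeper quorum, args[2] the Kafka consumer group ID, args[3] a comma-separated list of topics, and args[4] the number of consumer threads per topic.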

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    JavaStreamingContext ssc = new JavaStreamingContext(args[0], "JavaNetworkWordCount",
            new Duration(1000), System.getenv("SPARK_HOME"),
            JavaStreamingContext.jarOfClass(JavaNetworkWordCount.class));

    // Create a NetworkInputDStream on the target ip:port and count the
    // words in the input stream of \n-delimited text (e.g. generated by 'nc')
    JavaDStream<String> lines = ssc.socketTextStream(args[1], Integer.parseInt(args[2]));
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
        return Lists.newArrayList(SPACE.split(x));
      }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
      });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
  }
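
To try this locally, feed it newline-delimited text with netcat (nc -lk 9999) and pass localhost 9999 as the host/port arguments. A common extension is counting over a sliding window instead of a single batch; a sketch continuing from the wordCounts stream above (the window and slide durations are illustrative, and the inverse-reduce form requires a checkpoint directory):

    // Hypothetical checkpoint location; required by the inverse-reduce window form.
    ssc.checkpoint("/tmp/streaming-checkpoint");

    // Counts over the last 30 seconds, recomputed every 10 seconds.
    JavaPairDStream<String, Integer> windowedCounts = wordCounts.reduceByKeyAndWindow(
        (i1, i2) -> i1 + i2,   // fold in counts entering the window
        (i1, i2) -> i1 - i2,   // subtract counts leaving the window
        new Duration(30000),   // window length
        new Duration(10000));  // slide interval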

    @Before
    public void setUp() {
        System.clearProperty("spark.driver.port");
        System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock");
        ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000));
        ssc.checkpoint("checkpoint");
    }
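
Clearing spark.driver.port prevents the test from trying to reuse a port left over from a previous suite, and pointing spark.streaming.clock at ManualClock lets the test harness advance batch time deterministically instead of sleeping on the wall clock.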

    assertOrderInvariantEquals(expectedInitial, initialResult);
    Thread.sleep(1000);
    ssc.stop();

    ssc = new JavaStreamingContext(tempDir.getAbsolutePath());
    // Adjust for the fact that the last batch before the failure
    // will be re-processed after recovery
    List<List<Integer>> finalResult = JavaCheckpointTestUtils.runStreams(ssc, 2, 3);
    assertOrderInvariantEquals(expectedFinal, finalResult.subList(1, 3));
  }
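
Outside of tests, recovery from a checkpoint directory is usually expressed with JavaStreamingContext.getOrCreate, which rebuilds the context from checkpoint data if any exists and otherwise constructs it fresh. A sketch using the Spark 1.x JavaStreamingContextFactory API (checkpointDir and createNewContext() are placeholders, not names from the test above):

    JavaStreamingContextFactory factory = new JavaStreamingContextFactory() {
      @Override
      public JavaStreamingContext create() {
        JavaStreamingContext jssc = createNewContext(); // hypothetical helper that builds the DStream graph
        jssc.checkpoint(checkpointDir);                 // placeholder checkpoint path
        return jssc;
      }
    };
    // Recovers from checkpointDir if checkpoint data exists there, else calls factory.create().
    JavaStreamingContext context = JavaStreamingContext.getOrCreate(checkpointDir, factory);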

    server.start();

    final AtomicLong dataCounter = new AtomicLong(0);

    try {
      JavaStreamingContext ssc = new JavaStreamingContext("local[2]", "test", new Duration(200));
      JavaReceiverInputDStream<String> input =
        ssc.receiverStream(new JavaSocketReceiver("localhost", server.port()));
      JavaDStream<String> mapped = input.map(new Function<String, String>() {
        @Override
        public String call(String v1) throws Exception {
          return v1 + ".";
        }
      });
      mapped.foreachRDD(new Function<JavaRDD<String>, Void>() {
        @Override
        public Void call(JavaRDD<String> rdd) throws Exception {
          long count = rdd.count();
          dataCounter.addAndGet(count);
          return null;
        }
      });

      ssc.start();
      long startTime = System.currentTimeMillis();
      long timeout = 10000;

      Thread.sleep(200);
      for (int i = 0; i < 6; i++) {
        server.send("" + i + "\n"); // \n to make sure these are separate lines
        Thread.sleep(100);
      }
      while (dataCounter.get() == 0 && System.currentTimeMillis() - startTime < timeout) {
        Thread.sleep(100);
      }
      ssc.stop();
      assertTrue(dataCounter.get() > 0);
    } finally {
      server.stop();
    }
  }
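
The JavaSocketReceiver used above is a custom receiver. A minimal sketch of what such a receiver can look like (an illustration of the Receiver API, not the actual class from the test suite): extend Receiver<String>, start a background thread in onStart(), and hand each received line to Spark with store().

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.Socket;
    import java.nio.charset.StandardCharsets;

    import org.apache.spark.storage.StorageLevel;
    import org.apache.spark.streaming.receiver.Receiver;

    public class SketchSocketReceiver extends Receiver<String> {
      private final String host;
      private final int port;

      public SketchSocketReceiver(String host, int port) {
        super(StorageLevel.MEMORY_AND_DISK());
        this.host = host;
        this.port = port;
      }

      @Override
      public void onStart() {
        // Receive on a separate thread; onStart() must not block.
        new Thread(new Runnable() {
          @Override
          public void run() {
            receive();
          }
        }).start();
      }

      @Override
      public void onStop() {
        // Nothing to do: the receive loop checks isStopped() and exits.
      }

      private void receive() {
        try {
          Socket socket = new Socket(host, port);
          BufferedReader in = new BufferedReader(
              new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8));
          String line;
          while (!isStopped() && (line = in.readLine()) != null) {
            store(line); // hand the record to Spark
          }
          in.close();
          socket.close();
        } catch (IOException e) {
          // Ask Spark to restart the receiver after the configured delay.
          restart("Error receiving data from " + host + ":" + port, e);
        }
      }
    }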

    protected transient JavaStreamingContext ssc;

    @Before
    public void setUp() {
        System.setProperty("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock");
        ssc = new JavaStreamingContext("local[2]", "test", new Duration(1000));
        ssc.checkpoint("checkpoint");
    }
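
Suites structured like this usually pair setUp with a tearDown that stops the context and restores the clock setting. A sketch of such a method (not shown on this page; assumes org.junit.After):

    @After
    public void tearDown() {
      if (ssc != null) {
        ssc.stop(); // also stops the underlying SparkContext
        ssc = null;
      }
      // Restore the default clock so later suites use real time again.
      System.clearProperty("spark.streaming.clock");
    }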

        /* Kinesis checkpoint interval.  Same as batchInterval for this example. */
        Duration checkpointInterval = batchInterval;

        /* Setup the StreamingContext */
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval);

        /* Create the same number of Kinesis DStreams/Receivers as the stream has shards */
        List<JavaDStream<byte[]>> streamsList = new ArrayList<JavaDStream<byte[]>>(numStreams);
        for (int i = 0; i < numStreams; i++) {
          streamsList.add(
            KinesisUtils.createStream(jssc, streamName, endpointUrl, checkpointInterval,
            InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2())
          );
        }

        /* Union all the streams if there is more than 1 stream */
        JavaDStream<byte[]> unionStreams;
        if (streamsList.size() > 1) {
            unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
        } else {
            /* Otherwise, just use the single stream */
            unionStreams = streamsList.get(0);
        }

        /*
         * Split each line of the unioned DStreams into words: convert each
         * byte[] record to a String, then split it on WORD_SEPARATOR.
         */
        JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() {
                @Override
                public Iterable<String> call(byte[] line) {
                    return Lists.newArrayList(WORD_SEPARATOR.split(new String(line)));
                }
            });

        /* Map each word to a (word, 1) tuple, then reduce/aggregate by word. */
        JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                  return i1 + i2;
                }
            });

        /* Print the first 10 wordCounts */
        wordCounts.print();

        /* Start the streaming context and await termination */
        jssc.start();
        jssc.awaitTermination();
    }
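
Two details worth noting here: jssc.union takes a first stream plus a list of the remaining streams, which is why the code passes get(0) and subList(1, size); and new String(line) decodes with the platform default charset. If the Kinesis records are known to be UTF-8 text, an explicit charset is safer. A sketch of the flatMap with that one change (requires java.nio.charset.StandardCharsets, Java 7+):

    JavaDStream<String> words = unionStreams.flatMap(new FlatMapFunction<byte[], String>() {
        @Override
        public Iterable<String> call(byte[] line) {
            // Decode explicitly as UTF-8 instead of the platform default
            // (assumes the producers wrote UTF-8 text to the Kinesis stream).
            return Lists.newArrayList(WORD_SEPARATOR.split(new String(line, StandardCharsets.UTF_8)));
        }
    });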

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

    // Create an input stream with the custom receiver on the target ip:port and count the
    // words in the input stream of \n-delimited text (e.g. generated by 'nc')
    JavaReceiverInputDStream<String> lines = ssc.receiverStream(
      new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
        return Lists.newArrayList(SPACE.split(x));
      }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
      });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
  }
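
The JavaCustomReceiver constructed here follows the same Receiver<String> pattern sketched earlier on this page: it connects a socket to the given host and port from onStart() and feeds each received line to Spark via store(). As with JavaNetworkWordCount, nc -lk <port> is a convenient way to generate input while experimenting.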

    StreamingExamples.setStreamingLogLevels();
    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // Create the context with a 2 second batch interval
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic: topics) {
      topicMap.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> messages =
            KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
      @Override
      public String call(Tuple2<String, String> tuple2) {
        return tuple2._2();
      }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
        return Lists.newArrayList(SPACE.split(x));
      }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
      });

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
  }
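
Both Kafka examples on this page use the receiver-based KafkaUtils.createStream, which consumes through ZooKeeper. Spark 1.3 added a receiver-less alternative in the 0.8 connector; a sketch of the direct approach (broker list and topics are placeholders, and this assumes the spark-streaming-kafka artifact plus kafka.serializer.StringDecoder on the classpath):

    Map<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", "broker1:9092,broker2:9092"); // placeholder brokers

    Set<String> topicsSet = new HashSet<String>(Arrays.asList("topic1", "topic2")); // placeholder topics

    // Reads directly from the Kafka brokers and tracks offsets itself,
    // instead of running a receiver that consumes through ZooKeeper.
    JavaPairInputDStream<String, String> directMessages = KafkaUtils.createDirectStream(
        jssc,
        String.class, String.class,
        StringDecoder.class, StringDecoder.class,
        kafkaParams, topicsSet);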