Package com.cloudera.oryx.lambda.speed

Source Code of com.cloudera.oryx.lambda.speed.SpeedLayer

/*
* Copyright (c) 2014, Cloudera, Inc. and Intel Corp. All Rights Reserved.
*
* Cloudera, Inc. licenses this file to you under the Apache License,
* Version 2.0 (the "License"). You may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for
* the specific language governing permissions and limitations under the
* License.
*/

package com.cloudera.oryx.lambda.speed;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterators;
import com.typesafe.config.Config;
import kafka.consumer.Consumer$;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
import kafka.serializer.Decoder;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.api.java.JavaStreamingContextFactory;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.oryx.common.lang.ClassUtils;
import com.cloudera.oryx.common.lang.LoggingRunnable;
import com.cloudera.oryx.lambda.KeyMessage;

/**
* Main entry point for Oryx Speed Layer.
*
* @param <K> type of key read from input queue
* @param <M> type of message read from input queue
* @param <U> type of update message read/written
*/
public final class SpeedLayer<K,M,U> implements Closeable {

  private static final Logger log = LoggerFactory.getLogger(SpeedLayer.class);

  private final Config config;
  private final String streamingMaster;
  private final String inputQueueLockMaster;
  private final String messageTopic;
  private final String updateBroker;
  private final String updateTopic;
  private final String updateQueueLockMaster;
  private final String modelManagerClassName;
  private final String checkpointDirString;
  private final int generationIntervalSec;
  private final int blockIntervalSec;
  private final Class<? extends Decoder<?>> keyDecoderClass;
  private final Class<? extends Decoder<?>> messageDecoderClass;
  private final Class<? extends Decoder<U>> updateDecoderClass;
  private JavaStreamingContext streamingContext;
  private ConsumerConnector consumer;
  private SpeedModelManager<K,M,U> modelManager;
  private final Class<K> keyClass;
  private final Class<M> messageClass;

  @SuppressWarnings("unchecked")
  public SpeedLayer(Config config) {
    Preconditions.checkNotNull(config);
    this.config = config;
    this.streamingMaster = config.getString("speed.streaming.master");
    this.inputQueueLockMaster = config.getString("input-queue.lock.master");
    this.messageTopic = config.getString("input-queue.message.topic");
    this.updateBroker = config.getString("update-queue.broker");
    this.updateTopic = config.getString("update-queue.message.topic");
    this.updateQueueLockMaster = config.getString("update-queue.lock.master");
    this.modelManagerClassName = config.getString("speed.model-manager-class");
    this.checkpointDirString = config.hasPath("speed.storage.checkpoint-dir") ?
        config.getString("speed.storage.checkpoint-dir") :
        null;
    this.generationIntervalSec = config.getInt("speed.generation-interval-sec");
    this.blockIntervalSec = config.getInt("speed.block-interval-sec");
    this.keyDecoderClass = (Class<? extends Decoder<?>>) ClassUtils.loadClass(
        config.getString("input-queue.message.key-decoder-class"), Decoder.class);
    this.messageDecoderClass = (Class<? extends Decoder<?>>) ClassUtils.loadClass(
        config.getString("input-queue.message.message-decoder-class"), Decoder.class);
    this.updateDecoderClass = (Class<? extends Decoder<U>>) ClassUtils.loadClass(
        config.getString("update-queue.message.decoder-class"), Decoder.class);
    this.keyClass = ClassUtils.loadClass(config.getString("input-queue.message.key-class"));
    this.messageClass = ClassUtils.loadClass(config.getString("input-queue.message.message-class"));

    Preconditions.checkArgument(this.generationIntervalSec > 0);
    Preconditions.checkArgument(this.blockIntervalSec > 0);
  }

  public synchronized void start() {
    log.info("Starting SparkContext for master {}, interval {} seconds",
             streamingMaster, generationIntervalSec);

    long blockIntervalMS = TimeUnit.MILLISECONDS.convert(blockIntervalSec, TimeUnit.SECONDS);

    SparkConf sparkConf = new SparkConf();
    sparkConf.setIfMissing("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.setIfMissing("spark.streaming.blockInterval", Long.toString(blockIntervalMS));
    sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));
    sparkConf.setIfMissing("spark.logConf", "true");
    sparkConf.setMaster(streamingMaster);
    sparkConf.setAppName("OryxSpeedLayer");
    final long batchDurationMS =
        TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);

    JavaStreamingContextFactory streamingContextFactory = new JavaStreamingContextFactory() {
      @Override
      public JavaStreamingContext create() {
        return new JavaStreamingContext(sparkContext, new Duration(batchDurationMS));
      }
    };

    if (checkpointDirString == null) {
      log.info("Not using a streaming checkpoint dir");
      streamingContext = streamingContextFactory.create();
    } else {
      log.info("Using streaming checkpoint dir {}", checkpointDirString);
      streamingContext = JavaStreamingContext.getOrCreate(
          checkpointDirString, sparkContext.hadoopConfiguration(), streamingContextFactory, false);
      streamingContext.checkpoint(checkpointDirString);
    }

    log.info("Creating message queue stream");

    JavaPairDStream<K,M> dStream = buildDStream();
    if (checkpointDirString != null) {
      dStream.checkpoint(new Duration(batchDurationMS));
    }

    Properties consumerProps = new Properties();
    consumerProps.setProperty("group.id", "OryxGroup-SpeedLayer-" + System.currentTimeMillis());
    consumerProps.setProperty("zookeeper.connect", updateQueueLockMaster);
    ConsumerConfig consumerConfig = new ConsumerConfig(consumerProps);
    consumer = Consumer$.MODULE$.createJavaConsumerConnector(consumerConfig);
    KafkaStream<String,U> stream =
        consumer.createMessageStreams(Collections.singletonMap(updateTopic, 1),
                                      new StringDecoder(null),
                                      loadDecoderInstance())
            .get(updateTopic).get(0);
    final Iterator<KeyMessage<String,U>> transformed = Iterators.transform(stream.iterator(),
        new Function<MessageAndMetadata<String,U>, KeyMessage<String,U>>() {
          @Override
          public KeyMessage<String,U> apply(MessageAndMetadata<String,U> input) {
            return new KeyMessage<>(input.key(), input.message());
          }
        });

    modelManager = loadManagerInstance();
    new Thread(new LoggingRunnable() {
      @Override
      public void doRun() throws IOException {
        modelManager.consume(transformed);
      }
    }).start();

    dStream.foreachRDD(new SpeedLayerUpdate<>(modelManager, updateBroker, updateTopic));

    streamingContext.start();
  }

  public void await() {
    Preconditions.checkState(streamingContext != null);
    log.info("Waiting for streaming...");
    streamingContext.awaitTermination();
  }

  @Override
  public synchronized void close() {
    if (modelManager != null) {
      log.info("Shutting down model manager");
      modelManager.close();
      modelManager = null;
    }
    if (consumer != null) {
      log.info("Shutting down consumer");
      consumer.shutdown();
      consumer = null;
    }
    if (streamingContext != null) {
      log.info("Shutting down streaming context");
      streamingContext.stop(true, true);
      streamingContext = null;
    }
  }

  private JavaPairDStream<K,M> buildDStream() {
    Map<String,String> kafkaParams = new HashMap<>();
    kafkaParams.put("zookeeper.connect", inputQueueLockMaster);
    kafkaParams.put("group.id", "OryxGroup-SpeedLayer-" + System.currentTimeMillis());
    return KafkaUtils.createStream(
        streamingContext,
        keyClass,
        messageClass,
        keyDecoderClass,
        messageDecoderClass,
        kafkaParams,
        Collections.singletonMap(messageTopic, 1),
        StorageLevel.MEMORY_AND_DISK_2());
  }

  private SpeedModelManager<K,M,U> loadManagerInstance() {
    Class<?> managerClass = ClassUtils.loadClass(modelManagerClassName);

    if (SpeedModelManager.class.isAssignableFrom(managerClass)) {

      try {
        @SuppressWarnings("unchecked")
        SpeedModelManager<K,M,U> instance = ClassUtils.loadInstanceOf(
            modelManagerClassName,
            SpeedModelManager.class,
            new Class<?>[] { Config.class },
            new Object[] { config });
        return instance;

      } catch (IllegalArgumentException iae) {
        @SuppressWarnings("unchecked")
        SpeedModelManager<K,M,U> instance =
            ClassUtils.loadInstanceOf(modelManagerClassName, SpeedModelManager.class);
        return instance;
      }

    } else if (ScalaSpeedModelManager.class.isAssignableFrom(managerClass)) {

      try {
        @SuppressWarnings("unchecked")
        ScalaSpeedModelManager<K,M,U> instance = ClassUtils.loadInstanceOf(
            modelManagerClassName,
            ScalaSpeedModelManager.class,
            new Class<?>[] { Config.class },
            new Object[] { config });
        return new ScalaSpeedModelManagerAdapter<>(instance);

      } catch (IllegalArgumentException iae) {
        @SuppressWarnings("unchecked")
        ScalaSpeedModelManager<K,M,U> instance =
            ClassUtils.loadInstanceOf(modelManagerClassName, ScalaSpeedModelManager.class);
        return new ScalaSpeedModelManagerAdapter<>(instance);
      }

    } else {
      throw new IllegalArgumentException("Bad manager class: " + managerClass);
    }
  }

  private Decoder<U> loadDecoderInstance() {
    try {
      return ClassUtils.loadInstanceOf(updateDecoderClass);
    } catch (IllegalArgumentException iae) {
      // special case the Kafka decoder, which wants an optional nullable parameter unfortunately
      return ClassUtils.loadInstanceOf(updateDecoderClass.getName(),
                                       updateDecoderClass,
                                       new Class<?>[] { VerifiableProperties.class },
                                       new Object[] { null });
    }
  }

}
TOP

Related Classes of com.cloudera.oryx.lambda.speed.SpeedLayer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.