Source Code of co.cask.cdap.internal.app.runtime.spark.SparkProgramRunner

/*
 * Copyright © 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */


package co.cask.cdap.internal.app.runtime.spark;


import co.cask.cdap.api.spark.Spark;
import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.app.ApplicationSpecification;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.runtime.Arguments;
import co.cask.cdap.app.runtime.ProgramController;
import co.cask.cdap.app.runtime.ProgramOptions;
import co.cask.cdap.app.runtime.ProgramRunner;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.lang.InstantiatorFactory;
import co.cask.cdap.common.logging.LoggingContextAccessor;
import co.cask.cdap.common.metrics.MetricsCollectionService;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.internal.app.runtime.ProgramOptionConstants;
import co.cask.cdap.proto.ProgramType;
import co.cask.tephra.TransactionSystemClient;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.reflect.TypeToken;
import com.google.common.util.concurrent.Service;
import com.google.inject.Inject;
import org.apache.hadoop.conf.Configuration;
import org.apache.twill.api.RunId;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.filesystem.LocationFactory;
import org.apache.twill.internal.RunIds;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Runs {@link Spark} programs
 */
public class SparkProgramRunner implements ProgramRunner {


  private static final Logger LOG = LoggerFactory.getLogger(SparkProgramRunner.class);


  private final DatasetFramework datasetFramework;
  private final Configuration hConf;
  private final CConfiguration cConf;
  private final MetricsCollectionService metricsCollectionService;
  private final TransactionSystemClient txSystemClient;
  private final LocationFactory locationFactory;
  private final DiscoveryServiceClient discoveryServiceClient;


  @Inject
  public SparkProgramRunner(DatasetFramework datasetFramework, CConfiguration cConf,
                            MetricsCollectionService metricsCollectionService, Configuration hConf,
                            TransactionSystemClient txSystemClient, LocationFactory locationFactory,
                            DiscoveryServiceClient discoveryServiceClient) {
    this.hConf = hConf;
    this.datasetFramework = datasetFramework;
    this.cConf = cConf;
    this.metricsCollectionService = metricsCollectionService;
    this.locationFactory = locationFactory;
    this.txSystemClient = txSystemClient;
    this.discoveryServiceClient = discoveryServiceClient;
  }


  @Override
  public ProgramController run(Program program, ProgramOptions options) {
    // Extract and verify parameters
    final ApplicationSpecification appSpec = program.getSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");


    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.SPARK, "Only Spark process type is supported.");


    final SparkSpecification spec = appSpec.getSpark().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing SparkSpecification for %s", program.getName());


    // Optionally get runId. If the spark started by other program (e.g. Workflow), it inherit the runId.
    Arguments arguments = options.getArguments();
    RunId runId = arguments.hasOption(ProgramOptionConstants.RUN_ID) ? RunIds.fromString(arguments.getOption
      (ProgramOptionConstants.RUN_ID)) : RunIds.generate();


    long logicalStartTime = arguments.hasOption(ProgramOptionConstants.LOGICAL_START_TIME)
      ? Long.parseLong(arguments.getOption(ProgramOptionConstants.LOGICAL_START_TIME)) : System.currentTimeMillis();


    String workflowBatch = arguments.getOption(ProgramOptionConstants.WORKFLOW_BATCH);


    Spark spark;
    try {
      spark = new InstantiatorFactory(false).get(TypeToken.of(program.<Spark>getMainClass())).create();
    } catch (Exception e) {
      LOG.error("Failed to instantiate Spark class for {}", spec.getClassName(), e);
      throw Throwables.propagate(e);
    }


    final BasicSparkContext context = new BasicSparkContext(program, runId, options.getUserArguments(),
                                                            program.getSpecification().getDatasets().keySet(), spec,
                                                            logicalStartTime, workflowBatch,
                                                            metricsCollectionService, datasetFramework, cConf,
                                                            discoveryServiceClient);


    LoggingContextAccessor.setLoggingContext(context.getLoggingContext());


    Service sparkRuntimeService = new SparkRuntimeService(cConf, hConf, spark, spec, context,
                                                          program.getJarLocation(), locationFactory,
                                                          txSystemClient);
    ProgramController controller = new SparkProgramController(sparkRuntimeService, context);


    LOG.info("Starting Spark Job: {}", context.toString());
    sparkRuntimeService.start();
    return controller;
  }
}
Source Code of co.cask.cdap.internal.app.runtime.spark.SparkProgramRunner

Related Classes of co.cask.cdap.internal.app.runtime.spark.SparkProgramRunner