/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.cdap.internal.app.runtime.spark;

import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.app.ApplicationSpecification;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.program.Programs;
import co.cask.cdap.app.runtime.Arguments;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.metrics.MetricsCollectionService;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.internal.app.runtime.spark.inmemory.InMemorySparkContextBuilder;
import co.cask.tephra.Transaction;
import co.cask.tephra.TransactionAware;
import com.google.common.base.Throwables;
import com.google.inject.Injector;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.filesystem.LocationFactory;
import org.apache.twill.internal.RunIds;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;

/**
* Builds the {@link BasicSparkContext}.
* Subclasses must implement {@link #prepare()} to provide a Guice injector configured for running and starting
* services specific to the environment. To release those resources, subclasses must override {@link #finish()}.
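*
* <p>A minimal sketch of a subclass; the module and service names below are illustrative only,
* not actual CDAP classes:
* <pre>{@code
* public final class MyEnvSparkContextBuilder extends AbstractSparkContextBuilder {
*   private MyRuntimeService service;
*
*   protected Injector prepare() {
*     // Start environment-specific services and expose them through Guice bindings.
*     service = new MyRuntimeService();
*     service.start();
*     return Guice.createInjector(new MyRuntimeModule(service));
*   }
*
*   protected void finish() {
*     // Release whatever prepare() acquired.
*     service.stop();
*   }
* }
* }</pre>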
*/
public abstract class AbstractSparkContextBuilder {

private static final Logger LOG = LoggerFactory.getLogger(AbstractSparkContextBuilder.class);

/**
* Builds an instance of {@link BasicSparkContext}.
*
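* <p>Illustrative invocation from a program runner; every argument below is a placeholder
* supplied by the caller, not actual runner code:
* <pre>{@code
* BasicSparkContext context = contextBuilder.build(runIdString, logicalStartTime, workflowBatch,
*                                                  runtimeArguments, tx, classLoader, programJarUri);
* }</pre>
*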
* @param runId program run id
* @param logicalStartTime the logical start time of the job
* @param workflowBatch tells whether the job was started by a workflow
* @param runtimeArguments the runtime arguments
* @param tx transaction to use
* @param classLoader classloader to use
* @param programLocation program location
* @return instance of {@link BasicSparkContext}
*/
public BasicSparkContext build(String runId, long logicalStartTime, String workflowBatch, Arguments runtimeArguments,
Transaction tx, ClassLoader classLoader, URI programLocation) {
Injector injector = prepare();
// Initializing Program
LocationFactory locationFactory = injector.getInstance(LocationFactory.class);
Program program;
try {
program = Programs.create(locationFactory.create(programLocation), classLoader);
//TODO: This should be changed when we support Spark in Workflow
} catch (IOException e) {
LOG.error("Could not init Program based on location: {}", programLocation);
throw Throwables.propagate(e);
}
// Initializing dataset context and hooking it up with Spark job transaction
DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
CConfiguration configuration = injector.getInstance(CConfiguration.class);
ApplicationSpecification appSpec = program.getSpecification();
//TODO: Change this when Spark starts supporting Metrics
MetricsCollectionService metricsCollectionService = null;
DiscoveryServiceClient discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);
// Creating Spark job context
SparkSpecification sparkSpec = appSpec.getSpark().get(program.getName());
BasicSparkContext context =
new BasicSparkContext(program, RunIds.fromString(runId), runtimeArguments, appSpec.getDatasets().keySet(),
sparkSpec, logicalStartTime, workflowBatch, metricsCollectionService,
datasetFramework, configuration, discoveryServiceClient);
// Propagate the transaction to all TransactionAware datasets of this context.
// The tx is committed or aborted by the ProgramRunner and DatasetRecordWriter depending on job success.
for (TransactionAware txAware : context.getDatasetInstantiator().getTransactionAware()) {
txAware.startTx(tx);
}
return context;
}

/**
* Subclasses must implement this method to provide a Guice injector configured for running and starting
* services specific to the environment, as {@link InMemorySparkContextBuilder} does.
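*
* <p>A minimal sketch (the module name is hypothetical). Whatever injector is returned must be able
* to provide the bindings that {@link #build} resolves: {@link LocationFactory}, {@link DatasetFramework},
* {@link CConfiguration} and {@link DiscoveryServiceClient}.
* <pre>{@code
* protected Injector prepare() {
*   return Guice.createInjector(new MySparkRuntimeModule());
* }
* }</pre>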
*
* @return instance of {@link Injector} with bindings for current runtime environment
*/
protected abstract Injector prepare();

/**
* Subclasses that acquire resources in {@link #prepare()} must override this method to release them.
*/
protected void finish() {
// No-op by default.
}
}