Package co.cask.cdap.internal.app.runtime.spark

Source Code of co.cask.cdap.internal.app.runtime.spark.AbstractSparkContextBuilder

/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.cdap.internal.app.runtime.spark;

import co.cask.cdap.api.spark.SparkSpecification;
import co.cask.cdap.app.ApplicationSpecification;
import co.cask.cdap.app.program.Program;
import co.cask.cdap.app.program.Programs;
import co.cask.cdap.app.runtime.Arguments;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.metrics.MetricsCollectionService;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.internal.app.runtime.spark.inmemory.InMemorySparkContextBuilder;
import co.cask.tephra.Transaction;
import co.cask.tephra.TransactionAware;
import com.google.common.base.Throwables;
import com.google.inject.Injector;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.filesystem.LocationFactory;
import org.apache.twill.internal.RunIds;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;

/**
 * Builds the {@link BasicSparkContext}.
 * Subclasses must implement the {@link #prepare()} method to provide a Guice injector configured for running and
 * starting the services specific to the environment. To release those resources, subclasses must override
 * {@link #finish()}.
 */
public abstract class AbstractSparkContextBuilder {

  private static final Logger LOG = LoggerFactory.getLogger(AbstractSparkContextBuilder.class);

  /**
   * Builds an instance of {@link BasicSparkContext}.
   *
   * @param runId            the program run id
   * @param logicalStartTime the logical start time of the job
   * @param workflowBatch    tells whether the job is started by a workflow
   * @param runtimeArguments the runtime arguments
   * @param tx               the transaction to use
   * @param classLoader      the classloader to use
   * @param programLocation  the program location
   * @return an instance of {@link BasicSparkContext}
   */
  public BasicSparkContext build(String runId, long logicalStartTime, String workflowBatch, Arguments runtimeArguments,
                                 Transaction tx, ClassLoader classLoader, URI programLocation) {
    Injector injector = prepare();

    // Initializing Program
    LocationFactory locationFactory = injector.getInstance(LocationFactory.class);
    Program program;
    try {
      program = Programs.create(locationFactory.create(programLocation), classLoader);
      //TODO: This should be changed when we support Spark in Workflow
    } catch (IOException e) {
      LOG.error("Could not init Program based on location: " + programLocation);
      throw Throwables.propagate(e);
    }

    // Initializing dataset context and hooking it up with Spark job transaction

    DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
    CConfiguration configuration = injector.getInstance(CConfiguration.class);

    ApplicationSpecification appSpec = program.getSpecification();

    //TODO: Change this when Spark starts supporting Metrics
    MetricsCollectionService metricsCollectionService = null;

    DiscoveryServiceClient discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);

    // Creating Spark job context
    SparkSpecification sparkSpec = appSpec.getSpark().get(program.getName());
    BasicSparkContext context =
      new BasicSparkContext(program, RunIds.fromString(runId), runtimeArguments, appSpec.getDatasets().keySet(),
                            sparkSpec, logicalStartTime, workflowBatch, metricsCollectionService,
                            datasetFramework, configuration, discoveryServiceClient);

    // Propagate the transaction to all TransactionAware datasets of this context.
    // The transaction is committed or aborted by the ProgramRunner and DatasetRecordWriter, depending on the job outcome.
    for (TransactionAware txAware : context.getDatasetInstantiator().getTransactionAware()) {
      txAware.startTx(tx);
    }
    return context;
  }

  /**
   * Subclasses must implement this method to provide a Guice injector configured for running and starting the
   * services specific to the environment, as {@link InMemorySparkContextBuilder} does.
   *
   * @return an instance of {@link Injector} with bindings for the current runtime environment
   */
  protected abstract Injector prepare();

  /**
   * Subclasses must override this method to release the resources acquired in {@link #prepare()}.
   */
  protected void finish() {
    // Do NOTHING by default
  }
}
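
Below is a minimal sketch (not part of the original source) of what a concrete builder could look like, illustrating the prepare()/finish() contract described in the class Javadoc. The class name LocalSparkContextBuilder and the constructor-supplied service instances are assumptions for illustration only; the Guice module binds just the four types that build() actually requests from the injector: LocationFactory, CConfiguration, DatasetFramework and DiscoveryServiceClient.

package co.cask.cdap.internal.app.runtime.spark;

import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.twill.discovery.DiscoveryServiceClient;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.LocationFactory;

/**
 * Hypothetical builder for a local environment; a sketch only, not part of the original source.
 */
public class LocalSparkContextBuilder extends AbstractSparkContextBuilder {

  private final CConfiguration cConf;
  private final DatasetFramework datasetFramework;
  private final DiscoveryServiceClient discoveryServiceClient;

  public LocalSparkContextBuilder(CConfiguration cConf, DatasetFramework datasetFramework,
                                  DiscoveryServiceClient discoveryServiceClient) {
    this.cConf = cConf;
    this.datasetFramework = datasetFramework;
    this.discoveryServiceClient = discoveryServiceClient;
  }

  @Override
  protected Injector prepare() {
    // Bind exactly the services that AbstractSparkContextBuilder.build() looks up from the injector.
    return Guice.createInjector(new AbstractModule() {
      @Override
      protected void configure() {
        bind(LocationFactory.class).toInstance(new LocalLocationFactory());
        bind(CConfiguration.class).toInstance(cConf);
        bind(DatasetFramework.class).toInstance(datasetFramework);
        bind(DiscoveryServiceClient.class).toInstance(discoveryServiceClient);
      }
    });
  }

  @Override
  protected void finish() {
    // Nothing is started in prepare() for this sketch; a real builder would stop its services here.
  }
}

With such a builder in place, a program runner could invoke something like builder.build(RunIds.generate().getId(), System.currentTimeMillis(), null, runtimeArguments, tx, programClassLoader, programJarUri), where the transaction, runtime arguments, classloader and program location are supplied by the runner; those names are placeholders, not part of the original class.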