Source Code of org.apache.sqoop.job.mr.SqoopOutputFormatLoadExecutor

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.sqoop.job.mr;

import java.io.IOException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;

import com.google.common.base.Throwables;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.log4j.Logger;
import org.apache.sqoop.common.SqoopException;
import org.apache.sqoop.job.JobConstants;
import org.apache.sqoop.job.MapreduceExecutionError;
import org.apache.sqoop.job.PrefixContext;
import org.apache.sqoop.job.etl.Loader;
import org.apache.sqoop.job.etl.LoaderContext;
import org.apache.sqoop.job.io.Data;
import org.apache.sqoop.etl.io.DataReader;
import org.apache.sqoop.schema.Schema;
import org.apache.sqoop.utils.ClassUtils;

public class SqoopOutputFormatLoadExecutor {

  public static final Logger LOG =
    Logger.getLogger(SqoopOutputFormatLoadExecutor.class);

  private volatile boolean readerFinished = false;
  private volatile boolean writerFinished = false;
  private volatile Data data;
  private JobContext context;
  private SqoopRecordWriter producer;
  private Future<?> consumerFuture;
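  // Single-slot handoff: a 'free' permit means the shared Data slot may be
  // written by the producer, a 'filled' permit means it may be read by the
  // consumer; in normal operation at most one permit is outstanding in total.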
  private Semaphore filled = new Semaphore(0, true);
  private Semaphore free = new Semaphore(1, true);
  private volatile boolean isTest = false;
  private String loaderName;

  SqoopOutputFormatLoadExecutor(boolean isTest, String loaderName) {
    this.isTest = isTest;
    this.loaderName = loaderName;
    data = new Data();
    producer = new SqoopRecordWriter();
  }

  public SqoopOutputFormatLoadExecutor(JobContext jobctx) {
    data = new Data();
    context = jobctx;
    producer = new SqoopRecordWriter();
  }

  public RecordWriter<Data, NullWritable> getRecordWriter() {
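    // Spawn the single consumer thread that will run the Loader; the
    // returned writer is the producer side of the handoff.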
    consumerFuture = Executors.newSingleThreadExecutor(
        new ThreadFactoryBuilder().setNameFormat("OutputFormatLoader-consumer").build())
        .submit(new ConsumerThread());
    return producer;
  }

  /*
   * This is a single-slot producer-consumer handoff, coordinated with the
   * two semaphores declared above: the record writer produces into the
   * shared Data object and the Loader's reader consumes from it.
   */
  private class SqoopRecordWriter extends RecordWriter<Data, NullWritable> {

    @Override
    public void write(Data key, NullWritable value) throws InterruptedException {
      free.acquire();
      checkIfConsumerThrew();
      int type = key.getType();
      data.setContent(key.getContent(type), type);
      filled.release();
    }

    @Override
    public void close(TaskAttemptContext context)
            throws InterruptedException, IOException {
      LOG.info("SqoopOutputFormatLoadExecutor::SqoopRecordWriter is about to be closed");
      free.acquire();
      writerFinished = true;
      filled.release();
      waitForConsumer();
      LOG.info("SqoopOutputFormatLoadExecutor::SqoopRecordWriter is closed");
    }
  }

  private void checkIfConsumerThrew() {
    if (readerFinished) {
      waitForConsumer();
    }
  }

  /**
   * Waits for the consumer thread to finish and re-throws any exception
   * it raised.
   *
   * @throws SqoopException if the consumer thread threw one.
   * @throws RuntimeException if some other exception was thrown.
   */
  private void waitForConsumer() {
    try {
      consumerFuture.get();
    } catch (ExecutionException ex) {
      // In almost all cases, the exception will be SqoopException,
      // because all exceptions are caught and propagated as
      // SqoopExceptions
      Throwable t = ex.getCause();
      if (t instanceof SqoopException) {
        throw (SqoopException) t;
      }
      // In the rare case it was not a SqoopException, propagate it as-is
      Throwables.propagate(t);
    } catch (Exception ex) {
      throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0019, ex);
    }
  }

  private class OutputFormatDataReader extends DataReader {
    @Override
    public void setFieldDelimiter(char fieldDelimiter) {
      data.setFieldDelimiter(fieldDelimiter);
    }

    @Override
    public Object[] readArrayRecord() throws InterruptedException {
      return (Object[])readContent(Data.ARRAY_RECORD);
    }

    @Override
    public String readCsvRecord() throws InterruptedException {
      return (String)readContent(Data.CSV_RECORD);
    }

    @Override
    public Object readContent(int type) throws InterruptedException {
      // Wait until the producer has published a record (or, on shutdown,
      // released the semaphore one last time).
      try {
        filled.acquire();
      } catch (InterruptedException ex) {
        // Nothing sensible can be done at this point; log, rethrow and get out.
        LOG.error("Interrupted while waiting for data to be available from " +
            "mapper", ex);
        throw ex;
      }
      // If the writer has finished, there is definitely no data remaining
      if (writerFinished) {
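        // 'free' is deliberately not released on this path: the writer has
        // already shut down, so no further write will wait on it.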
        return null;
      }
      try {
        return data.getContent(type);
      } catch (Throwable t) {
        readerFinished = true;
        LOG.error("Caught exception while getting content", t);
        throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0018, t);
      } finally {
        free.release();
      }
    }
  }

  private class ConsumerThread implements Runnable {

    @Override
    public void run() {
      LOG.info("SqoopOutputFormatLoadExecutor consumer thread is starting");
      try {
        DataReader reader = new OutputFormatDataReader();

        Configuration conf = null;
        if (!isTest) {
          conf = context.getConfiguration();
          loaderName = conf.get(JobConstants.JOB_ETL_LOADER);
        }
        Loader loader = (Loader) ClassUtils.instantiate(loaderName);

        // Objects to be passed to the Loader's execution
        PrefixContext subContext = null;
        Object configConnection = null;
        Object configJob = null;
        Schema schema = null;

        if (!isTest) {
          // Propagate connector schema in every case for now
          // TODO: Change to conditional choosing between HIO and Connector schema
          schema = ConfigurationUtils.getConnectorSchema(conf);

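          // For EXPORT the connector's Loader runs, so it gets the
          // connector-scoped context and configs; for IMPORT the
          // framework-side loader runs with framework configs.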
          switch (ConfigurationUtils.getJobType(conf)) {
            case EXPORT:
              subContext = new PrefixContext(conf, JobConstants.PREFIX_CONNECTOR_CONTEXT);
              configConnection = ConfigurationUtils.getConfigConnectorConnection(conf);
              configJob = ConfigurationUtils.getConfigConnectorJob(conf);
              break;
            case IMPORT:
              subContext = new PrefixContext(conf, "");
              configConnection = ConfigurationUtils.getConfigFrameworkConnection(conf);
              configJob = ConfigurationUtils.getConfigFrameworkJob(conf);
              break;
            default:
              throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0023);
          }
        }

        // Create loader context
        LoaderContext loaderContext = new LoaderContext(subContext, reader, schema);

        LOG.info("Running loader class " + loaderName);
        loader.load(loaderContext, configConnection, configJob);
        LOG.info("Loader has finished");
      } catch (Throwable t) {
        readerFinished = true;
        LOG.error("Error while loading data out of MR job.", t);
        // Release so that the writer can tell the framework something went
        // wrong.
        free.release();
        throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0018, t);
      }

      // If no exception happened but the reader finished before the writer,
      // something went wrong: the writer still has data that was never
      // consumed.
      if (!writerFinished) {
        readerFinished = true;
        LOG.error("Reader terminated, but writer is still running!");
        // Release so that the writer can tell the framework something went
        // wrong.
        free.release();
        throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0019);
      }
      // inform writer that reader is finished
      readerFinished = true;
    }
  }
}
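
The writer and reader above implement a classic single-slot producer-consumer
handoff: free starts with one permit (the slot is writable) and filled starts
with zero (nothing to read yet). The standalone sketch below shows the same
protocol in isolation. The class name TwoSemaphoreHandoffSketch and the String
slot are illustrative assumptions, not part of Sqoop; the real class moves
Sqoop Data objects between the MapReduce record writer and the Loader running
on the consumer thread.

import java.util.concurrent.Semaphore;

public class TwoSemaphoreHandoffSketch {
  // Hypothetical stand-ins: one 'free' permit means the slot may be written,
  // one 'filled' permit means it may be read.
  private static final Semaphore filled = new Semaphore(0, true);
  private static final Semaphore free = new Semaphore(1, true);
  private static volatile String slot;

  public static void main(String[] args) throws InterruptedException {
    Thread consumer = new Thread(() -> {
      try {
        for (int i = 0; i < 3; i++) {
          filled.acquire();               // wait for the producer, like readContent()
          System.out.println("consumed: " + slot);
          free.release();                 // hand the slot back, like the finally block
        }
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    });
    consumer.start();

    for (int i = 0; i < 3; i++) {
      free.acquire();                     // wait for the slot, like write()
      slot = "record-" + i;               // stands in for data.setContent(...)
      filled.release();                   // wake the consumer
    }
    consumer.join();
  }
}

The fair (true) semaphores match the original code; with exactly one producer
and one consumer, fairness is not strictly required, but it keeps the handoff
order deterministic under contention.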