Package eu.stratosphere.pact.runtime.task

Source Code of eu.stratosphere.pact.runtime.task.DataSinkTask

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.pact.runtime.task;

import java.io.IOException;

import eu.stratosphere.pact.runtime.task.chaining.ExceptionInChainedStubException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.stratosphere.api.common.io.FileOutputFormat;
import eu.stratosphere.api.common.io.FileOutputFormat.OutputDirectoryMode;
import eu.stratosphere.api.common.io.OutputFormat;
import eu.stratosphere.api.common.typeutils.TypeComparatorFactory;
import eu.stratosphere.api.common.typeutils.TypeSerializer;
import eu.stratosphere.api.common.typeutils.TypeSerializerFactory;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.FileSystem.WriteMode;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.io.IOReadableWritable;
import eu.stratosphere.nephele.execution.CancelTaskException;
import eu.stratosphere.nephele.execution.librarycache.LibraryCacheManager;
import eu.stratosphere.runtime.io.api.MutableReader;
import eu.stratosphere.runtime.io.api.MutableRecordReader;
import eu.stratosphere.runtime.io.api.MutableUnionRecordReader;
import eu.stratosphere.nephele.template.AbstractOutputTask;
import eu.stratosphere.pact.runtime.plugable.DeserializationDelegate;
import eu.stratosphere.pact.runtime.sort.UnilateralSortMerger;
import eu.stratosphere.pact.runtime.task.util.CloseableInputProvider;
import eu.stratosphere.pact.runtime.task.util.ReaderIterator;
import eu.stratosphere.pact.runtime.task.util.RecordReaderIterator;
import eu.stratosphere.pact.runtime.task.util.TaskConfig;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.MutableObjectIterator;

/**
* DataSinkTask which is executed by a Nephele task manager.
* The task hands the data to an output format.
*
* @see OutputFormat
*/
public class DataSinkTask<IT> extends AbstractOutputTask {
 
  public static final String DEGREE_OF_PARALLELISM_KEY = "sink.dop";
 
  // Obtain DataSinkTask Logger
  private static final Log LOG = LogFactory.getLog(DataSinkTask.class);

  // --------------------------------------------------------------------------------------------
 
  // OutputFormat instance. volatile, because the asynchronous canceller may access it
  private volatile OutputFormat<IT> format;
 
  // input reader
  private MutableObjectIterator<IT> reader;
 
  // input iterator
  private MutableObjectIterator<IT> input;
 
  // The serializer for the input type
  private TypeSerializerFactory<IT> inputTypeSerializerFactory;
 
  // local strategy
  private CloseableInputProvider<IT> localStrategy;

  // task configuration
  private TaskConfig config;
 
  // class loader for user code
  private ClassLoader userCodeClassLoader;

  // cancel flag
  private volatile boolean taskCanceled;
 

  @Override
  public void registerInputOutput() {
    if (LOG.isDebugEnabled()) {
      LOG.debug(getLogString("Start registering input and output"));
    }

    // initialize OutputFormat
    initOutputFormat();
   
    // initialize input readers
    try {
      initInputReaders();
    } catch (Exception e) {
      throw new RuntimeException("Initializing the input streams failed" +
        e.getMessage() == null ? "." : ": " + e.getMessage(), e);
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug(getLogString("Finished registering input and output"));
    }
  }


  @Override
  public void invoke() throws Exception
  {
    if (LOG.isDebugEnabled()) {
      LOG.debug(getLogString("Starting data sink operator"));
    }
   
    try {
     
      // initialize local strategies
      switch (this.config.getInputLocalStrategy(0)) {
      case NONE:
        // nothing to do
        localStrategy = null;
        input = reader;
        break;
      case SORT:
        // initialize sort local strategy
        try {
          // get type comparator
          TypeComparatorFactory<IT> compFact = this.config.getInputComparator(0, this.userCodeClassLoader);
          if (compFact == null) {
            throw new Exception("Missing comparator factory for local strategy on input " + 0);
          }
         
          // initialize sorter
          UnilateralSortMerger<IT> sorter = new UnilateralSortMerger<IT>(
              getEnvironment().getMemoryManager(),
              getEnvironment().getIOManager(),
              this.reader, this, this.inputTypeSerializerFactory, compFact.createComparator(),
              this.config.getMemoryInput(0), this.config.getFilehandlesInput(0),
              this.config.getSpillingThresholdInput(0));
         
          this.localStrategy = sorter;
          this.input = sorter.getIterator();
        } catch (Exception e) {
          throw new RuntimeException("Initializing the input processing failed" +
            e.getMessage() == null ? "." : ": " + e.getMessage(), e);
        }
        break;
      default:
        throw new RuntimeException("Invalid local strategy for DataSinkTask");
      }
     
      // read the reader and write it to the output
     
      final TypeSerializer<IT> serializer = this.inputTypeSerializerFactory.getSerializer();
      final MutableObjectIterator<IT> input = this.input;
      final OutputFormat<IT> format = this.format;
     
     
      IT record = serializer.createInstance();
     
      // check if task has been canceled
      if (this.taskCanceled) {
        return;
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(getLogString("Starting to produce output"));
      }

      // open
      format.open(this.getEnvironment().getIndexInSubtaskGroup(), this.getEnvironment().getCurrentNumberOfSubtasks());

      // work!
      while (!this.taskCanceled && ((record = input.next(record)) != null)) {
        format.writeRecord(record);
      }
     
      // close. We close here such that a regular close throwing an exception marks a task as failed.
      if (!this.taskCanceled) {
        this.format.close();
        this.format = null;
      }
    }
    catch (Exception ex) {
      ex = ExceptionInChainedStubException.exceptionUnwrap(ex);

      if (ex instanceof CancelTaskException) {
        // forward canceling exception
        throw ex;
      }
      // drop, if the task was canceled
      else if (!this.taskCanceled) {
        if (LOG.isErrorEnabled()) {
          LOG.error(getLogString("Error in user code: " + ex.getMessage()), ex);
        }
        throw ex;
      }
    }
    finally {
      if (this.format != null) {
        // close format, if it has not been closed, yet.
        // This should only be the case if we had a previous error, or were canceled.
        try {
          this.format.close();
        }
        catch (Throwable t) {
          if (LOG.isWarnEnabled()) {
            LOG.warn(getLogString("Error closing the ouput format."), t);
          }
        }
      }
      // close local strategy if necessary
      if (localStrategy != null) {
        try {
          this.localStrategy.close();
        } catch (Throwable t) {
          LOG.error("Error closing local strategy", t);
        }
      }
    }

    if (!this.taskCanceled) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(getLogString("Finished data sink operator"));
      }
    }
    else {
      if (LOG.isDebugEnabled()) {
        LOG.debug(getLogString("Data sink operator cancelled"));
      }
    }
  }

  @Override
  public void cancel() throws Exception {
    this.taskCanceled = true;
    OutputFormat<IT> format = this.format;
    if (format != null) {
      try {
        this.format.close();
      } catch (Throwable t) {}
    }
   
    if (LOG.isDebugEnabled()) {
      LOG.debug(getLogString("Cancelling data sink operator"));
    }
  }
 
  /**
   * Sets the class-loader to be used to load the user code.
   *
   * @param cl The class-loader to be used to load the user code.
   */
  public void setUserCodeClassLoader(ClassLoader cl) {
    this.userCodeClassLoader = cl;
  }

  /**
   * Initializes the OutputFormat implementation and configuration.
   *
   * @throws RuntimeException
   *         Throws if instance of OutputFormat implementation can not be
   *         obtained.
   */
  private void initOutputFormat() {
    if (this.userCodeClassLoader == null) {
      try {
        this.userCodeClassLoader = LibraryCacheManager.getClassLoader(getEnvironment().getJobID());
      } catch (IOException ioe) {
        throw new RuntimeException("Library cache manager could not be instantiated.", ioe);
      }
    }
    // obtain task configuration (including stub parameters)
    Configuration taskConf = getTaskConfiguration();
    taskConf.setClassLoader(this.userCodeClassLoader);
    this.config = new TaskConfig(taskConf);

    try {
      this.format = config.<OutputFormat<IT>>getStubWrapper(this.userCodeClassLoader).getUserCodeObject(OutputFormat.class, this.userCodeClassLoader);

      // check if the class is a subclass, if the check is required
      if (!OutputFormat.class.isAssignableFrom(this.format.getClass())) {
        throw new RuntimeException("The class '" + this.format.getClass().getName() + "' is not a subclass of '" +
            OutputFormat.class.getName() + "' as is required.");
      }
    }
    catch (ClassCastException ccex) {
      throw new RuntimeException("The stub class is not a proper subclass of " + OutputFormat.class.getName(), ccex);
    }
   
    // configure the stub. catch exceptions here extra, to report them as originating from the user code
    try {
      this.format.configure(this.config.getStubParameters());
    }
    catch (Throwable t) {
      throw new RuntimeException("The user defined 'configure()' method in the Output Format caused an error: "
        + t.getMessage(), t);
    }
  }

  /**
   * Initializes the input readers of the DataSinkTask.
   *
   * @throws RuntimeException
   *         Thrown in case of invalid task input configuration.
   */
  @SuppressWarnings("unchecked")
  private void initInputReaders() throws Exception {
   
    MutableReader<?> inputReader;
   
    int numGates = 0;
    //  ---------------- create the input readers ---------------------
    // in case where a logical input unions multiple physical inputs, create a union reader
    final int groupSize = this.config.getGroupSize(0);
    numGates += groupSize;
    if (groupSize == 1) {
      // non-union case
      inputReader = new MutableRecordReader<DeserializationDelegate<IT>>(this);
    } else if (groupSize > 1){
      // union case
     
      MutableRecordReader<IOReadableWritable>[] readers = new MutableRecordReader[groupSize];
      for (int j = 0; j < groupSize; ++j) {
        readers[j] = new MutableRecordReader<IOReadableWritable>(this);
      }
      inputReader = new MutableUnionRecordReader<IOReadableWritable>(readers);
    } else {
      throw new Exception("Illegal input group size in task configuration: " + groupSize);
    }
   
    this.inputTypeSerializerFactory = this.config.getInputSerializer(0, this.userCodeClassLoader);
   
    if (this.inputTypeSerializerFactory.getDataType() == Record.class) {
      // record specific deserialization
      MutableReader<Record> reader = (MutableReader<Record>) inputReader;
      this.reader = (MutableObjectIterator<IT>)new RecordReaderIterator(reader);
    } else {
      // generic data type serialization
      MutableReader<DeserializationDelegate<?>> reader = (MutableReader<DeserializationDelegate<?>>) inputReader;
      @SuppressWarnings({ "rawtypes" })
      final MutableObjectIterator<?> iter = new ReaderIterator(reader, this.inputTypeSerializerFactory.getSerializer());
      this.reader = (MutableObjectIterator<IT>)iter;
    }
   
    // final sanity check
    if (numGates != this.config.getNumInputs()) {
      throw new Exception("Illegal configuration: Number of input gates and group sizes are not consistent.");
    }
  }
 
  // ------------------------------------------------------------------------
  //                     Degree of parallelism & checks
  // ------------------------------------------------------------------------
 

  @Override
  public int getMaximumNumberOfSubtasks() {
    if (!(this.format instanceof FileOutputFormat<?>)) {
      return -1;
    }
   
    final FileOutputFormat<?> fileOutputFormat = (FileOutputFormat<?>) this.format;
   
    // ----------------- This code applies only to file inputs ------------------
   
    final Path path = fileOutputFormat.getOutputFilePath();
    final WriteMode writeMode = fileOutputFormat.getWriteMode();
    final OutputDirectoryMode outDirMode = fileOutputFormat.getOutputDirectoryMode();

    // Prepare output path and determine max DOP   
    try {
     
      int dop = getTaskConfiguration().getInteger(DEGREE_OF_PARALLELISM_KEY, -1);
      final FileSystem fs = path.getFileSystem();
     
      if(dop == 1 && outDirMode == OutputDirectoryMode.PARONLY) {
        // output is not written in parallel and should be written to a single file.
       
        if(fs.isDistributedFS()) {
          // prepare distributed output path
          if(!fs.initOutPathDistFS(path, writeMode, false)) {
            // output preparation failed! Cancel task.
            throw new IOException("Output path could not be initialized.");
          }
        }
       
        return 1;
       
      } else {
        // output should be written to a directory
       
        if(fs.isDistributedFS()) {
          // only distributed file systems can be initialized at start-up time.
          if(!fs.initOutPathDistFS(path, writeMode, true)) {
            throw new IOException("Output directory could not be created.");
          }
        }
       
        return -1;
       
      }
    }
    catch (IOException e) {
      LOG.error("Could not access the file system to detemine the status of the output.", e);
      throw new RuntimeException("I/O Error while accessing file", e);
    }
  }

  // ------------------------------------------------------------------------
  //                               Utilities
  // ------------------------------------------------------------------------
 
  /**
   * Utility function that composes a string for logging purposes. The string includes the given message and
   * the index of the task in its task group together with the number of tasks in the task group.
   *
   * @param message The main message for the log.
   * @return The string ready for logging.
   */
  private String getLogString(String message) {
    return RegularPactTask.constructLogString(message, this.getEnvironment().getTaskName(), this);
  }
}
TOP

Related Classes of eu.stratosphere.pact.runtime.task.DataSinkTask

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.