Package cascading.tap.hadoop.io

Source Code of cascading.tap.hadoop.io.TapOutputCollector

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading.tap.hadoop.io;

import java.io.Closeable;
import java.io.IOException;

import cascading.flow.FlowProcess;
import cascading.tap.Tap;
import cascading.tap.TapException;
import cascading.tap.hadoop.util.Hadoop18TapUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static cascading.flow.hadoop.util.HadoopUtil.asJobConfInstance;

/**
*
*/
public class TapOutputCollector implements OutputCollector, Closeable
  {
  private static final Logger LOG = LoggerFactory.getLogger( TapOutputCollector.class );

  public static final String PART_TASK_PATTERN = "%s%spart-%05d";
  public static final String PART_TASK_SEQ_PATTERN = "%s%spart-%05d-%05d";

  /** Field conf */
  private Configuration conf;
  /** Field writer */
  private RecordWriter writer;
  /** Field filenamePattern */
  private String filenamePattern;
  /** Field filename */
  private String filename;
  /** Field tap */
  private Tap<Configuration, RecordReader, OutputCollector> tap;
  /** Field prefix */
  private String prefix;
  /** Field sequence */
  private long sequence;
  /** Field isFileOutputFormat */
  private boolean isFileOutputFormat;
  /** Field reporter */
  private final Reporter reporter = Reporter.NULL;
  private final FlowProcess<? extends Configuration> flowProcess;

  public TapOutputCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap ) throws IOException
    {
    this( flowProcess, tap, null );
    }

  public TapOutputCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, String prefix ) throws IOException
    {
    this( flowProcess, tap, prefix, -1 );
    }

  public TapOutputCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, String prefix, long sequence ) throws IOException
    {
    this.tap = tap;
    this.sequence = sequence;
    this.prefix = prefix == null || prefix.length() == 0 ? null : prefix;
    this.flowProcess = flowProcess;
    this.conf = this.flowProcess.getConfigCopy();
    this.filenamePattern = this.conf.get( "cascading.tapcollector.partname", sequence == -1 ? PART_TASK_PATTERN : PART_TASK_SEQ_PATTERN );

    initialize();
    }

  protected void initialize() throws IOException
    {
    tap.sinkConfInit( flowProcess, conf );

    OutputFormat outputFormat = asJobConfInstance( conf ).getOutputFormat();

    // todo: use OutputCommitter class

    isFileOutputFormat = outputFormat instanceof FileOutputFormat;

    if( isFileOutputFormat )
      {
      Hadoop18TapUtil.setupJob( conf );
      Hadoop18TapUtil.setupTask( conf );

      if( prefix != null )
        filename = String.format( filenamePattern, prefix, "/", conf.getInt( "mapred.task.partition", 0 ), sequence );
      else
        filename = String.format( filenamePattern, "", "", conf.getInt( "mapred.task.partition", 0 ), sequence );
      }

    LOG.info( "creating path: {}", filename );

    writer = outputFormat.getRecordWriter( null, asJobConfInstance( conf ), filename, Reporter.NULL );
    }

  /**
   * Method collect writes the given values to the {@link Tap} this instance encapsulates.
   *
   * @param writableComparable of type WritableComparable
   * @param writable           of type Writable
   * @throws IOException when
   */
  public void collect( Object writableComparable, Object writable ) throws IOException
    {
    flowProcess.keepAlive();
    writer.write( writableComparable, writable );
    }

  public void close()
    {
    try
      {
      if( isFileOutputFormat )
        LOG.info( "closing tap collector for: {}", new Path( tap.getIdentifier(), filename ) );
      else
        LOG.info( "closing tap collector for: {}", tap );

      try
        {
        writer.close( reporter );
        }
      finally
        {
        if( isFileOutputFormat )
          {
          if( Hadoop18TapUtil.needsTaskCommit( conf ) )
            Hadoop18TapUtil.commitTask( conf );

          Hadoop18TapUtil.cleanupJob( conf );
          }
        }
      }
    catch( IOException exception )
      {
      LOG.warn( "exception closing: {}", filename, exception );
      throw new TapException( "exception closing: " + filename, exception );
      }
    }
  }
TOP

Related Classes of cascading.tap.hadoop.io.TapOutputCollector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.