Package com.facebook.hiveio.output

Source Code of com.facebook.hiveio.output.HiveApiOutputCommitter

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.facebook.hiveio.output;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.facebook.hiveio.common.FileSystems;
import com.facebook.hiveio.common.HadoopUtils;
import com.facebook.hiveio.common.HiveUtils;
import com.google.common.base.Preconditions;

import java.io.IOException;
import java.util.List;

/**
* OutputCommitter for Hive output
*/
class HiveApiOutputCommitter extends OutputCommitter {
  private static final Logger LOG = LoggerFactory.getLogger(HiveApiOutputCommitter.class);

  /** Base Hadoop output committer */
  private final OutputCommitter baseCommitter;
  /** Profile ID to use */
  private final String profileId;

  /**
   * Constructor
   *
   * @param baseCommitter Base Hadoop committer
   * @param profileId Profile ID
   */
  public HiveApiOutputCommitter(OutputCommitter baseCommitter,
                                String profileId) {
    this.baseCommitter = baseCommitter;
    this.profileId = profileId;
  }

  @Override
  public void setupJob(JobContext jobContext) throws IOException {
    baseCommitter.setupJob(jobContext);
  }

  @Override
  public void commitJob(JobContext jobContext) throws IOException {
    baseCommitter.commitJob(jobContext);

    Configuration conf = jobContext.getConfiguration();
    OutputConf outputConf = new OutputConf(conf, profileId);
    HiveOutputDescription outputDesc = outputConf.readOutputDescription();
    OutputInfo outputInfo = outputConf.readOutputTableInfo();
    if (outputInfo.hasPartitionInfo()) {
      registerPartitions(conf, outputDesc, outputInfo);
    } else {
      noPartitionsCopyData(conf, outputInfo);
    }

    writeSuccessFile(conf);
  }

  /**
   * Write success file to Hadoop if required
   *
   * @param conf Configuration
   * @throws IOException I/O errors
   */
  private static void writeSuccessFile(Configuration conf) throws IOException {
    if (!HadoopUtils.needSuccessMarker(conf)) {
      return;
    }
    Path outputPath = HadoopUtils.getOutputPath(conf);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
      Path successPath = new Path(outputPath, "_SUCCESS");
      if (!fs.exists(successPath)) {
        fs.create(successPath).close();
      }
    }
  }

  /**
   * Register partitions for new data we wrote.
   *
   * @param conf Configuration
   * @param outputDesc Output description from user
   * @param outputInfo Internal output information
   * @throws IOException
   */
  private void registerPartitions(Configuration conf,
    HiveOutputDescription outputDesc, OutputInfo outputInfo)
    throws IOException {
    HiveConf hiveConf = new HiveConf(conf, HiveApiOutputCommitter.class);

    String dbName = outputDesc.getDbName();
    String tableName = outputDesc.getTableName();

    HiveMetaStoreClient client;
    Table hiveTable;
    try {
      client = new HiveMetaStoreClient(hiveConf);
      hiveTable = client.getTable(dbName, tableName);
      // CHECKSTYLE: stop IllegalCatch
    } catch (Exception e) {
      // CHECKSTYLE: resume IllegalCatch
      throw new IOException(e);
    }

    Partition partition = new Partition();
    partition.setDbName(dbName);
    partition.setTableName(tableName);
    partition.setParameters(outputInfo.getTableParams());
    List<String> partitionValues = HiveUtils
        .orderedPartitionValues(hiveTable.getPartitionKeys(),
            outputDesc.getPartitionValues());
    partition.setValues(partitionValues);

    StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd());
    sd.setParameters(outputInfo.getSerializerParams());
    sd.setLocation(outputInfo.getFinalOutputPath());
    sd.setCols(outputInfo.getColumnInfo());
    partition.setSd(sd);

    try {
      client.add_partition(partition);
      // CHECKSTYLE: stop IllegalCatch
    } catch (Exception e) {
      // CHECKSTYLE: resume IllegalCatch
      throw new IOException(e);
    }
  }

  /**
   * Table has no partitions, just copy data
   *
   * @param conf Configuration
   * @param oti OutputInfo
   * @throws IOException I/O errors
   */
  private void noPartitionsCopyData(Configuration conf, OutputInfo oti)
    throws IOException {
    Preconditions.checkArgument(!oti.hasPartitionInfo());
    Path tablePath = new Path(oti.getTableRoot());
    Path writePath = new Path(oti.getPartitionPath());
    FileSystem tableFs = tablePath.getFileSystem(conf);
    FileSystem writePathFs = writePath.getFileSystem(conf);
    if (!tableFs.getUri().equals(writePathFs.getUri())) {
      LOG.error("Table's root path fs {} is not on same as its partition path fs {}",
          tableFs.getUri(), writePathFs.getUri());
      throw new IllegalStateException("Table's root path fs " + tableFs.getUri() +
          " is not on same as its partition path fs " + writePathFs.getUri());
    }
    FileSystems.move(tableFs, writePath, writePath, tablePath);
    tableFs.delete(writePath, true);
  }

  @Override @Deprecated
  public void cleanupJob(JobContext jobContext) throws IOException {
    baseCommitter.cleanupJob(jobContext);
  }

  @Override
  public void abortJob(JobContext jobContext, JobStatus.State state)
    throws IOException {
    baseCommitter.abortJob(jobContext, state);
    HadoopUtils.deleteOutputDir(jobContext.getConfiguration());
  }

  @Override
  public void setupTask(TaskAttemptContext taskContext) throws IOException {
    baseCommitter.setupTask(taskContext);
  }

  @Override
  public boolean needsTaskCommit(TaskAttemptContext taskContext)
    throws IOException {
    return baseCommitter.needsTaskCommit(taskContext);
  }

  @Override
  public void commitTask(TaskAttemptContext taskContext) throws IOException {
    HadoopUtils.setWorkOutputDir(taskContext);
    baseCommitter.commitTask(taskContext);
  }

  @Override
  public void abortTask(TaskAttemptContext taskContext) throws IOException {
    baseCommitter.abortTask(taskContext);
  }
}
TOP

Related Classes of com.facebook.hiveio.output.HiveApiOutputCommitter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.