/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
 */

package org.apache.howl.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus.State;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.security.AccessControlException;
import org.apache.howl.common.ErrorType;
import org.apache.howl.common.HowlConstants;
import org.apache.howl.common.HowlException;
import org.apache.howl.common.HowlUtil;
import org.apache.howl.data.schema.HowlFieldSchema;
import org.apache.howl.data.schema.HowlSchema;
import org.apache.howl.data.schema.HowlSchemaUtils;
import org.apache.thrift.TException;
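
/**
 * The {@link OutputCommitter} used for Howl tables. Task-level and job-level
 * operations are delegated to the committer of the underlying output format,
 * and on successful completion of a job writing to a partitioned table the
 * new partition is published to the Hive metastore (see {@link #cleanupJob}).
 *
 * <p>A minimal sketch of how this committer might be obtained (the
 * {@code baseOutputFormat} variable here is illustrative, not part of this
 * class):
 * <pre>
 *   OutputCommitter base = baseOutputFormat.getOutputCommitter(taskContext);
 *   OutputCommitter committer = new HowlOutputCommitter(base);
 * </pre>
 */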
public class HowlOutputCommitter extends OutputCommitter {
/** The underlying output committer */
private final OutputCommitter baseCommitter;
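
  /**
   * @param baseCommitter the output committer of the underlying output
   *        format; a null value is tolerated by the delegating methods
   */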
public HowlOutputCommitter(OutputCommitter baseCommitter) {
this.baseCommitter = baseCommitter;
}
  @Override
  public void abortTask(TaskAttemptContext context) throws IOException {
    // Guard against a null base committer, as the job-level methods do.
    if( baseCommitter != null ) {
      baseCommitter.abortTask(context);
    }
  }

  @Override
  public void commitTask(TaskAttemptContext context) throws IOException {
    if( baseCommitter != null ) {
      baseCommitter.commitTask(context);
    }
  }

  @Override
  public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    if( baseCommitter != null ) {
      return baseCommitter.needsTaskCommit(context);
    }
    // With no base committer there is nothing to commit at task level.
    return false;
  }

@Override
public void setupJob(JobContext context) throws IOException {
if( baseCommitter != null ) {
baseCommitter.setupJob(context);
}
}

  @Override
  public void setupTask(TaskAttemptContext context) throws IOException {
    if( baseCommitter != null ) {
      baseCommitter.setupTask(context);
    }
  }
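
  /**
   * Aborts the job: delegates to the base committer, cancels any delegation
   * token that HowlOutputFormat acquired for the job, and deletes the job's
   * output location.
   */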
@Override
  public void abortJob(JobContext jobContext, State state) throws IOException {
    if( baseCommitter != null ) {
      baseCommitter.abortJob(jobContext, state);
    }
    OutputJobInfo jobInfo = HowlOutputFormat.getJobInfo(jobContext);
    HiveMetaStoreClient client = null;
    try {
      client = HowlOutputFormat.createHiveClient(
          jobInfo.getTableInfo().getServerUri(), jobContext.getConfiguration());
      // Cancel the delegation tokens acquired for this job now that we are
      // done. We should cancel only if the tokens were acquired by
      // HowlOutputFormat, not if they were supplied by Oozie; in the latter
      // case the HOWL_KEY_TOKEN_SIGNATURE property will not be set in the conf.
      String tokenStrForm = client.getTokenStrForm();
      if( tokenStrForm != null && jobContext.getConfiguration().get(
          HowlOutputFormat.HOWL_KEY_TOKEN_SIGNATURE) != null ) {
        client.cancelDelegationToken(tokenStrForm);
      }
    } catch(Exception e) {
      if( e instanceof HowlException ) {
        throw (HowlException) e;
      } else {
        throw new HowlException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
      }
    } finally {
      // Close the metastore client even if token cancellation fails.
      if( client != null ) {
        client.close();
      }
    }
    // Discard the job's partially written output.
    Path src = new Path(jobInfo.getLocation());
    FileSystem fs = src.getFileSystem(jobContext.getConfiguration());
    fs.delete(src, true);
  }
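
  /**
   * Support for the _SUCCESS marker file, mirroring Hadoop's
   * FileOutputCommitter: when the property below is set to true, an empty
   * _SUCCESS file is created in the job output directory on successful
   * completion.
   */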
public static final String SUCCEEDED_FILE_NAME = "_SUCCESS";
static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER =
"mapreduce.fileoutputcommitter.marksuccessfuljobs";
  private static boolean getOutputDirMarking(Configuration conf) {
    return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, false);
  }
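
  /**
   * Commits the job: delegates to the base committer, creates the _SUCCESS
   * marker file if requested, and then runs {@link #cleanupJob} to finish
   * the Howl-side work.
   */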
@Override
public void commitJob(JobContext jobContext) throws IOException {
if(baseCommitter != null) {
baseCommitter.commitJob(jobContext);
}
    // Create the _SUCCESS file in the output location, if so requested.
    OutputJobInfo jobInfo = HowlOutputFormat.getJobInfo(jobContext);
    if( getOutputDirMarking(jobContext.getConfiguration()) ) {
      Path outputPath = new Path(jobInfo.getLocation());
      FileSystem fileSys = outputPath.getFileSystem(jobContext.getConfiguration());
      if( fileSys.exists(outputPath) ) {
        // create an empty file in the folder to mark the job as successful
        Path filePath = new Path(outputPath, SUCCEEDED_FILE_NAME);
        if( !fileSys.exists(filePath) ) { // may already have been created by baseCommitter.commitJob()
          fileSys.create(filePath).close();
        }
      }
    }
cleanupJob(jobContext);
}
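
  /**
   * Completes the job. For a non-partitioned table the job output is simply
   * moved from the temp directory to the table directory. For a partitioned
   * table the new partition is published to the metastore; if a later step
   * fails, the partition is dropped again so the metastore stays consistent.
   */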
@Override
public void cleanupJob(JobContext context) throws IOException {
OutputJobInfo jobInfo = HowlOutputFormat.getJobInfo(context);
Configuration conf = context.getConfiguration();
Table table = jobInfo.getTable();
StorageDescriptor tblSD = table.getSd();
Path tblPath = new Path(tblSD.getLocation());
FileSystem fs = tblPath.getFileSystem(conf);
if( table.getPartitionKeys().size() == 0 ) {
//non partitioned table
if( baseCommitter != null ) {
baseCommitter.cleanupJob(context);
}
      //Move data from the temp directory to the actual table directory.
      //No metastore operation is required.
Path src = new Path(jobInfo.getLocation());
moveTaskOutputs(fs, src, src, tblPath);
fs.delete(src, true);
return;
}
HiveMetaStoreClient client = null;
List<String> values = null;
boolean partitionAdded = false;
HowlTableInfo tableInfo = jobInfo.getTableInfo();
try {
client = HowlOutputFormat.createHiveClient(tableInfo.getServerUri(), conf);
StorerInfo storer = InitializeInput.extractStorerInfo(table.getSd(),table.getParameters());
Partition partition = new Partition();
partition.setDbName(tableInfo.getDatabaseName());
partition.setTableName(tableInfo.getTableName());
partition.setSd(new StorageDescriptor(tblSD));
partition.getSd().setLocation(jobInfo.getLocation());
updateTableSchema(client, table, jobInfo.getOutputSchema());
List<FieldSchema> fields = new ArrayList<FieldSchema>();
for(HowlFieldSchema fieldSchema : jobInfo.getOutputSchema().getFields()) {
fields.add(HowlSchemaUtils.getFieldSchema(fieldSchema));
}
partition.getSd().setCols(fields);
      Map<String,String> partKVs = tableInfo.getPartitionValues();
      //Get the partition value list in partition key order; keep a reference
      //so the partition can be dropped again if a later step fails.
      values = getPartitionValueList(table, partKVs);
      partition.setValues(values);
Map<String, String> params = new HashMap<String, String>();
params.put(HowlConstants.HOWL_ISD_CLASS, storer.getInputSDClass());
params.put(HowlConstants.HOWL_OSD_CLASS, storer.getOutputSDClass());
//Copy table level howl.* keys to the partition
for(Map.Entry<Object, Object> entry : storer.getProperties().entrySet()) {
params.put(entry.getKey().toString(), entry.getValue().toString());
}
partition.setParameters(params);
// Sets permissions and group name on partition dirs.
FileStatus tblStat = fs.getFileStatus(tblPath);
String grpName = tblStat.getGroup();
FsPermission perms = tblStat.getPermission();
Path partPath = tblPath;
for(FieldSchema partKey : table.getPartitionKeys()){
partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
fs.setPermission(partPath, perms);
try{
fs.setOwner(partPath, null, grpName);
} catch(AccessControlException ace){
        // Ignore the failure to set the owner. TODO: log the exception
        // before ignoring it, once logging is available in Howl.
}
}
//Publish the new partition
client.add_partition(partition);
partitionAdded = true; //publish to metastore done
if( baseCommitter != null ) {
baseCommitter.cleanupJob(context);
}
      // Cancel the delegation tokens acquired for this job now that we are
      // done. As in abortJob, cancel only if the tokens were acquired by
      // HowlOutputFormat and not supplied by Oozie; in the latter case the
      // HOWL_KEY_TOKEN_SIGNATURE property will not be set in the conf.
      String tokenStrForm = client.getTokenStrForm();
      if( tokenStrForm != null && context.getConfiguration().get(
          HowlOutputFormat.HOWL_KEY_TOKEN_SIGNATURE) != null ) {
        client.cancelDelegationToken(tokenStrForm);
      }
} catch (Exception e) {
      if( partitionAdded ) {
        try {
          // A step after publishing the partition failed; roll back the
          // metastore change so it does not point at bad data.
          client.dropPartition(tableInfo.getDatabaseName(),
              tableInfo.getTableName(), values);
        } catch(Exception te) {
          // Keep the original exception as the cause
          throw new HowlException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
        }
      }
}
if( e instanceof HowlException ) {
throw (HowlException) e;
} else {
throw new HowlException(ErrorType.ERROR_PUBLISHING_PARTITION, e);
}
} finally {
if( client != null ) {
client.close();
}
}
}
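
  /**
   * Appends one escaped {@code key=value} component to a partition path.
   * @param partialPath the partition path built so far
   * @param partKey the partition key to append
   * @param partKVs the map from partition key to value
   * @return the extended partition path
   */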
private Path constructPartialPartPath(Path partialPath, String partKey, Map<String,String> partKVs){
StringBuilder sb = new StringBuilder(FileUtils.escapePathName(partKey));
sb.append("=");
sb.append(FileUtils.escapePathName(partKVs.get(partKey)));
return new Path(partialPath, sb.toString());
}

  /**
   * Updates the table schema in the metastore, appending any new columns
   * added by the partition being published.
   * @param client the metastore client
   * @param table the table being written to
   * @param partitionSchema the schema of the partition
   * @throws IOException if the partition schema is not compatible with the table schema
   * @throws InvalidOperationException if the metastore rejects the table alteration
   * @throws MetaException on a metastore failure
   * @throws TException on a thrift communication failure
   */
private void updateTableSchema(HiveMetaStoreClient client, Table table,
HowlSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException {
List<FieldSchema> newColumns = HowlUtil.validatePartitionSchema(table, partitionSchema);
if( newColumns.size() != 0 ) {
List<FieldSchema> tableColumns = new ArrayList<FieldSchema>(table.getSd().getCols());
tableColumns.addAll(newColumns);
//Update table schema to add the newly added columns
table.getSd().setCols(tableColumns);
client.alter_table(table.getDbName(), table.getTableName(), table);
}
}
/**
* Convert the partition value map to a value list in the partition key order.
* @param table the table being written to
* @param valueMap the partition value map
* @return the partition value list
* @throws IOException
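 * <p>For example (values are illustrative): for partition keys
 * {@code [ds, region]} and value map {@code {ds=20110401, region=us}},
 * this returns {@code ["20110401", "us"]}.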
*/
static List<String> getPartitionValueList(Table table, Map<String, String> valueMap) throws IOException {
if( valueMap.size() != table.getPartitionKeys().size() ) {
throw new HowlException(ErrorType.ERROR_INVALID_PARTITION_VALUES,
"Table "
+ table.getTableName() + " has " +
table.getPartitionKeys().size() + " partition keys, got "+
valueMap.size());
}
List<String> values = new ArrayList<String>();
for(FieldSchema schema : table.getPartitionKeys()) {
String value = valueMap.get(schema.getName().toLowerCase());
if( value == null ) {
throw new HowlException(ErrorType.ERROR_MISSING_PARTITION_KEY,
"Key " + schema.getName() + " of table " + table.getTableName());
}
values.add(value);
}
return values;
}

  /**
   * Moves a task output file from the temp directory to its final location,
   * recursing into directories and replacing any file that already exists at
   * the destination.
   * @param fs the output file system
   * @param file the file or directory to move
   * @param src the source (temp) directory
   * @param dest the target directory
   * @throws IOException
   */
private void moveTaskOutputs(FileSystem fs,
Path file,
Path src,
Path dest) throws IOException {
    if (fs.isFile(file)) {
      Path finalOutputPath = getFinalPath(file, src, dest);
      // Try a plain rename first; if it fails (for example because the
      // destination already exists), delete the stale copy and retry.
      if (!fs.rename(file, finalOutputPath)) {
        if (!fs.delete(finalOutputPath, true)) {
          throw new HowlException(ErrorType.ERROR_MOVE_FAILED,
              "Failed to delete existing path " + finalOutputPath);
        }
        if (!fs.rename(file, finalOutputPath)) {
          throw new HowlException(ErrorType.ERROR_MOVE_FAILED,
              "Failed to move output to " + finalOutputPath);
        }
      }
} else if(fs.getFileStatus(file).isDir()) {
FileStatus[] paths = fs.listStatus(file);
Path finalOutputPath = getFinalPath(file, src, dest);
fs.mkdirs(finalOutputPath);
if (paths != null) {
for (FileStatus path : paths) {
moveTaskOutputs(fs, path.getPath(), src, dest);
}
}
}
}
/**
* Find the final name of a given output file, given the output directory
* and the work directory.
* @param file the file to move
* @param src the source directory
* @param dest the target directory
* @return the final path for the specific output file
* @throws IOException
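 * <p>For example (paths are illustrative): with src
 * {@code /tmp/job/_temporary} and dest {@code /user/warehouse/mytable},
 * the file {@code /tmp/job/_temporary/part-00000} maps to
 * {@code /user/warehouse/mytable/part-00000}.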
*/
private Path getFinalPath(Path file, Path src,
Path dest) throws IOException {
    URI taskOutputUri = file.toUri();
    URI relativePath = src.toUri().relativize(taskOutputUri);
    // URI.relativize returns the original URI unchanged when it cannot be
    // relativized, so an identity comparison detects the failure.
    if (taskOutputUri == relativePath) {
      throw new HowlException(ErrorType.ERROR_MOVE_FAILED,
          "Cannot get the relative path: base = " + src + " child = " + file);
    }
if (relativePath.getPath().length() > 0) {
return new Path(dest, relativePath.getPath());
} else {
return dest;
}
}
}