Package org.apache.hive.hcatalog.pig

Source Code of org.apache.hive.hcatalog.pig.HCatStorer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hive.hcatalog.pig;

import java.io.IOException;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.security.Credentials;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatContext;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
import org.apache.pig.PigException;
import org.apache.pig.ResourceSchema;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.UDFContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* HCatStorer.
*
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HCatStorer extends HCatBaseStorer {
  private static final Logger LOG = LoggerFactory.getLogger(HCatStorer.class);

  // Signature for wrapped storer, see comments in LoadFuncBasedInputDriver.initialize
  final public static String INNER_SIGNATURE = "hcatstorer.inner.signature";
  final public static String INNER_SIGNATURE_PREFIX = "hcatstorer_inner_signature";
  // A hash map which stores job credentials. The key is a signature passed by Pig, which is
  //unique to the store func and out file name (table, in our case).
  private static Map<String, Credentials> jobCredentials = new HashMap<String, Credentials>();
  private final static Options validOptions = new Options();
  static {
    try {
      populateValidOptions();
    }
    catch(Throwable t) {
      LOG.error("Failed to build option list: ", t);
      throw new RuntimeException(t);
    }
  }
  private final static CommandLineParser parser = new GnuParser();

  /**
   * @param optString may empty str (not null), in which case it's no-op
   */
  public HCatStorer(String partSpecs, String pigSchema, String optString) throws Exception {
    super(partSpecs, pigSchema);
    String[] optsArr = optString.split(" ");
    CommandLine configuredOptions;
    try {
      configuredOptions = parser.parse(validOptions, optsArr);
    } catch (ParseException e) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp( "[-" + ON_OOR_VALUE_OPT + "]", validOptions );
      throw e;
    }
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign});
    //downstream code expects it to be set to a valid value
    udfProps.put(ON_OORA_VALUE_PROP, configuredOptions.getOptionValue(ON_OOR_VALUE_OPT, getDefaultValue().name()));
    if(LOG.isDebugEnabled()) {
      LOG.debug("setting " + configuredOptions.getOptionValue(ON_OOR_VALUE_OPT));
    }
    isValidOOROption((String)udfProps.get(ON_OORA_VALUE_PROP));
  }
  public HCatStorer(String partSpecs, String pigSchema) throws Exception {
    this(partSpecs, pigSchema, "");
  }
  public HCatStorer(String partSpecs) throws Exception {
    this(partSpecs, null, "");
  }

  public HCatStorer() throws Exception {
    this(null, null, "");
  }

  @Override
  public OutputFormat getOutputFormat() throws IOException {
    return new HCatOutputFormat();
  }

  /**
   * makes a list of all options that HCatStorer understands
   */
  private static void populateValidOptions() {
    validOptions.addOption(ON_OOR_VALUE_OPT, true,
      "Controls how store operation handles Pig values which are out of range for the target column" +
      "in Hive table.  Default is to throw an exception.");
  }
  /**
   * check that onOutOfRangeValue handling is configured properly
   * @throws FrontendException
   */
  private static void isValidOOROption(String optVal) throws FrontendException {
    boolean found = false;
    for(OOR_VALUE_OPT_VALUES v : OOR_VALUE_OPT_VALUES.values()) {
      if(v.name().equalsIgnoreCase(optVal)) {
        found = true;
        break;
      }
    }
    if(!found) {
      throw new FrontendException("Unexpected value for '" + ON_OOR_VALUE_OPT + "' found: " + optVal);
    }
  }
  /**
   * @param location databaseName.tableName
   */
  @Override
  public void setStoreLocation(String location, Job job) throws IOException {
    Configuration config = job.getConfiguration();
    config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign);
    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(
      this.getClass(), new String[]{sign});
    String[] userStr = location.split("\\.");

    if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) {
      for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements(); ) {
        PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString());
      }
      Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign);
      if (crd != null) {
        job.getCredentials().addAll(crd);
      }
    } else {
      Job clone = new Job(job.getConfiguration());
      OutputJobInfo outputJobInfo;
      if (userStr.length == 2) {
        outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions);
      } else if (userStr.length == 1) {
        outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions);
      } else {
        throw new FrontendException("location " + location
          + " is invalid. It must be of the form [db.]table",
          PigHCatUtil.PIG_EXCEPTION_CODE);
      }
      Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA));
      if (schema != null) {
        pigSchema = schema;
      }
      if (pigSchema == null) {
        throw new FrontendException(
          "Schema for data cannot be determined.",
          PigHCatUtil.PIG_EXCEPTION_CODE);
      }
      String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION);
      if (externalLocation != null) {
        outputJobInfo.setLocation(externalLocation);
      }
      try {
        HCatOutputFormat.setOutput(job, outputJobInfo);
      } catch (HCatException he) {
        // pass the message to the user - essentially something about
        // the table
        // information passed to HCatOutputFormat was not right
        throw new PigException(he.getMessage(),
          PigHCatUtil.PIG_EXCEPTION_CODE, he);
      }
      HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job.getConfiguration());
      try {
        doSchemaValidations(pigSchema, hcatTblSchema);
      } catch (HCatException he) {
        throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
      }
      computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema);
      HCatOutputFormat.setSchema(job, computedSchema);
      udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));

      // We will store all the new /changed properties in the job in the
      // udf context, so the the HCatOutputFormat.setOutput and setSchema
      // methods need not be called many times.
      for (Entry<String, String> keyValue : job.getConfiguration()) {
        String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
        if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
          udfProps.put(keyValue.getKey(), keyValue.getValue());
        }
      }
      //Store credentials in a private hash map and not the udf context to
      // make sure they are not public.
      jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials());
      udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true);
    }
  }

  @Override
  public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException {
    ShimLoader.getHadoopShims().getHCatShim().commitJob(getOutputFormat(), job);
  }

  @Override
  public void cleanupOnFailure(String location, Job job) throws IOException {
    ShimLoader.getHadoopShims().getHCatShim().abortJob(getOutputFormat(), job);
  }
}
TOP

Related Classes of org.apache.hive.hcatalog.pig.HCatStorer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.