Package org.apache.giraph.hive.common

Source Code of org.apache.giraph.hive.common.HiveUtils

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.giraph.hive.common;

import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.conf.StrConfOption;
import org.apache.giraph.hive.input.mapping.HiveToMapping;
import org.apache.giraph.hive.input.edge.HiveToEdge;
import org.apache.giraph.hive.input.vertex.HiveToVertex;
import org.apache.giraph.hive.output.VertexToHive;
import org.apache.giraph.utils.ReflectionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.log4j.Logger;

import com.facebook.hiveio.schema.HiveTableSchema;
import com.facebook.hiveio.schema.HiveTableSchemas;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import static java.lang.System.getenv;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_EDGE_INPUT;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_MAPPING_INPUT;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_INPUT;
import static org.apache.giraph.hive.common.GiraphHiveConstants.VERTEX_TO_HIVE_CLASS;

/**
* Utility methods for Hive IO
*/
@SuppressWarnings("unchecked")
public class HiveUtils {
  /** Logger */
  private static final Logger LOG = Logger.getLogger(HiveUtils.class);

  /** Do not instantiate */
  private HiveUtils() {
  }

  /**
   * @param outputTablePartitionString table partition string
   * @return Map
   */
  public static Map<String, String> parsePartitionValues(
      String outputTablePartitionString) {
    if (outputTablePartitionString == null) {
      return null;
    }
    Splitter commaSplitter = Splitter.on(',').omitEmptyStrings().trimResults();
    Splitter equalSplitter = Splitter.on('=').omitEmptyStrings().trimResults();
    Map<String, String> partitionValues = Maps.newHashMap();
    for (String keyValStr : commaSplitter.split(outputTablePartitionString)) {
      List<String> keyVal = Lists.newArrayList(equalSplitter.split(keyValStr));
      if (keyVal.size() != 2) {
        throw new IllegalArgumentException(
            "Unrecognized partition value format: " +
                outputTablePartitionString);
      }
      partitionValues.put(keyVal.get(0), keyVal.get(1));
    }
    return partitionValues;
  }

  /**
   * Lookup index of column in {@link HiveTableSchema}, or throw if not found.
   *
   * @param schema {@link HiveTableSchema}
   * @param columnName column name
   * @return column index
   */
  public static int columnIndexOrThrow(HiveTableSchema schema,
      String columnName) {
    int index = schema.positionOf(columnName);
    if (index == -1) {
      throw new IllegalArgumentException("Column " + columnName +
          " not found in table " + schema.getTableDesc());
    }
    return index;
  }

  /**
   * Lookup index of column in {@link HiveTableSchema}, or throw if not found.
   *
   * @param schema {@link HiveTableSchema}
   * @param conf {@link Configuration}
   * @param confOption {@link StrConfOption}
   * @return column index
   */
  public static int columnIndexOrThrow(HiveTableSchema schema,
      Configuration conf, StrConfOption confOption) {
    String columnName = confOption.get(conf);
    if (columnName == null) {
      throw new IllegalArgumentException("Column " + confOption.getKey() +
          " not set in configuration");
    }
    return columnIndexOrThrow(schema, columnName);
  }

  /**
   * Add hive-site.xml file to tmpfiles in Configuration.
   *
   * @param conf Configuration
   */
  public static void addHiveSiteXmlToTmpFiles(Configuration conf) {
    // When output partitions are used, workers register them to the
    // metastore at cleanup stage, and on HiveConf's initialization, it
    // looks for hive-site.xml.
    addToHiveFromClassLoader(conf, "hive-site.xml");
  }

  /**
   * Add hive-site-custom.xml to tmpfiles in Configuration.
   *
   * @param conf Configuration
   */
  public static void addHiveSiteCustomXmlToTmpFiles(Configuration conf) {
    addToHiveFromClassLoader(conf, "hive-site-custom.xml");
    addToHiveFromEnv(conf, "HIVE_HOME", "conf/hive-site.xml");
  }

  /**
   * Add a file to Configuration tmpfiles from environment variable
   *
   * @param conf Configuration
   * @param envKey environment variable key
   * @param path search path
   * @return true if file found and added, false otherwise
   */
  private static boolean addToHiveFromEnv(Configuration conf,
      String envKey, String path) {
    String envValue = getenv(envKey);
    if (envValue == null) {
      return false;
    }
    File file = new File(envValue, path);
    if (file.exists()) {
      LOG.info("addToHiveFromEnv: Adding " + file.getPath() +
          " to Configuration tmpfiles");
    }
    try {
      addToStringCollection(conf, "tmpfiles", file.toURI().toURL().toString());
    } catch (MalformedURLException e) {
      LOG.error("Failed to get URL for file " + file);
    }
    return true;
  }

  /**
   * Add a file to Configuration tmpfiles from ClassLoader resource
   *
   * @param conf Configuration
   * @param name file name
   * @return true if file found in class loader, false otherwise
   */
  private static boolean addToHiveFromClassLoader(Configuration conf,
      String name) {
    URL url = conf.getClassLoader().getResource(name);
    if (url == null) {
      return false;
    }
    if (LOG.isInfoEnabled()) {
      LOG.info("addToHiveFromClassLoader: Adding " + name + " at " +
          url + " to Configuration tmpfiles");
    }
    addToStringCollection(conf, "tmpfiles", url.toString());
    return true;
  }

  /**
   * Add jars from HADOOP_CLASSPATH environment variable to tmpjars property
   * in Configuration.
   *
   * @param conf Configuration
   */
  public static void addHadoopClasspathToTmpJars(Configuration conf) {
    // Or, more effectively, we can provide all the jars client needed to
    // the workers as well
    String hadoopClasspath = getenv("HADOOP_CLASSPATH");
    if (hadoopClasspath == null) {
      return;
    }
    String[] hadoopJars = hadoopClasspath.split(File.pathSeparator);
    if (hadoopJars.length > 0) {
      List<String> hadoopJarURLs = Lists.newArrayList();
      for (String jarPath : hadoopJars) {
        File file = new File(jarPath);
        if (file.exists() && file.isFile()) {
          hadoopJarURLs.add(file.toURI().toString());
        }
      }
      HiveUtils.addToStringCollection(conf, "tmpjars", hadoopJarURLs);
    }
  }

  /**
   * Handle -hiveconf options, adding them to Configuration
   *
   * @param hiveconfArgs array of hiveconf args
   * @param conf Configuration
   */
  public static void processHiveconfOptions(String[] hiveconfArgs,
      Configuration conf) {
    for (String hiveconf : hiveconfArgs) {
      processHiveconfOption(conf, hiveconf);
    }
  }

  /**
   * Process -hiveconf option, adding it to Configuration appropriately.
   *
   * @param conf Configuration
   * @param hiveconf option to process
   */
  public static void processHiveconfOption(Configuration conf,
      String hiveconf) {
    String[] keyval = hiveconf.split("=", 2);
    if (keyval.length == 2) {
      String name = keyval[0];
      String value = keyval[1];
      if (name.equals("tmpjars") || name.equals("tmpfiles")) {
        addToStringCollection(conf, name, value);
      } else {
        conf.set(name, value);
      }
    }
  }

  /**
   * Add string to collection
   *
   * @param conf Configuration
   * @param key key to add
   * @param values values for collection
   */
  public static void addToStringCollection(Configuration conf, String key,
      String... values) {
    addToStringCollection(conf, key, Arrays.asList(values));
  }

  /**
   * Add string to collection
   *
   * @param conf Configuration
   * @param key to add
   * @param values values for collection
   */
  public static void addToStringCollection(
      Configuration conf, String key, Collection<String> values) {
    Collection<String> strings = conf.getStringCollection(key);
    strings.addAll(values);
    conf.setStrings(key, strings.toArray(new String[strings.size()]));
  }

  /**
   * Create a new VertexToHive
   *
   * @param <I> Vertex ID
   * @param <V> Vertex Value
   * @param <E> Edge Value
   * @param conf Configuration
   * @param schema Hive table schema
   * @return VertexToHive
   * @throws IOException on any instantiation errors
   */
  public static <I extends WritableComparable, V extends Writable,
      E extends Writable> VertexToHive<I, V, E> newVertexToHive(
      ImmutableClassesGiraphConfiguration<I, V, E> conf,
      HiveTableSchema schema) throws IOException {
    Class<? extends VertexToHive> klass = VERTEX_TO_HIVE_CLASS.get(conf);
    if (klass == null) {
      throw new IOException(VERTEX_TO_HIVE_CLASS.getKey() +
          " not set in conf");
    }
    return newInstance(klass, conf, schema);
  }

  /**
   * Create a new HiveToEdge
   *
   * @param <I> Vertex ID
   * @param <V> Vertex Value
   * @param <E> Edge Value
   * @param conf Configuration
   * @param schema Hive table schema
   * @return HiveToVertex
   */
  public static <I extends WritableComparable, V extends Writable,
        E extends Writable> HiveToEdge<I, E> newHiveToEdge(
      ImmutableClassesGiraphConfiguration<I, V, E> conf,
      HiveTableSchema schema) {
    Class<? extends HiveToEdge> klass = HIVE_EDGE_INPUT.getClass(conf);
    if (klass == null) {
      throw new IllegalArgumentException(
          HIVE_EDGE_INPUT.getClassOpt().getKey() + " not set in conf");
    }
    return newInstance(klass, conf, schema);
  }

  /**
   * Create a new HiveToVertex
   *
   * @param <I> Vertex ID
   * @param <V> Vertex Value
   * @param <E> Edge Value
   * @param conf Configuration
   * @param schema Hive table schema
   * @return HiveToVertex
   */
  public static <I extends WritableComparable, V extends Writable,
        E extends Writable> HiveToVertex<I, V, E> newHiveToVertex(
      ImmutableClassesGiraphConfiguration<I, V, E> conf,
      HiveTableSchema schema) {
    Class<? extends HiveToVertex> klass = HIVE_VERTEX_INPUT.getClass(conf);
    if (klass == null) {
      throw new IllegalArgumentException(
          HIVE_VERTEX_INPUT.getClassOpt().getKey() + " not set in conf");
    }
    return newInstance(klass, conf, schema);
  }

  /**
   * Create a new HiveToMapping
   *
   * @param conf ImmutableClassesGiraphConfiguration
   * @param schema HiveTableSchema
   * @param <I> vertexId type
   * @param <V> vertexValue type
   * @param <E> edgeValue type
   * @param <B> mappingTarget type
   * @return HiveToMapping
   */
  public static <I extends WritableComparable, V extends Writable,
    E extends Writable, B extends Writable>
  HiveToMapping<I, B> newHiveToMapping(
    ImmutableClassesGiraphConfiguration<I, V, E> conf,
    HiveTableSchema schema) {
    Class<? extends HiveToMapping> klass = HIVE_MAPPING_INPUT.getClass(conf);
    if (klass == null) {
      throw new IllegalArgumentException(
          HIVE_MAPPING_INPUT.getClassOpt().getKey() + " not set in conf"
      );
    }
    return newInstance(klass, conf, schema);
  }

  /**
   * Create a new instance of a class, configuring it and setting the Hive table
   * schema if it supports those types.
   *
   * @param klass Class to create
   * @param conf {@link ImmutableClassesGiraphConfiguration} to configure with
   * @param schema {@link HiveTableSchema} from Hive to set
   * @param <I> Vertex ID
   * @param <V> Vertex Value
   * @param <E> Edge Value
   * @param <T> type being created
   * @return new object of type <T>
   */
  public static
  <I extends WritableComparable, V extends Writable, E extends Writable, T>
  T newInstance(Class<T> klass,
      ImmutableClassesGiraphConfiguration<I, V, E> conf,
      HiveTableSchema schema) {
    T object = ReflectionUtils.<T>newInstance(klass, conf);
    HiveTableSchemas.configure(object, schema);
    return object;
  }
}
TOP

Related Classes of org.apache.giraph.hive.common.HiveUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.