Package org.apache.giraph.hive.jython

Source Code of org.apache.giraph.hive.jython.HiveJythonUtils

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.giraph.hive.jython;

import org.apache.giraph.conf.GiraphConstants;
import org.apache.giraph.conf.GiraphTypes;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.conf.StrConfOption;
import org.apache.giraph.graph.GraphType;
import org.apache.giraph.graph.Language;
import org.apache.giraph.hive.common.GiraphHiveConstants;
import org.apache.giraph.hive.common.HiveUtils;
import org.apache.giraph.hive.common.LanguageAndType;
import org.apache.giraph.hive.input.edge.HiveEdgeInputFormat;
import org.apache.giraph.hive.input.vertex.HiveVertexInputFormat;
import org.apache.giraph.hive.output.HiveVertexOutputFormat;
import org.apache.giraph.hive.primitives.PrimitiveValueReader;
import org.apache.giraph.hive.primitives.PrimitiveValueWriter;
import org.apache.giraph.hive.values.HiveValueReader;
import org.apache.giraph.hive.values.HiveValueWriter;
import org.apache.giraph.io.formats.multi.MultiEdgeInputFormat;
import org.apache.giraph.io.formats.multi.MultiVertexInputFormat;
import org.apache.giraph.jython.factories.JythonEdgeValueFactory;
import org.apache.giraph.jython.factories.JythonFactoryBase;
import org.apache.giraph.jython.factories.JythonIncomingMessageValueFactory;
import org.apache.giraph.jython.JythonJob;
import org.apache.giraph.jython.factories.JythonOutgoingMessageValueFactory;
import org.apache.giraph.jython.JythonUtils;
import org.apache.giraph.jython.factories.JythonVertexIdFactory;
import org.apache.giraph.jython.factories.JythonVertexValueFactory;
import org.apache.giraph.jython.wrappers.JythonWritableWrapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.log4j.Logger;
import org.python.core.Py;
import org.python.core.PyClass;
import org.python.core.PyObject;
import org.python.core.PyType;
import org.python.util.PythonInterpreter;

import com.facebook.hiveio.schema.HiveTableSchema;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.io.Closeables;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.giraph.conf.GiraphConstants.EDGE_INPUT_FORMAT_CLASS;
import static org.apache.giraph.conf.GiraphConstants.GRAPH_TYPE_LANGUAGES;
import static org.apache.giraph.conf.GiraphConstants.MAX_WORKERS;
import static org.apache.giraph.conf.GiraphConstants.MIN_WORKERS;
import static org.apache.giraph.conf.GiraphConstants.MESSAGE_COMBINER_CLASS;
import static org.apache.giraph.conf.GiraphConstants.VERTEX_INPUT_FORMAT_CLASS;
import static org.apache.giraph.conf.GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_EDGE_INPUT;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_INPUT;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_OUTPUT_DATABASE;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_OUTPUT_PARTITION;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_OUTPUT_PROFILE_ID;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_OUTPUT_TABLE;
import static org.apache.giraph.hive.common.GiraphHiveConstants.VERTEX_TO_HIVE_CLASS;
import static org.apache.giraph.hive.common.GiraphHiveConstants.VERTEX_VALUE_READER_JYTHON_NAME;
import static org.apache.giraph.hive.common.GiraphHiveConstants.VERTEX_VALUE_WRITER_JYTHON_NAME;
import static org.apache.giraph.hive.jython.JythonHiveToEdge.EDGE_SOURCE_ID_COLUMN;
import static org.apache.giraph.hive.jython.JythonHiveToEdge.EDGE_TARGET_ID_COLUMN;
import static org.apache.giraph.hive.jython.JythonHiveToEdge.EDGE_VALUE_COLUMN;
import static org.apache.giraph.hive.jython.JythonVertexToHive.VERTEX_VALUE_COLUMN;

/**
* Plugin to {@link HiveJythonRunner} to use Hive.
*/
public class HiveJythonUtils {
  /** Logger shared by the static helpers of this class */
  private static final Logger LOG = Logger.getLogger(HiveJythonUtils.class);

  /** Only static methods here, so don't construct */
  private HiveJythonUtils() { }

  /**
   * Process command line arguments
   *
   * @param args cmdline args
   * @param conf {@link Configuration}
   * @return remaining cmdline args to process
   */
  public static String[] processArgs(String[] args, Configuration conf) {
    HiveUtils.addHadoopClasspathToTmpJars(conf);
    HiveUtils.addHiveSiteXmlToTmpFiles(conf);
    HiveUtils.addHiveSiteCustomXmlToTmpFiles(conf);
    return moveHiveconfOptionsToConf(args, conf);
  }

  /**
   * Remove -hiveconf options from cmdline
   *
   * @param args cmdline args
   * @param conf Configuration
   * @return cmdline args without -hiveconf options
   */
  private static String[] moveHiveconfOptionsToConf(String[] args,
      Configuration conf) {
    int start = 0;
    while (start < args.length) {
      if (args[start].endsWith("hiveconf")) {
        HiveUtils.processHiveconfOption(conf, args[start + 1]);
        start += 2;
      } else {
        break;
      }
    }
    return Arrays.copyOfRange(args, start, args.length);
  }

  /**
   * Parse set of Jython scripts from local files
   *
   * @param interpreter PythonInterpreter to use
   * @param paths Jython files to parse
   * @return JythonJob
   * @throws java.io.IOException
   */
  public static JythonJob parseJythonFiles(PythonInterpreter interpreter,
      String ... paths) throws IOException {
    return parseJythonFiles(interpreter, Arrays.asList(paths));
  }

  /**
   * Parse set of Jython scripts from local files
   *
   * @param interpreter PythonInterpreter to use
   * @param paths Jython files to parse
   * @return JythonJob
   * @throws IOException
   */
  public static JythonJob parseJythonFiles(PythonInterpreter interpreter,
      List<String> paths) throws IOException {
    InputStream[] streams = new InputStream[paths.size()];
    for (int i = 0; i < paths.size(); ++i) {
      LOG.info("Reading jython file " + paths.get(i));
      streams[i] = new FileInputStream(paths.get(i));
    }

    JythonJob jythonJob;
    try {
      jythonJob = parseJythonStreams(interpreter, streams);
    } finally {
      for (InputStream stream : streams) {
        Closeables.close(stream, true);
      }
    }
    return jythonJob;
  }

  /**
   * Parse scripts from Jython InputStreams
   *
   * @param interpreter PythonInterpreter
   * @param streams InputStreams to parse
   * @return JythonJob
   */
  public static JythonJob parseJythonStreams(PythonInterpreter interpreter,
      InputStream ... streams) {
    for (InputStream stream : streams) {
      readJythonStream(interpreter, stream);
    }

    PyObject pyPrepare = interpreter.get("prepare");

    JythonJob jythonJob = new JythonJob();
    pyPrepare._jcall(new Object[]{jythonJob});

    return jythonJob;
  }

  /**
   * Execute a Jython script
   *
   * @param interpreter Jython interpreter to use
   * @param jythonStream {@link java.io.InputStream} with Jython code
   * @throws java.io.IOException
   */
  private static void readJythonStream(PythonInterpreter interpreter,
      InputStream jythonStream) {
    try {
      interpreter.execfile(jythonStream);
    } finally {
      try {
        jythonStream.close();
      } catch (IOException e) {
        LOG.error("Failed to close jython stream " + jythonStream);
      }
    }
  }

  /**
   * Set arbitrary option of unknown type in Configuration
   *
   * @param conf Configuration
   * @param key String key
   * @param value Object to set
   */
  private static void setOption(Configuration conf, String key,
      Object value) {
    if (value instanceof Boolean) {
      conf.getBoolean(key, (Boolean) value);
    } else if (value instanceof Byte || value instanceof Short ||
        value instanceof Integer) {
      conf.setInt(key, ((Number) value).intValue());
    } else if (value instanceof Long) {
      conf.setLong(key, (Long) value);
    } else if (value instanceof Float || value instanceof Double) {
      conf.setFloat(key, ((Number) value).floatValue());
    } else if (value instanceof String) {
      conf.set(key, value.toString());
    } else if (value instanceof Class) {
      conf.set(key, ((Class) value).getName());
    } else {
      throw new IllegalArgumentException(
          "Don't know how to handle option key: " + key +
          ", value: " + value + ", value type: " + value.getClass());
    }
  }

  /**
   * Write JythonJob to Configuration
   *
   * @param jythonJob JythonJob
   * @param conf Configuration
   * @param interpreter PythonInterpreter
   * @return name of Job
   */
  public static String writeJythonJobToConf(JythonJob jythonJob,
      Configuration conf, PythonInterpreter interpreter) {
    checkJob(jythonJob);

    JythonUtils.init(conf, jythonJob.getComputation_name());

    if (jythonJob.getMessageCombiner() != null) {
      MESSAGE_COMBINER_CLASS.set(conf, jythonJob.getMessageCombiner());
    }

    conf.setInt(MIN_WORKERS, jythonJob.getWorkers());
    conf.setInt(MAX_WORKERS, jythonJob.getWorkers());

    String javaOptions = Joiner.on(' ').join(jythonJob.getJava_options());
    conf.set("mapred.child.java.opts", javaOptions);

    Map<String, Object> options = jythonJob.getGiraph_options();
    for (Map.Entry<String, Object> entry : options.entrySet()) {
      setOption(conf, entry.getKey(), entry.getValue());
    }

    setPool(conf, jythonJob);

    initHiveReadersWriters(conf, jythonJob, interpreter);
    initGraphTypes(conf, jythonJob, interpreter);
    initOutput(conf, jythonJob);
    initVertexInputs(conf, jythonJob);
    initEdgeInputs(conf, jythonJob);

    String name = jythonJob.getName();
    if (name == null) {
      name = jythonJob.getComputation_name();
    }
    return name;
  }

  /**
   * Set the hadoop mapreduce pool
   *
   * @param conf Configuration
   * @param job the job info
   */
  private static void setPool(Configuration conf, JythonJob job) {
    if (job.getPool() == null) {
      if (job.getWorkers() < 50) {
        job.setPool("graph.test");
      } else {
        job.setPool("graph.production");
      }
    }
    conf.set("mapred.fairscheduler.pool", job.getPool());
  }

  /**
   * Check that the job is valid
   *
   * @param jythonJob JythonJob
   */
  private static void checkJob(JythonJob jythonJob) {
    checkNotNull(jythonJob.getComputation_name(),
        "computation_name cannot be null");
    checkTypeNotNull(jythonJob.getVertex_id(), GraphType.VERTEX_ID);
    checkTypeNotNull(jythonJob.getVertex_value(), GraphType.VERTEX_VALUE);
    checkTypeNotNull(jythonJob.getEdge_value(), GraphType.EDGE_VALUE);
    checkMessageTypes(jythonJob);
  }

  /**
   * Check if job has edge inputs
   *
   * @param jythonJob JythonJob
   * @return true if job has edge inputs, false otherwise
   */
  private static boolean hasEdgeInputs(JythonJob jythonJob) {
    return !jythonJob.getEdge_inputs().isEmpty();
  }

  /**
   * Check if job has vertex inputs
   *
   * @param jythonJob JythonJob
   * @return true if job has vertex inputs, false otherwise
   */
  private static boolean hasVertexInputs(JythonJob jythonJob) {
    return !jythonJob.getVertex_inputs().isEmpty();
  }

  /**
   * Check that type is present
   *
   * @param typeHolder TypeHolder
   * @param graphType GraphType
   */
  private static void checkTypeNotNull(JythonJob.TypeHolder typeHolder,
      GraphType graphType) {
    checkNotNull(typeHolder.getType(), graphType + ".type not present");
  }

  /**
   * Initialize the job types
   *
   * @param conf Configuration
   * @param jythonJob the job info
   * @param interpreter PythonInterpreter to use
   */
  private static void initGraphTypes(Configuration conf,
      JythonJob jythonJob, PythonInterpreter interpreter) {
    GiraphTypes types = new GiraphTypes();
    types.setVertexIdClass(initValueType(conf, GraphType.VERTEX_ID,
        jythonJob.getVertex_id().getType(), new JythonVertexIdFactory(),
        interpreter));
    types.setVertexValueClass(initValueType(conf, GraphType.VERTEX_VALUE,
        jythonJob.getVertex_value().getType(), new JythonVertexValueFactory(),
        interpreter));
    types.setEdgeValueClass(initValueType(conf, GraphType.EDGE_VALUE,
        jythonJob.getEdge_value().getType(), new JythonEdgeValueFactory(),
        interpreter));
    types.setIncomingMessageValueClass(
        initValueType(conf, GraphType.INCOMING_MESSAGE_VALUE,
            jythonJob.getIncoming_message_value().getType(),
            new JythonIncomingMessageValueFactory(), interpreter));
    types.setOutgoingMessageValueClass(
        initValueType(conf, GraphType.OUTGOING_MESSAGE_VALUE,
            jythonJob.getOutgoing_message_value().getType(),
            new JythonOutgoingMessageValueFactory(), interpreter));
    types.writeTo(conf);
  }

  /**
   * Initialize a graph type (IVEMM)
   *
   * @param conf Configuration
   * @param graphType GraphType
   * @param jythonOrJavaClass jython or java class given by user
   * @param jythonFactory Jactory for making Jython types
   * @param interpreter PythonInterpreter
   * @return Class for Configuration
   */
  private static Class initValueType(Configuration conf, GraphType graphType,
      Object jythonOrJavaClass, JythonFactoryBase jythonFactory,
      PythonInterpreter interpreter) {
    Class<? extends Writable> writableClass = graphType.interfaceClass();
    LanguageAndType langType = processUserType(jythonOrJavaClass, interpreter);

    switch (langType.getLanguage()) {
    case JAVA:
      GRAPH_TYPE_LANGUAGES.set(conf, graphType, Language.JAVA);
      checkImplements(langType, writableClass, interpreter);
      return langType.getJavaClass();
    case JYTHON:
      GRAPH_TYPE_LANGUAGES.set(conf, graphType, Language.JYTHON);
      String jythonClassName = langType.getJythonClassName();
      PyObject jythonClass = interpreter.get(jythonClassName);
      if (jythonClass == null) {
        throw new IllegalArgumentException("Could not find Jython class " +
            jythonClassName + " for parameter " + graphType);
      }
      PyObject valuePyObj = jythonClass.__call__();

      // Check if the Jython type implements Writable. If so, just use it
      // directly. Otherwise, wrap it in a class that does using pickle.
      Object pyWritable = valuePyObj.__tojava__(writableClass);
      if (pyWritable.equals(Py.NoConversion)) {
        GiraphConstants.GRAPH_TYPES_NEEDS_WRAPPERS.set(conf, graphType, true);
        jythonFactory.useThisFactory(conf, jythonClassName);
        return JythonWritableWrapper.class;
      } else {
        GiraphConstants.GRAPH_TYPES_NEEDS_WRAPPERS.set(conf, graphType, false);
        jythonFactory.useThisFactory(conf, jythonClassName);
        return writableClass;
      }
    default:
      throw new IllegalArgumentException("Don't know how to handle " +
          LanguageAndType.class.getSimpleName() + " with language " +
          langType.getLanguage());
    }
  }

  /**
   * Check that the incoming / outgoing message value types are present.
   *
   * @param jythonJob JythonJob
   */
  private static void checkMessageTypes(JythonJob jythonJob) {
    checkMessageType(jythonJob.getIncoming_message_value(),
        GraphType.INCOMING_MESSAGE_VALUE, jythonJob);
    checkMessageType(jythonJob.getOutgoing_message_value(),
        GraphType.OUTGOING_MESSAGE_VALUE, jythonJob);
  }

  /**
   * Check that given message value type is present.
   *
   * @param msgTypeHolder Incoming or outgoing message type holder
   * @param graphType The graph type
   * @param jythonJob JythonJob
   */
  private static void checkMessageType(JythonJob.TypeHolder msgTypeHolder,
      GraphType graphType, JythonJob jythonJob) {
    if (msgTypeHolder.getType() == null) {
      Object msgValueType = jythonJob.getMessage_value().getType();
      checkNotNull(msgValueType, graphType + ".type and " +
          "message_value.type cannot both be empty");
      msgTypeHolder.setType(msgValueType);
    }
  }

  /**
   * Check that the vertex ID, vertex value, and edge value Hive info is valid.
   *
   * @param conf Configuration
   * @param jythonJob JythonJob
   * @param interpreter PythonInterpreter
   */
  private static void initHiveReadersWriters(Configuration conf,
      JythonJob jythonJob, PythonInterpreter interpreter) {
    if (!userTypeIsJavaPrimitiveWritable(jythonJob.getVertex_id())) {
      checkTypeWithHive(jythonJob.getVertex_id(), GraphType.VERTEX_ID);

      LanguageAndType idReader = processUserType(
          jythonJob.getVertex_id().getHive_reader(), interpreter);
      checkImplements(idReader, JythonHiveReader.class, interpreter);
      checkArgument(idReader.getLanguage() == Language.JYTHON);
      GiraphHiveConstants.VERTEX_ID_READER_JYTHON_NAME.set(conf,
          idReader.getJythonClassName());

      LanguageAndType idWriter = processUserType(
          jythonJob.getVertex_id().getHive_writer(), interpreter);
      checkImplements(idWriter, JythonHiveWriter.class, interpreter);
      checkArgument(idWriter.getLanguage() == Language.JYTHON);
      GiraphHiveConstants.VERTEX_ID_WRITER_JYTHON_NAME.set(conf,
          idWriter.getJythonClassName());
    }

    if (hasVertexInputs(jythonJob) &&
        !userTypeIsJavaPrimitiveWritable(jythonJob.getVertex_value())) {
      checkTypeWithHive(jythonJob.getVertex_value(), GraphType.VERTEX_VALUE);

      LanguageAndType valueReader = processUserType(
          jythonJob.getVertex_value().getHive_reader(), interpreter);
      checkImplements(valueReader, JythonHiveReader.class, interpreter);
      checkArgument(valueReader.getLanguage() == Language.JYTHON);
      VERTEX_VALUE_READER_JYTHON_NAME.set(conf,
          valueReader.getJythonClassName());

      LanguageAndType valueWriter = processUserType(
          jythonJob.getVertex_value().getHive_writer(), interpreter);
      checkImplements(valueWriter, JythonHiveWriter.class, interpreter);
      checkArgument(valueWriter.getLanguage() == Language.JYTHON);
      VERTEX_VALUE_WRITER_JYTHON_NAME.set(conf,
          valueWriter.getJythonClassName());
    }

    if (hasEdgeInputs(jythonJob) &&
        !userTypeIsJavaPrimitiveWritable(jythonJob.getEdge_value())) {
      checkNotNull(jythonJob.getEdge_value().getHive_reader(),
          "edge_value.hive_reader cannot be null");

      LanguageAndType edgeReader = processUserType(
          jythonJob.getEdge_value().getHive_reader(), interpreter);
      checkImplements(edgeReader, JythonHiveReader.class, interpreter);
      checkArgument(edgeReader.getLanguage() == Language.JYTHON);
      GiraphHiveConstants.EDGE_VALUE_READER_JYTHON_NAME.set(conf,
          edgeReader.getJythonClassName());
    }
  }

  /**
   * Verify Jython class is present and implements the Java type
   *
   * @param interpreter PythonInterpreter
   * @param valueFromUser Jython class or name of class
   * @return name of Jython class
   */
  private static LanguageAndType processUserType(Object valueFromUser,
      PythonInterpreter interpreter) {
    // user gave a Class object, should be either Java or Jython class name
    if (valueFromUser instanceof Class) {
      Class valueClass = (Class) valueFromUser;
      String jythonClassName = extractJythonClass(valueClass);
      if (jythonClassName != null) {
        // Jython class
        return processJythonType(jythonClassName, interpreter);
      } else {
        // Java class
        return LanguageAndType.java(valueClass);
      }

      // user gave a string, should be either Java or Jython class name
    } else if (valueFromUser instanceof String) {
      String valueStr = (String) valueFromUser;
      Class valueClass;
      try {
        // Try to find Java class with name
        valueClass = Class.forName(valueStr);
        return LanguageAndType.java(valueClass);
      } catch (ClassNotFoundException e) {
        // Java class not found, try to find a Jython one
        return processJythonType(valueStr, interpreter);
      }

      // user gave a PyClass, process as a Jython class
    } else if (valueFromUser instanceof PyClass) {
      PyClass userPyClass = (PyClass) valueFromUser;
      return processJythonType(userPyClass.__name__, interpreter);

      // user gave a PyType, process as Jython class
    } else if (valueFromUser instanceof PyType) {
      PyType userPyType = (PyType) valueFromUser;
      return processJythonType(userPyType.getName(), interpreter);

      // Otherwise, don't know how to handle this, so error
    } else {
      throw new IllegalArgumentException("Don't know how to handle " +
          valueFromUser + " of class " + valueFromUser.getClass() +
          ", needs to be Class or String");
    }
  }

  /**
   * Check that a type implements a Java interface
   *
   * @param langType type with langauge
   * @param interfaceClass java interface class
   * @param interpreter PythonInterpreter
   */
  private static void checkImplements(LanguageAndType langType,
      Class interfaceClass, PythonInterpreter interpreter) {
    switch (langType.getLanguage()) {
    case JAVA:
      checkArgument(interfaceClass.isAssignableFrom(langType.getJavaClass()),
          langType.getJavaClass().getSimpleName() + " needs to implement " +
          interfaceClass.getSimpleName());
      break;
    case JYTHON:
      PyObject pyClass = interpreter.get(langType.getJythonClassName());
      PyObject pyObj = pyClass.__call__();
      Object converted = pyObj.__tojava__(interfaceClass);
      checkArgument(!Py.NoConversion.equals(converted),
         "Jython class " + langType.getJythonClassName() +
         " does not implement " + interfaceClass.getSimpleName() +
         " interface");
      break;
    default:
      throw new IllegalArgumentException("Don't know how to handle " +
          "language " + langType.getLanguage());
    }
  }

  /**
   * Verify Jython class is present and implements specified type
   *
   * @param jythonName Jython class name
   * @param interpreter PythonInterpreter
   * @return language and type specification
   */
  private static LanguageAndType processJythonType(String jythonName,
      PythonInterpreter interpreter) {
    PyObject pyClass = interpreter.get(jythonName);
    checkNotNull(pyClass, "Jython class " + jythonName + " not found");
    return LanguageAndType.jython(jythonName);
  }

  /**
   * Check that the given value type is valid
   *
   * @param typeWithHive value type
   * @param graphType GraphType
   */
  private static void checkTypeWithHive(JythonJob.TypeWithHive typeWithHive,
      GraphType graphType) {
    if (typeWithHive.getHive_reader() == null) {
      checkNotNull(typeWithHive.getHive_io(), graphType + ".hive_reader and " +
          graphType + ".hive_io cannot both be empty");
      typeWithHive.setHive_reader(typeWithHive.getHive_io());
    }
    if (typeWithHive.getHive_writer() == null) {
      checkNotNull(typeWithHive.getHive_io(), graphType + ".hive_writer and " +
          graphType + ".hive_io cannot both be empty");
      typeWithHive.setHive_writer(typeWithHive.getHive_io());
    }
  }

  /**
   * Create a graph value (IVEMM) reader
   *
   * @param <T> graph value type
   * @param schema {@link com.facebook.hiveio.schema.HiveTableSchema}
   * @param columnOption option for column name
   * @param conf {@link ImmutableClassesGiraphConfiguration}
   * @param graphType GraphType creating a reader for
   * @param jythonClassNameOption option for jython class option
   * @return {@link org.apache.giraph.hive.values.HiveValueReader}
   */
  public static <T extends Writable> HiveValueReader<T> newValueReader(
      HiveTableSchema schema, StrConfOption columnOption,
      ImmutableClassesGiraphConfiguration conf, GraphType graphType,
      StrConfOption jythonClassNameOption) {
    HiveValueReader<T> reader;
    if (HiveJythonUtils.isPrimitiveWritable(graphType.get(conf))) {
      reader = PrimitiveValueReader.create(conf, graphType, columnOption,
          schema);
    } else if (jythonClassNameOption.contains(conf)) {
      reader = JythonColumnReader.create(conf, jythonClassNameOption,
          columnOption, schema);
    } else {
      throw new IllegalArgumentException("Don't know how to read " + graphType +
          " of class " + graphType.get(conf) + " which is not primitive and" +
          " no " + JythonHiveReader.class.getSimpleName() + " is set");
    }
    return reader;
  }

  /**
   * Create a graph value (IVEMM) writer
   *
   * @param <T> writable type
   * @param schema {@link HiveTableSchema}
   * @param columnOption option for column
   * @param conf {@link ImmutableClassesGiraphConfiguration}
   * @param graphType {@link GraphType}
   * @param jythonClassNameOption option for name of jython class
   * @return {@link HiveValueWriter}
   */
  public static <T extends Writable> HiveValueWriter<T>
  newValueWriter(HiveTableSchema schema, StrConfOption columnOption,
      ImmutableClassesGiraphConfiguration conf, GraphType graphType,
      StrConfOption jythonClassNameOption) {
    HiveValueWriter<T> writer;
    if (HiveJythonUtils.isPrimitiveWritable(graphType.get(conf))) {
      writer = PrimitiveValueWriter.create(conf, columnOption,
          schema, graphType);
    } else if (jythonClassNameOption.contains(conf)) {
      writer = JythonColumnWriter.create(conf, jythonClassNameOption,
          columnOption, schema);
    } else {
      throw new IllegalArgumentException("Don't know how to write " +
          graphType +  " of class " + graphType.get(conf) +
          " which is not primitive and no " +
          JythonHiveWriter.class.getSimpleName() + " is set");
    }
    return writer;
  }

  /**
   * Extract Jython class name from a user set proxy Jython class.
   *
   * For example:
   *    job.vertex_value_type = FakeLPVertexValue
   * Yields:
   *    org.python.proxies.__main__$FakeLPVertexValue$0
   * This method extracts:
   *    FakeLPVertexValue
   *
   * @param klass Jython proxy class
   * @return Jython class name
   */
  private static String extractJythonClass(Class klass) {
    if (!isJythonClass(klass)) {
      return null;
    }
    Iterable<String> parts = Splitter.on('$').split(klass.getSimpleName());
    if (Iterables.size(parts) != 3) {
      return null;
    }
    Iterator<String> partsIter = parts.iterator();
    partsIter.next();
    return partsIter.next();
  }

  /**
   * Check if passed in class is a Jython class
   *
   * @param klass to check
   * @return true if Jython class, false otherwise
   */
  private static boolean isJythonClass(Class klass) {
    return klass.getCanonicalName().startsWith("org.python.proxies");
  }

  /**
   * Initialize edge input
   *
   * @param conf Configuration
   * @param jythonJob data to initialize
   */
  private static void initEdgeInputs(Configuration conf, JythonJob jythonJob) {
    List<JythonJob.EdgeInput> edgeInputs = jythonJob.getEdge_inputs();
    if (!edgeInputs.isEmpty()) {
      if (edgeInputs.size() == 1) {
        EDGE_INPUT_FORMAT_CLASS.set(conf, HiveEdgeInputFormat.class);
        JythonJob.EdgeInput edgeInput = edgeInputs.get(0);
        checkEdgeInput(edgeInput);
        LOG.info("Setting edge input using: " + edgeInput);

        HIVE_EDGE_INPUT.getDatabaseOpt().set(conf,
            jythonJob.getHive_database());
        HIVE_EDGE_INPUT.getTableOpt().set(conf, edgeInput.getTable());
        if (edgeInput.getPartition_filter() != null) {
          HIVE_EDGE_INPUT.getPartitionOpt().set(conf,
              edgeInput.getPartition_filter());
        }
        HIVE_EDGE_INPUT.getClassOpt().set(conf, JythonHiveToEdge.class);

        EDGE_SOURCE_ID_COLUMN.set(conf, edgeInput.getSource_id_column());
        EDGE_TARGET_ID_COLUMN.set(conf, edgeInput.getTarget_id_column());
        if (edgeInput.getValue_column() != null) {
          EDGE_VALUE_COLUMN.set(conf, edgeInput.getValue_column());
        }
      } else {
        EDGE_INPUT_FORMAT_CLASS.set(conf, MultiEdgeInputFormat.class);
        throw new IllegalArgumentException(
            "Multiple edge inputs not supported yet: " + edgeInputs);
      }
    }
  }

  /**
   * Check that the edge input is valid
   *
   * @param edgeInput data to check
   */
  private static void checkEdgeInput(JythonJob.EdgeInput edgeInput) {
    checkNotNull(edgeInput.getTable(), "EdgeInput table name needs to be set");
    checkNotNull(edgeInput.getSource_id_column(),
        "EdgeInput source ID column needs to be set");
    checkNotNull(edgeInput.getTarget_id_column(),
        "EdgeInput target ID column needs to be set");
  }

  /**
   * Initialize vertex output info
   *
   * @param conf Configuration
   * @param jythonJob the job info
   */
  private static void initVertexInputs(Configuration conf,
      JythonJob jythonJob) {
    List<JythonJob.VertexInput> vertexInputs = jythonJob.getVertex_inputs();
    if (!vertexInputs.isEmpty()) {
      if (vertexInputs.size() == 1) {
        VERTEX_INPUT_FORMAT_CLASS.set(conf, HiveVertexInputFormat.class);
        JythonJob.VertexInput vertexInput = vertexInputs.get(0);
        checkVertexInput(vertexInput);
        LOG.info("Setting vertex input using: " + vertexInput);

        HIVE_VERTEX_INPUT.getDatabaseOpt().set(conf,
            jythonJob.getHive_database());
        HIVE_VERTEX_INPUT.getTableOpt().set(conf, vertexInput.getTable());
        if (vertexInput.getPartition_filter() != null) {
          HIVE_VERTEX_INPUT.getPartitionOpt().set(conf,
              vertexInput.getPartition_filter());
        }
        HIVE_VERTEX_INPUT.getClassOpt().set(conf, JythonHiveToVertex.class);

        JythonHiveToVertex.VERTEX_ID_COLUMN.set(conf,
            vertexInput.getId_column());
        if (vertexInput.getValue_column() != null) {
          JythonHiveToVertex.VERTEX_VALUE_COLUMN.set(conf,
              vertexInput.getValue_column());
        }
      } else {
        VERTEX_INPUT_FORMAT_CLASS.set(conf, MultiVertexInputFormat.class);
        throw new IllegalArgumentException(
            "Multiple vertex inputs not supported yet: " + vertexInputs);
      }
    }
  }

  /**
   * Check that the vertex input info is valid
   *
   * @param vertexInput data to check
   */
  private static void checkVertexInput(JythonJob.VertexInput vertexInput) {
    checkNotNull(vertexInput.getTable(),
        "VertexInput table name needs to be set");
    checkNotNull(vertexInput.getId_column(),
        "VertexInput ID column needs to be set");
  }

  /**
   * Check if the writable is holding a primitive type
   *
   * @param klass Writable class
   * @return true if writable is holding primitive
   */
  public static boolean isPrimitiveWritable(Class klass) {
    return NullWritable.class.equals(klass) ||
        BooleanWritable.class.equals(klass) ||
        ByteWritable.class.equals(klass) ||
        IntWritable.class.equals(klass) ||
        LongWritable.class.equals(klass) ||
        FloatWritable.class.equals(klass) ||
        DoubleWritable.class.equals(klass);
  }

  /**
   * Tell whether the user type given is a primitive writable
   *
   * @param typeHolder TypeHolder
   * @return true if type is a Java primitive writable
   */
  public static boolean userTypeIsJavaPrimitiveWritable(
      JythonJob.TypeHolder typeHolder) {
    Object type = typeHolder.getType();
    if (type instanceof Class) {
      return isPrimitiveWritable((Class) type);
    } else if (type instanceof String) {
      try {
        Class klass = Class.forName((String) type);
        return isPrimitiveWritable(klass);
      } catch (ClassNotFoundException e) {
        return false;
      }
    } else {
      return false;
    }
  }

  /**
   * Initialize output info
   *
   * @param conf Configuration
   * @param jythonJob the job info
   */
  private static void initOutput(Configuration conf, JythonJob jythonJob) {
    JythonJob.VertexOutput vertexOutput = jythonJob.getVertex_output();
    if (vertexOutput.getTable() != null) {
      LOG.info("Setting vertex output using: " + vertexOutput);
      VERTEX_OUTPUT_FORMAT_CLASS.set(conf, HiveVertexOutputFormat.class);
      VERTEX_TO_HIVE_CLASS.set(conf, JythonVertexToHive.class);
      JythonVertexToHive.VERTEX_ID_COLUMN.set(conf,
          vertexOutput.getId_column());
      VERTEX_VALUE_COLUMN.set(conf, vertexOutput.getValue_column());
      HIVE_VERTEX_OUTPUT_DATABASE.set(conf, jythonJob.getHive_database());
      HIVE_VERTEX_OUTPUT_PROFILE_ID.set(conf, "vertex_output_profile");
      HIVE_VERTEX_OUTPUT_TABLE.set(conf, vertexOutput.getTable());
      if (vertexOutput.getPartition() != null) {
        HIVE_VERTEX_OUTPUT_PARTITION.set(conf,
            makePartitionString(vertexOutput.getPartition()));
      }
    }
  }

  /**
   * Create partition string
   *
   * @param parts partition pieces
   * @return partition string
   */
  private static String makePartitionString(Map<String, String> parts) {
    return Joiner.on(",").withKeyValueSeparator("=").join(parts);
  }
}
TOP

Related Classes of org.apache.giraph.hive.jython.HiveJythonUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.