/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.giraph.hive.jython;
import org.apache.giraph.conf.GiraphConstants;
import org.apache.giraph.conf.GiraphTypes;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.conf.StrConfOption;
import org.apache.giraph.graph.GraphType;
import org.apache.giraph.graph.Language;
import org.apache.giraph.hive.common.GiraphHiveConstants;
import org.apache.giraph.hive.common.HiveUtils;
import org.apache.giraph.hive.common.LanguageAndType;
import org.apache.giraph.hive.input.edge.HiveEdgeInputFormat;
import org.apache.giraph.hive.input.vertex.HiveVertexInputFormat;
import org.apache.giraph.hive.output.HiveVertexOutputFormat;
import org.apache.giraph.hive.primitives.PrimitiveValueReader;
import org.apache.giraph.hive.primitives.PrimitiveValueWriter;
import org.apache.giraph.hive.values.HiveValueReader;
import org.apache.giraph.hive.values.HiveValueWriter;
import org.apache.giraph.io.formats.multi.MultiEdgeInputFormat;
import org.apache.giraph.io.formats.multi.MultiVertexInputFormat;
import org.apache.giraph.jython.factories.JythonEdgeValueFactory;
import org.apache.giraph.jython.factories.JythonFactoryBase;
import org.apache.giraph.jython.factories.JythonIncomingMessageValueFactory;
import org.apache.giraph.jython.JythonJob;
import org.apache.giraph.jython.factories.JythonOutgoingMessageValueFactory;
import org.apache.giraph.jython.JythonUtils;
import org.apache.giraph.jython.factories.JythonVertexIdFactory;
import org.apache.giraph.jython.factories.JythonVertexValueFactory;
import org.apache.giraph.jython.wrappers.JythonWritableWrapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.log4j.Logger;
import org.python.core.Py;
import org.python.core.PyClass;
import org.python.core.PyObject;
import org.python.core.PyType;
import org.python.util.PythonInterpreter;
import com.facebook.hiveio.schema.HiveTableSchema;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.io.Closeables;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.giraph.conf.GiraphConstants.EDGE_INPUT_FORMAT_CLASS;
import static org.apache.giraph.conf.GiraphConstants.GRAPH_TYPE_LANGUAGES;
import static org.apache.giraph.conf.GiraphConstants.MAX_WORKERS;
import static org.apache.giraph.conf.GiraphConstants.MIN_WORKERS;
import static org.apache.giraph.conf.GiraphConstants.MESSAGE_COMBINER_CLASS;
import static org.apache.giraph.conf.GiraphConstants.VERTEX_INPUT_FORMAT_CLASS;
import static org.apache.giraph.conf.GiraphConstants.VERTEX_OUTPUT_FORMAT_CLASS;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_EDGE_INPUT;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_INPUT;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_OUTPUT_DATABASE;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_OUTPUT_PARTITION;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_OUTPUT_PROFILE_ID;
import static org.apache.giraph.hive.common.GiraphHiveConstants.HIVE_VERTEX_OUTPUT_TABLE;
import static org.apache.giraph.hive.common.GiraphHiveConstants.VERTEX_TO_HIVE_CLASS;
import static org.apache.giraph.hive.common.GiraphHiveConstants.VERTEX_VALUE_READER_JYTHON_NAME;
import static org.apache.giraph.hive.common.GiraphHiveConstants.VERTEX_VALUE_WRITER_JYTHON_NAME;
import static org.apache.giraph.hive.jython.JythonHiveToEdge.EDGE_SOURCE_ID_COLUMN;
import static org.apache.giraph.hive.jython.JythonHiveToEdge.EDGE_TARGET_ID_COLUMN;
import static org.apache.giraph.hive.jython.JythonHiveToEdge.EDGE_VALUE_COLUMN;
import static org.apache.giraph.hive.jython.JythonVertexToHive.VERTEX_VALUE_COLUMN;
/**
* Plugin to {@link HiveJythonRunner} to use Hive.
*/
public class HiveJythonUtils {
/** Logger */
private static final Logger LOG = Logger.getLogger(HiveJythonUtils.class);
/** Don't construct */
private HiveJythonUtils() { }
/**
 * Process command line arguments: ship Hive dependencies with the job and
 * absorb leading -hiveconf options into the Configuration.
 *
 * @param args cmdline args
 * @param conf {@link Configuration}
 * @return remaining cmdline args to process
 */
public static String[] processArgs(String[] args, Configuration conf) {
  // Make Hadoop/Hive jars and site configuration available to the job
  HiveUtils.addHadoopClasspathToTmpJars(conf);
  HiveUtils.addHiveSiteXmlToTmpFiles(conf);
  HiveUtils.addHiveSiteCustomXmlToTmpFiles(conf);
  // Consume -hiveconf key=value pairs and return what's left
  return moveHiveconfOptionsToConf(args, conf);
}
/**
 * Remove leading -hiveconf options from cmdline and store them in the
 * Configuration. Scanning stops at the first argument that is not a
 * -hiveconf flag.
 *
 * @param args cmdline args
 * @param conf Configuration
 * @return cmdline args without the consumed -hiveconf options
 * @throws IllegalArgumentException if a -hiveconf flag has no value after it
 */
private static String[] moveHiveconfOptionsToConf(String[] args,
    Configuration conf) {
  int start = 0;
  while (start < args.length && args[start].endsWith("hiveconf")) {
    // Each -hiveconf flag must be followed by a key=value argument;
    // previously a trailing flag caused ArrayIndexOutOfBoundsException.
    if (start + 1 >= args.length) {
      throw new IllegalArgumentException(
          "Missing value after " + args[start]);
    }
    HiveUtils.processHiveconfOption(conf, args[start + 1]);
    start += 2;
  }
  return Arrays.copyOfRange(args, start, args.length);
}
/**
 * Parse a set of Jython scripts from local files.
 *
 * @param interpreter PythonInterpreter to use
 * @param paths Jython files to parse
 * @return JythonJob
 * @throws java.io.IOException if a file cannot be read
 */
public static JythonJob parseJythonFiles(PythonInterpreter interpreter,
    String ... paths) throws IOException {
  // Delegate to the List-based overload
  return parseJythonFiles(interpreter, Arrays.asList(paths));
}
/**
 * Parse a set of Jython scripts from local files.
 *
 * Every stream opened here is closed before returning, even if opening a
 * later file or parsing fails part way through (previously streams opened
 * before a failing FileInputStream constructor leaked).
 *
 * @param interpreter PythonInterpreter to use
 * @param paths Jython files to parse
 * @return JythonJob
 * @throws IOException if a file cannot be opened
 */
public static JythonJob parseJythonFiles(PythonInterpreter interpreter,
    List<String> paths) throws IOException {
  InputStream[] streams = new InputStream[paths.size()];
  try {
    for (int i = 0; i < paths.size(); ++i) {
      LOG.info("Reading jython file " + paths.get(i));
      streams[i] = new FileInputStream(paths.get(i));
    }
    return parseJythonStreams(interpreter, streams);
  } finally {
    // Close everything opened so far; entries past a failure are null,
    // which Closeables.close treats as a no-op.
    for (InputStream stream : streams) {
      Closeables.close(stream, true);
    }
  }
}
/**
 * Parse scripts from Jython InputStreams and build the job description by
 * invoking the script-defined prepare(job) function.
 *
 * @param interpreter PythonInterpreter
 * @param streams InputStreams to parse
 * @return JythonJob
 * @throws NullPointerException if the scripts do not define prepare()
 */
public static JythonJob parseJythonStreams(PythonInterpreter interpreter,
    InputStream ... streams) {
  for (InputStream stream : streams) {
    readJythonStream(interpreter, stream);
  }
  // The scripts are expected to define a prepare(job) function that fills
  // in the job description. Fail with a clear message instead of an NPE.
  PyObject pyPrepare = interpreter.get("prepare");
  checkNotNull(pyPrepare,
      "prepare function not found in parsed jython scripts");
  JythonJob jythonJob = new JythonJob();
  pyPrepare._jcall(new Object[]{jythonJob});
  return jythonJob;
}
/**
 * Execute a Jython script from a stream, closing the stream afterwards.
 *
 * Close failures are logged and swallowed, not propagated. Note the caller
 * (parseJythonFiles) may close the stream again; double-close on streams
 * is a harmless no-op.
 *
 * @param interpreter Jython interpreter to use
 * @param jythonStream {@link java.io.InputStream} with Jython code
 */
private static void readJythonStream(PythonInterpreter interpreter,
    InputStream jythonStream) {
  try {
    interpreter.execfile(jythonStream);
  } finally {
    try {
      jythonStream.close();
    } catch (IOException e) {
      // Best-effort close; nothing more we can do here
      LOG.error("Failed to close jython stream " + jythonStream);
    }
  }
}
/**
 * Set an arbitrary option of unknown runtime type in the Configuration.
 *
 * @param conf Configuration
 * @param key String key
 * @param value Object to set
 * @throws IllegalArgumentException if the value type is not supported
 */
private static void setOption(Configuration conf, String key,
    Object value) {
  if (value instanceof Boolean) {
    // BUG FIX: this previously called conf.getBoolean(...), which read a
    // value and silently discarded the user's option instead of setting it.
    conf.setBoolean(key, (Boolean) value);
  } else if (value instanceof Byte || value instanceof Short ||
      value instanceof Integer) {
    conf.setInt(key, ((Number) value).intValue());
  } else if (value instanceof Long) {
    conf.setLong(key, (Long) value);
  } else if (value instanceof Float || value instanceof Double) {
    // NOTE(review): Double values are narrowed to float precision here
    conf.setFloat(key, ((Number) value).floatValue());
  } else if (value instanceof String) {
    conf.set(key, value.toString());
  } else if (value instanceof Class) {
    conf.set(key, ((Class) value).getName());
  } else {
    throw new IllegalArgumentException(
        "Don't know how to handle option key: " + key +
        ", value: " + value + ", value type: " + value.getClass());
  }
}
/**
 * Write a JythonJob to the Configuration: validates the job, then wires up
 * worker counts, user options, graph types, Hive readers/writers, and
 * input/output formats.
 *
 * @param jythonJob JythonJob
 * @param conf Configuration
 * @param interpreter PythonInterpreter
 * @return name of Job (falls back to the computation name when unset)
 */
public static String writeJythonJobToConf(JythonJob jythonJob,
    Configuration conf, PythonInterpreter interpreter) {
  // Validate required fields before mutating the Configuration
  checkJob(jythonJob);
  JythonUtils.init(conf, jythonJob.getComputation_name());
  if (jythonJob.getMessageCombiner() != null) {
    MESSAGE_COMBINER_CLASS.set(conf, jythonJob.getMessageCombiner());
  }
  // Fixed-size job: min and max workers are set to the same value
  conf.setInt(MIN_WORKERS, jythonJob.getWorkers());
  conf.setInt(MAX_WORKERS, jythonJob.getWorkers());
  String javaOptions = Joiner.on(' ').join(jythonJob.getJava_options());
  conf.set("mapred.child.java.opts", javaOptions);
  // User-supplied giraph options of arbitrary runtime types
  Map<String, Object> options = jythonJob.getGiraph_options();
  for (Map.Entry<String, Object> entry : options.entrySet()) {
    setOption(conf, entry.getKey(), entry.getValue());
  }
  setPool(conf, jythonJob);
  // NOTE(review): keep this init order — readers/writers are configured
  // before graph types, outputs, and inputs; dependency not verified here.
  initHiveReadersWriters(conf, jythonJob, interpreter);
  initGraphTypes(conf, jythonJob, interpreter);
  initOutput(conf, jythonJob);
  initVertexInputs(conf, jythonJob);
  initEdgeInputs(conf, jythonJob);
  String name = jythonJob.getName();
  if (name == null) {
    name = jythonJob.getComputation_name();
  }
  return name;
}
/**
 * Set the hadoop mapreduce fair-scheduler pool, choosing a default based
 * on job size when the job did not specify one.
 *
 * @param conf Configuration
 * @param job the job info
 */
private static void setPool(Configuration conf, JythonJob job) {
  if (job.getPool() == null) {
    // Small jobs (< 50 workers) default to the test pool
    String defaultPool =
        job.getWorkers() < 50 ? "graph.test" : "graph.production";
    job.setPool(defaultPool);
  }
  conf.set("mapred.fairscheduler.pool", job.getPool());
}
/**
 * Verify the job description contains everything required to run.
 *
 * @param jythonJob JythonJob
 */
private static void checkJob(JythonJob jythonJob) {
  checkNotNull(jythonJob.getComputation_name(),
      "computation_name cannot be null");
  // Vertex id, vertex value, and edge value types are all mandatory
  checkTypeNotNull(jythonJob.getVertex_id(), GraphType.VERTEX_ID);
  checkTypeNotNull(jythonJob.getVertex_value(), GraphType.VERTEX_VALUE);
  checkTypeNotNull(jythonJob.getEdge_value(), GraphType.EDGE_VALUE);
  checkMessageTypes(jythonJob);
}
/**
 * Whether the job has any edge inputs configured.
 *
 * @param jythonJob JythonJob
 * @return true if job has edge inputs, false otherwise
 */
private static boolean hasEdgeInputs(JythonJob jythonJob) {
  List<JythonJob.EdgeInput> edgeInputs = jythonJob.getEdge_inputs();
  return !edgeInputs.isEmpty();
}
/**
 * Whether the job has any vertex inputs configured.
 *
 * @param jythonJob JythonJob
 * @return true if job has vertex inputs, false otherwise
 */
private static boolean hasVertexInputs(JythonJob jythonJob) {
  List<JythonJob.VertexInput> vertexInputs = jythonJob.getVertex_inputs();
  return !vertexInputs.isEmpty();
}
/**
 * Verify that a user type was provided for the given graph slot.
 *
 * @param typeHolder TypeHolder
 * @param graphType GraphType
 */
private static void checkTypeNotNull(JythonJob.TypeHolder typeHolder,
    GraphType graphType) {
  Object type = typeHolder.getType();
  checkNotNull(type, graphType + ".type not present");
}
/**
 * Resolve and register all five graph value types (vertex id, vertex
 * value, edge value, incoming/outgoing message value) in the
 * Configuration, wiring a Jython factory for each one that turns out to
 * be a Jython class.
 *
 * @param conf Configuration
 * @param jythonJob the job info
 * @param interpreter PythonInterpreter to use
 */
private static void initGraphTypes(Configuration conf,
    JythonJob jythonJob, PythonInterpreter interpreter) {
  GiraphTypes types = new GiraphTypes();
  // Each call records the language (Java/Jython) in conf and returns the
  // Writable class to register; see initValueType for details.
  types.setVertexIdClass(initValueType(conf, GraphType.VERTEX_ID,
      jythonJob.getVertex_id().getType(), new JythonVertexIdFactory(),
      interpreter));
  types.setVertexValueClass(initValueType(conf, GraphType.VERTEX_VALUE,
      jythonJob.getVertex_value().getType(), new JythonVertexValueFactory(),
      interpreter));
  types.setEdgeValueClass(initValueType(conf, GraphType.EDGE_VALUE,
      jythonJob.getEdge_value().getType(), new JythonEdgeValueFactory(),
      interpreter));
  types.setIncomingMessageValueClass(
      initValueType(conf, GraphType.INCOMING_MESSAGE_VALUE,
          jythonJob.getIncoming_message_value().getType(),
          new JythonIncomingMessageValueFactory(), interpreter));
  types.setOutgoingMessageValueClass(
      initValueType(conf, GraphType.OUTGOING_MESSAGE_VALUE,
          jythonJob.getOutgoing_message_value().getType(),
          new JythonOutgoingMessageValueFactory(), interpreter));
  // Persist the resolved types into the Configuration
  types.writeTo(conf);
}
/**
 * Initialize a single graph value type (IVEMM).
 *
 * For a Java class, records the language and returns the class directly.
 * For a Jython class, instantiates it once to probe whether it already
 * implements the required Writable interface: if it does, the interface
 * class is registered and the factory produces instances directly; if not,
 * the value is wrapped in {@link JythonWritableWrapper} (pickle-based).
 *
 * @param conf Configuration
 * @param graphType GraphType
 * @param jythonOrJavaClass jython or java class given by user
 * @param jythonFactory factory for making Jython instances of this type
 * @param interpreter PythonInterpreter
 * @return Writable class to register in the Configuration
 */
private static Class initValueType(Configuration conf, GraphType graphType,
    Object jythonOrJavaClass, JythonFactoryBase jythonFactory,
    PythonInterpreter interpreter) {
  Class<? extends Writable> writableClass = graphType.interfaceClass();
  LanguageAndType langType = processUserType(jythonOrJavaClass, interpreter);
  switch (langType.getLanguage()) {
  case JAVA:
    GRAPH_TYPE_LANGUAGES.set(conf, graphType, Language.JAVA);
    // Java classes must implement the Writable interface themselves
    checkImplements(langType, writableClass, interpreter);
    return langType.getJavaClass();
  case JYTHON:
    GRAPH_TYPE_LANGUAGES.set(conf, graphType, Language.JYTHON);
    String jythonClassName = langType.getJythonClassName();
    PyObject jythonClass = interpreter.get(jythonClassName);
    if (jythonClass == null) {
      throw new IllegalArgumentException("Could not find Jython class " +
          jythonClassName + " for parameter " + graphType);
    }
    // Instantiate once to probe interface conformance
    PyObject valuePyObj = jythonClass.__call__();
    // Check if the Jython type implements Writable. If so, just use it
    // directly. Otherwise, wrap it in a class that does using pickle.
    Object pyWritable = valuePyObj.__tojava__(writableClass);
    if (pyWritable.equals(Py.NoConversion)) {
      GiraphConstants.GRAPH_TYPES_NEEDS_WRAPPERS.set(conf, graphType, true);
      jythonFactory.useThisFactory(conf, jythonClassName);
      return JythonWritableWrapper.class;
    } else {
      GiraphConstants.GRAPH_TYPES_NEEDS_WRAPPERS.set(conf, graphType, false);
      jythonFactory.useThisFactory(conf, jythonClassName);
      return writableClass;
    }
  default:
    throw new IllegalArgumentException("Don't know how to handle " +
        LanguageAndType.class.getSimpleName() + " with language " +
        langType.getLanguage());
  }
}
/**
 * Check that both the incoming and outgoing message value types are
 * present (directly or via the shared message_value fallback).
 *
 * @param jythonJob JythonJob
 */
private static void checkMessageTypes(JythonJob jythonJob) {
  checkMessageType(jythonJob.getIncoming_message_value(),
      GraphType.INCOMING_MESSAGE_VALUE, jythonJob);
  checkMessageType(jythonJob.getOutgoing_message_value(),
      GraphType.OUTGOING_MESSAGE_VALUE, jythonJob);
}
/**
 * Ensure the given message value type is present, falling back to the
 * shared message_value type when the direction-specific one is unset.
 *
 * @param msgTypeHolder Incoming or outgoing message type holder
 * @param graphType The graph type
 * @param jythonJob JythonJob
 */
private static void checkMessageType(JythonJob.TypeHolder msgTypeHolder,
    GraphType graphType, JythonJob jythonJob) {
  if (msgTypeHolder.getType() != null) {
    return;
  }
  // Fall back to the common message_value type
  Object msgValueType = jythonJob.getMessage_value().getType();
  checkNotNull(msgValueType, graphType + ".type and " +
      "message_value.type cannot both be empty");
  msgTypeHolder.setType(msgValueType);
}
/**
 * Set up Jython Hive readers/writers for the vertex ID, vertex value, and
 * edge value types that are not primitive Java writables (primitives are
 * handled generically and need no user reader/writer).
 *
 * @param conf Configuration
 * @param jythonJob JythonJob
 * @param interpreter PythonInterpreter
 */
private static void initHiveReadersWriters(Configuration conf,
    JythonJob jythonJob, PythonInterpreter interpreter) {
  if (!userTypeIsJavaPrimitiveWritable(jythonJob.getVertex_id())) {
    checkTypeWithHive(jythonJob.getVertex_id(), GraphType.VERTEX_ID);
    initJythonHiveIO(conf, jythonJob.getVertex_id().getHive_reader(),
        JythonHiveReader.class,
        GiraphHiveConstants.VERTEX_ID_READER_JYTHON_NAME, interpreter);
    initJythonHiveIO(conf, jythonJob.getVertex_id().getHive_writer(),
        JythonHiveWriter.class,
        GiraphHiveConstants.VERTEX_ID_WRITER_JYTHON_NAME, interpreter);
  }
  if (hasVertexInputs(jythonJob) &&
      !userTypeIsJavaPrimitiveWritable(jythonJob.getVertex_value())) {
    checkTypeWithHive(jythonJob.getVertex_value(), GraphType.VERTEX_VALUE);
    initJythonHiveIO(conf, jythonJob.getVertex_value().getHive_reader(),
        JythonHiveReader.class, VERTEX_VALUE_READER_JYTHON_NAME,
        interpreter);
    initJythonHiveIO(conf, jythonJob.getVertex_value().getHive_writer(),
        JythonHiveWriter.class, VERTEX_VALUE_WRITER_JYTHON_NAME,
        interpreter);
  }
  if (hasEdgeInputs(jythonJob) &&
      !userTypeIsJavaPrimitiveWritable(jythonJob.getEdge_value())) {
    // Edge values are only read, never written back to Hive
    checkNotNull(jythonJob.getEdge_value().getHive_reader(),
        "edge_value.hive_reader cannot be null");
    initJythonHiveIO(conf, jythonJob.getEdge_value().getHive_reader(),
        JythonHiveReader.class,
        GiraphHiveConstants.EDGE_VALUE_READER_JYTHON_NAME, interpreter);
  }
}

/**
 * Resolve a user-supplied Hive reader/writer class, verify it is a Jython
 * class implementing the required interface, and record its name in the
 * Configuration.
 *
 * @param conf Configuration
 * @param userClass user-supplied class, class name, or Jython class
 * @param iface interface the class must implement
 * @param nameOption option under which to store the Jython class name
 * @param interpreter PythonInterpreter
 */
private static void initJythonHiveIO(Configuration conf, Object userClass,
    Class iface, StrConfOption nameOption, PythonInterpreter interpreter) {
  LanguageAndType langType = processUserType(userClass, interpreter);
  checkImplements(langType, iface, interpreter);
  // Only Jython implementations are supported for Hive readers/writers
  checkArgument(langType.getLanguage() == Language.JYTHON);
  nameOption.set(conf, langType.getJythonClassName());
}
/**
 * Resolve a user-supplied type specification (Java Class, Jython proxy
 * Class, String name, PyClass, or PyType) into a language-tagged type.
 *
 * @param valueFromUser Class / String / PyClass / PyType given by the user
 * @param interpreter PythonInterpreter
 * @return {@link LanguageAndType} describing the resolved type
 * @throws IllegalArgumentException if the value's type is not supported
 */
private static LanguageAndType processUserType(Object valueFromUser,
    PythonInterpreter interpreter) {
  // user gave a Class object, should be either Java or Jython class name
  if (valueFromUser instanceof Class) {
    Class valueClass = (Class) valueFromUser;
    String jythonClassName = extractJythonClass(valueClass);
    if (jythonClassName != null) {
      // Jython proxy class - use the extracted Jython name
      return processJythonType(jythonClassName, interpreter);
    } else {
      // Plain Java class
      return LanguageAndType.java(valueClass);
    }
  // user gave a string, should be either Java or Jython class name
  } else if (valueFromUser instanceof String) {
    String valueStr = (String) valueFromUser;
    Class valueClass;
    try {
      // Try to find Java class with name; Java resolution wins over Jython
      valueClass = Class.forName(valueStr);
      return LanguageAndType.java(valueClass);
    } catch (ClassNotFoundException e) {
      // Java class not found, try to find a Jython one
      return processJythonType(valueStr, interpreter);
    }
  // user gave a PyClass, process as a Jython class
  } else if (valueFromUser instanceof PyClass) {
    PyClass userPyClass = (PyClass) valueFromUser;
    return processJythonType(userPyClass.__name__, interpreter);
  // user gave a PyType, process as Jython class
  } else if (valueFromUser instanceof PyType) {
    PyType userPyType = (PyType) valueFromUser;
    return processJythonType(userPyType.getName(), interpreter);
  // Otherwise, don't know how to handle this, so error
  } else {
    throw new IllegalArgumentException("Don't know how to handle " +
        valueFromUser + " of class " + valueFromUser.getClass() +
        ", needs to be Class or String");
  }
}
/**
 * Check that a language-tagged type implements a Java interface.
 *
 * For Java types this is a static assignability check; for Jython types
 * an instance is created and probed for convertibility to the interface.
 *
 * @param langType type with language
 * @param interfaceClass java interface class
 * @param interpreter PythonInterpreter
 */
private static void checkImplements(LanguageAndType langType,
    Class interfaceClass, PythonInterpreter interpreter) {
  Language language = langType.getLanguage();
  if (language == Language.JAVA) {
    checkArgument(interfaceClass.isAssignableFrom(langType.getJavaClass()),
        langType.getJavaClass().getSimpleName() + " needs to implement " +
        interfaceClass.getSimpleName());
  } else if (language == Language.JYTHON) {
    PyObject pyClass = interpreter.get(langType.getJythonClassName());
    PyObject pyObj = pyClass.__call__();
    // A failed conversion means the Jython class lacks the interface
    Object converted = pyObj.__tojava__(interfaceClass);
    checkArgument(!Py.NoConversion.equals(converted),
        "Jython class " + langType.getJythonClassName() +
        " does not implement " + interfaceClass.getSimpleName() +
        " interface");
  } else {
    throw new IllegalArgumentException("Don't know how to handle " +
        "language " + language);
  }
}
/**
 * Verify a Jython class is present in the interpreter and tag it.
 *
 * @param jythonName Jython class name
 * @param interpreter PythonInterpreter
 * @return language and type specification
 */
private static LanguageAndType processJythonType(String jythonName,
    PythonInterpreter interpreter) {
  // Only verify the class exists; instantiation happens later
  checkNotNull(interpreter.get(jythonName),
      "Jython class " + jythonName + " not found");
  return LanguageAndType.jython(jythonName);
}
/**
 * Ensure a value type has both a Hive reader and writer set, defaulting
 * each missing one to the combined hive_io class.
 *
 * @param typeWithHive value type
 * @param graphType GraphType (used only for error messages)
 */
private static void checkTypeWithHive(JythonJob.TypeWithHive typeWithHive,
    GraphType graphType) {
  if (typeWithHive.getHive_reader() == null) {
    // No dedicated reader - hive_io must be set to cover it
    checkNotNull(typeWithHive.getHive_io(), graphType + ".hive_reader and " +
        graphType + ".hive_io cannot both be empty");
    typeWithHive.setHive_reader(typeWithHive.getHive_io());
  }
  if (typeWithHive.getHive_writer() == null) {
    // No dedicated writer - hive_io must be set to cover it
    checkNotNull(typeWithHive.getHive_io(), graphType + ".hive_writer and " +
        graphType + ".hive_io cannot both be empty");
    typeWithHive.setHive_writer(typeWithHive.getHive_io());
  }
}
/**
 * Create a graph value (IVEMM) reader.
 *
 * Primitive writable types get a direct primitive reader; otherwise a
 * Jython column reader is built from the configured Jython class name.
 *
 * @param <T> graph value type
 * @param schema {@link com.facebook.hiveio.schema.HiveTableSchema}
 * @param columnOption option for column name
 * @param conf {@link ImmutableClassesGiraphConfiguration}
 * @param graphType GraphType creating a reader for
 * @param jythonClassNameOption option for jython class option
 * @return {@link org.apache.giraph.hive.values.HiveValueReader}
 */
public static <T extends Writable> HiveValueReader<T> newValueReader(
    HiveTableSchema schema, StrConfOption columnOption,
    ImmutableClassesGiraphConfiguration conf, GraphType graphType,
    StrConfOption jythonClassNameOption) {
  if (HiveJythonUtils.isPrimitiveWritable(graphType.get(conf))) {
    return PrimitiveValueReader.create(conf, graphType, columnOption,
        schema);
  }
  if (jythonClassNameOption.contains(conf)) {
    return JythonColumnReader.create(conf, jythonClassNameOption,
        columnOption, schema);
  }
  throw new IllegalArgumentException("Don't know how to read " + graphType +
      " of class " + graphType.get(conf) + " which is not primitive and" +
      " no " + JythonHiveReader.class.getSimpleName() + " is set");
}
/**
 * Create a graph value (IVEMM) writer.
 *
 * Primitive writable types get a direct primitive writer; otherwise a
 * Jython column writer is built from the configured Jython class name.
 *
 * @param <T> writable type
 * @param schema {@link HiveTableSchema}
 * @param columnOption option for column
 * @param conf {@link ImmutableClassesGiraphConfiguration}
 * @param graphType {@link GraphType}
 * @param jythonClassNameOption option for name of jython class
 * @return {@link HiveValueWriter}
 */
public static <T extends Writable> HiveValueWriter<T> newValueWriter(
    HiveTableSchema schema, StrConfOption columnOption,
    ImmutableClassesGiraphConfiguration conf, GraphType graphType,
    StrConfOption jythonClassNameOption) {
  if (HiveJythonUtils.isPrimitiveWritable(graphType.get(conf))) {
    return PrimitiveValueWriter.create(conf, columnOption,
        schema, graphType);
  }
  if (jythonClassNameOption.contains(conf)) {
    return JythonColumnWriter.create(conf, jythonClassNameOption,
        columnOption, schema);
  }
  throw new IllegalArgumentException("Don't know how to write " +
      graphType + " of class " + graphType.get(conf) +
      " which is not primitive and no " +
      JythonHiveWriter.class.getSimpleName() + " is set");
}
/**
 * Extract Jython class name from a user set proxy Jython class.
 *
 * For example:
 * job.vertex_value_type = FakeLPVertexValue
 * Yields:
 * org.python.proxies.__main__$FakeLPVertexValue$0
 * This method extracts:
 * FakeLPVertexValue
 *
 * @param klass Jython proxy class
 * @return Jython class name, or null if klass is not a Jython proxy or
 *         does not follow the expected module$name$counter shape
 */
private static String extractJythonClass(Class klass) {
  if (!isJythonClass(klass)) {
    return null;
  }
  // Split once instead of the previous Splitter + Iterables.size combo,
  // which iterated the parts twice. Limit -1 keeps trailing empty strings,
  // matching Splitter's behavior.
  String[] parts = klass.getSimpleName().split("\\$", -1);
  if (parts.length != 3) {
    return null;
  }
  // Shape is module$className$counter - the middle piece is the class name
  return parts[1];
}
/**
 * Check if passed in class is a Jython proxy class.
 *
 * @param klass to check
 * @return true if Jython class, false otherwise
 */
private static boolean isJythonClass(Class klass) {
  // getCanonicalName() returns null for anonymous/local classes; guard
  // against the NPE the previous version would have thrown.
  String canonicalName = klass.getCanonicalName();
  return canonicalName != null &&
      canonicalName.startsWith("org.python.proxies");
}
/**
 * Initialize edge input from the job description.
 *
 * Currently only a single edge input is supported; multiple inputs fail
 * with IllegalArgumentException.
 *
 * @param conf Configuration
 * @param jythonJob data to initialize
 */
private static void initEdgeInputs(Configuration conf, JythonJob jythonJob) {
  List<JythonJob.EdgeInput> edgeInputs = jythonJob.getEdge_inputs();
  if (!edgeInputs.isEmpty()) {
    if (edgeInputs.size() == 1) {
      EDGE_INPUT_FORMAT_CLASS.set(conf, HiveEdgeInputFormat.class);
      JythonJob.EdgeInput edgeInput = edgeInputs.get(0);
      checkEdgeInput(edgeInput);
      LOG.info("Setting edge input using: " + edgeInput);
      // Database comes from the job, table/partition from the edge input
      HIVE_EDGE_INPUT.getDatabaseOpt().set(conf,
          jythonJob.getHive_database());
      HIVE_EDGE_INPUT.getTableOpt().set(conf, edgeInput.getTable());
      if (edgeInput.getPartition_filter() != null) {
        HIVE_EDGE_INPUT.getPartitionOpt().set(conf,
            edgeInput.getPartition_filter());
      }
      HIVE_EDGE_INPUT.getClassOpt().set(conf, JythonHiveToEdge.class);
      EDGE_SOURCE_ID_COLUMN.set(conf, edgeInput.getSource_id_column());
      EDGE_TARGET_ID_COLUMN.set(conf, edgeInput.getTarget_id_column());
      // Edge value column is optional (unweighted edges have none)
      if (edgeInput.getValue_column() != null) {
        EDGE_VALUE_COLUMN.set(conf, edgeInput.getValue_column());
      }
    } else {
      // NOTE(review): this set is dead code - the exception below always
      // fires. Presumably a placeholder for future multi-input support.
      EDGE_INPUT_FORMAT_CLASS.set(conf, MultiEdgeInputFormat.class);
      throw new IllegalArgumentException(
          "Multiple edge inputs not supported yet: " + edgeInputs);
    }
  }
}
/**
 * Verify the edge input has a table plus source/target ID columns.
 *
 * @param edgeInput data to check
 */
private static void checkEdgeInput(JythonJob.EdgeInput edgeInput) {
  checkNotNull(edgeInput.getTable(), "EdgeInput table name needs to be set");
  checkNotNull(edgeInput.getSource_id_column(),
      "EdgeInput source ID column needs to be set");
  checkNotNull(edgeInput.getTarget_id_column(),
      "EdgeInput target ID column needs to be set");
}
/**
 * Initialize vertex input from the job description.
 *
 * Currently only a single vertex input is supported; multiple inputs fail
 * with IllegalArgumentException. (Original javadoc said "vertex output" -
 * this handles vertex inputs.)
 *
 * @param conf Configuration
 * @param jythonJob the job info
 */
private static void initVertexInputs(Configuration conf,
    JythonJob jythonJob) {
  List<JythonJob.VertexInput> vertexInputs = jythonJob.getVertex_inputs();
  if (!vertexInputs.isEmpty()) {
    if (vertexInputs.size() == 1) {
      VERTEX_INPUT_FORMAT_CLASS.set(conf, HiveVertexInputFormat.class);
      JythonJob.VertexInput vertexInput = vertexInputs.get(0);
      checkVertexInput(vertexInput);
      LOG.info("Setting vertex input using: " + vertexInput);
      // Database comes from the job, table/partition from the vertex input
      HIVE_VERTEX_INPUT.getDatabaseOpt().set(conf,
          jythonJob.getHive_database());
      HIVE_VERTEX_INPUT.getTableOpt().set(conf, vertexInput.getTable());
      if (vertexInput.getPartition_filter() != null) {
        HIVE_VERTEX_INPUT.getPartitionOpt().set(conf,
            vertexInput.getPartition_filter());
      }
      HIVE_VERTEX_INPUT.getClassOpt().set(conf, JythonHiveToVertex.class);
      JythonHiveToVertex.VERTEX_ID_COLUMN.set(conf,
          vertexInput.getId_column());
      // Vertex value column is optional
      if (vertexInput.getValue_column() != null) {
        JythonHiveToVertex.VERTEX_VALUE_COLUMN.set(conf,
            vertexInput.getValue_column());
      }
    } else {
      // NOTE(review): this set is dead code - the exception below always
      // fires. Presumably a placeholder for future multi-input support.
      VERTEX_INPUT_FORMAT_CLASS.set(conf, MultiVertexInputFormat.class);
      throw new IllegalArgumentException(
          "Multiple vertex inputs not supported yet: " + vertexInputs);
    }
  }
}
/**
 * Verify the vertex input has a table and an ID column.
 *
 * @param vertexInput data to check
 */
private static void checkVertexInput(JythonJob.VertexInput vertexInput) {
  checkNotNull(vertexInput.getTable(),
      "VertexInput table name needs to be set");
  checkNotNull(vertexInput.getId_column(),
      "VertexInput ID column needs to be set");
}
/**
 * Check if the writable class is one of the primitive-holding Hadoop
 * writables.
 *
 * @param klass Writable class
 * @return true if writable is holding primitive
 */
public static boolean isPrimitiveWritable(Class klass) {
  Class<?>[] primitiveWritables = {
    NullWritable.class, BooleanWritable.class, ByteWritable.class,
    IntWritable.class, LongWritable.class, FloatWritable.class,
    DoubleWritable.class,
  };
  for (Class<?> primitiveWritable : primitiveWritables) {
    if (primitiveWritable.equals(klass)) {
      return true;
    }
  }
  return false;
}
/**
 * Tell whether the user type given (a Class or a class-name String) is a
 * primitive Java writable.
 *
 * @param typeHolder TypeHolder
 * @return true if type is a Java primitive writable
 */
public static boolean userTypeIsJavaPrimitiveWritable(
    JythonJob.TypeHolder typeHolder) {
  Object type = typeHolder.getType();
  if (type instanceof Class) {
    return isPrimitiveWritable((Class) type);
  }
  if (type instanceof String) {
    try {
      return isPrimitiveWritable(Class.forName((String) type));
    } catch (ClassNotFoundException e) {
      // Not a resolvable Java class, so not a primitive writable
      return false;
    }
  }
  // Anything else (e.g. a Jython class) is not a Java primitive
  return false;
}
/**
 * Initialize vertex output info. A no-op when the job has no output table
 * set.
 *
 * @param conf Configuration
 * @param jythonJob the job info
 */
private static void initOutput(Configuration conf, JythonJob jythonJob) {
  JythonJob.VertexOutput vertexOutput = jythonJob.getVertex_output();
  // An output table being present is what enables vertex output
  if (vertexOutput.getTable() != null) {
    LOG.info("Setting vertex output using: " + vertexOutput);
    VERTEX_OUTPUT_FORMAT_CLASS.set(conf, HiveVertexOutputFormat.class);
    VERTEX_TO_HIVE_CLASS.set(conf, JythonVertexToHive.class);
    JythonVertexToHive.VERTEX_ID_COLUMN.set(conf,
        vertexOutput.getId_column());
    VERTEX_VALUE_COLUMN.set(conf, vertexOutput.getValue_column());
    HIVE_VERTEX_OUTPUT_DATABASE.set(conf, jythonJob.getHive_database());
    HIVE_VERTEX_OUTPUT_PROFILE_ID.set(conf, "vertex_output_profile");
    HIVE_VERTEX_OUTPUT_TABLE.set(conf, vertexOutput.getTable());
    // Partition spec is optional; serialized as key=value,... pairs
    if (vertexOutput.getPartition() != null) {
      HIVE_VERTEX_OUTPUT_PARTITION.set(conf,
          makePartitionString(vertexOutput.getPartition()));
    }
  }
}
/**
 * Serialize a partition map as comma-separated key=value pairs.
 *
 * @param parts partition pieces
 * @return partition string
 */
private static String makePartitionString(Map<String, String> parts) {
  Joiner.MapJoiner mapJoiner = Joiner.on(",").withKeyValueSeparator("=");
  return mapJoiner.join(parts);
}
}