package io.lumify.gdelt;
import io.lumify.core.bootstrap.InjectHelper;
import io.lumify.core.bootstrap.LumifyBootstrap;
import io.lumify.core.config.ConfigurationLoader;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.data.Mutation;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.securegraph.accumulo.AccumuloGraphConfiguration;
import org.securegraph.accumulo.mapreduce.AccumuloElementOutputFormat;
import org.securegraph.accumulo.mapreduce.ElementMapper;
/**
 * Hadoop MapReduce driver that imports GDELT event data into an
 * Accumulo-backed secure graph. Expects the input file path as the last
 * command-line argument; map output is written as graph elements through
 * {@link AccumuloElementOutputFormat}.
 */
public class GDELTRunner extends Configured implements Tool {
    private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(GDELTRunner.class);

    /**
     * Configures and runs the GDELT import job, blocking until it completes,
     * then prints the import counters.
     *
     * @param args command-line arguments; the last argument is the input file path
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if job configuration or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        io.lumify.core.config.Configuration lumifyConfig = ConfigurationLoader.load();
        Configuration conf = getConfiguration(args, lumifyConfig);
        AccumuloGraphConfiguration accumuloGraphConfiguration = new AccumuloGraphConfiguration(conf, "graph.");

        // NOTE(review): injection is intentionally disabled here; confirm whether
        // bootstrap wiring is still required before re-enabling.
        // InjectHelper.inject(this, LumifyBootstrap.bootstrapModuleMaker(lumifyConfig));

        // Job.getInstance replaces the Job(Configuration, String) constructor,
        // which is deprecated as of Hadoop 2.
        Job job = Job.getInstance(conf, "GDELTImport");

        // Wire the job's output directly to Accumulo using the graph's connection settings.
        String instanceName = accumuloGraphConfiguration.getAccumuloInstanceName();
        String zooKeepers = accumuloGraphConfiguration.getZookeeperServers();
        String principal = accumuloGraphConfiguration.getAccumuloUsername();
        AuthenticationToken authenticationToken = accumuloGraphConfiguration.getAuthenticationToken();
        AccumuloElementOutputFormat.setOutputInfo(job, instanceName, zooKeepers, principal, authenticationToken);

        job.setJarByClass(GDELTRunner.class);
        job.setMapperClass(GDELTMapper.class);
        job.setMapOutputValueClass(Mutation.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(AccumuloElementOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(conf.get("in")));

        int returnCode = job.waitForCompletion(true) ? 0 : 1;

        // Print counters to stdout so CLI users get an import summary
        // regardless of logger configuration.
        CounterGroup groupCounters = job.getCounters().getGroup(GDELTImportCounters.class.getName());
        for (Counter counter : groupCounters) {
            System.out.println(counter.getDisplayName() + ": " + counter.getValue());
        }
        return returnCode;
    }

    /**
     * Builds the Hadoop configuration from the Lumify configuration and CLI args,
     * stashing the input file path under the "in" key and setting it on this Tool.
     *
     * @param args command-line arguments; the last one is treated as the input file path
     * @param lumifyConfig loaded Lumify configuration to convert
     * @return the populated Hadoop configuration
     * @throws IllegalArgumentException if no input file argument was supplied
     */
    private Configuration getConfiguration(String[] args, io.lumify.core.config.Configuration lumifyConfig) {
        if (args.length < 1) {
            // IllegalArgumentException is the precise standard type for bad CLI
            // usage (and remains a RuntimeException for existing callers).
            throw new IllegalArgumentException("Required arguments <inputFileName>");
        }
        String inFileName = args[args.length - 1];
        LOGGER.info("Using config:\n" + lumifyConfig);
        Configuration hadoopConfig = lumifyConfig.toHadoopConfiguration();
        hadoopConfig.set(ElementMapper.GRAPH_CONFIG_PREFIX, "graph.");
        LOGGER.info("inFileName: %s", inFileName);
        hadoopConfig.set("in", inFileName);
        this.setConf(hadoopConfig);
        return hadoopConfig;
    }

    /**
     * CLI entry point; delegates to {@link ToolRunner} and exits with the job's
     * return code.
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new GDELTRunner(), args);
        System.exit(res);
    }
}