Package $

Source Code of $.MyJob

package ${package};

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.lilyproject.client.LilyClient;
import org.lilyproject.mapreduce.LilyMapReduceUtil;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.RecordScan;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.filter.RecordTypeFilter;
import org.lilyproject.util.io.Closer;

import java.io.IOException;

/**
* Sets up and launches a Lily-based MapReduce job.
*/
public class MyJob extends Configured implements Tool {
    private String zkConnectString;

    public static void main(String[] args) throws Exception {
        // Let <code>ToolRunner</code> handle generic command-line options
        int res = ToolRunner.run(new Configuration(), new MyJob(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        int result = parseArgs(args);
        if (result != 0) {
            return result;
        }

        Configuration config = getConf();

        Job job = new Job(config, "MyJob");
        job.setJarByClass(MyJob.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(1);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // The reducer writes directly to Lily, so for Hadoop there is no output to produce
        job.setOutputFormatClass(NullOutputFormat.class);

        // The RecordScan defines what subset of the records will be offered as input
        // to the map task.
        RecordScan scan = new RecordScan();
        scan.setRecordFilter(new RecordTypeFilter(new QName("mrsample", "Document")));

        // Need LilyClient here just to be able to serialize the RecordScan.
        // This is a bit lame, will improve in the future.
        LilyClient lilyClient = new LilyClient(zkConnectString, 30000);
        LRepository repository = lilyClient.getDefaultRepository();

        // Utility method will configure everything related to LilyInputFormat
        LilyMapReduceUtil.initMapperJob(scan, zkConnectString, repository, job);

        Closer.close(lilyClient);

        // Launch the job
        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("error executing job!");
        }

        return 0;
    }

    @SuppressWarnings("static-access")
    protected int parseArgs(String[] args) {
        Options cliOptions = new Options();

        Option zkOption = OptionBuilder
                .isRequired()
                .withArgName("connection-string")
                .hasArg()
                .withDescription("ZooKeeper connection string: hostname1:port,hostname2:port,...")
                .withLongOpt("zookeeper")
                .create("z");
        cliOptions.addOption(zkOption);

        CommandLineParser parser = new PosixParser();
        CommandLine cmd;
        try {
            cmd = parser.parse(cliOptions, args);
        } catch (ParseException e) {
            System.out.println(e.getMessage());
            System.out.println();

            HelpFormatter help = new HelpFormatter();
            help.printHelp(getClass().getSimpleName(), cliOptions, true);
            return 1;
        }

        zkConnectString = cmd.getOptionValue(zkOption.getOpt());

        return 0;
    }
}
TOP

Related Classes of $.MyJob

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.