package ${package};
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.lilyproject.client.LilyClient;
import org.lilyproject.mapreduce.LilyMapReduceUtil;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.RecordScan;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.filter.RecordTypeFilter;
import org.lilyproject.util.io.Closer;
import java.io.IOException;
/**
 * Sets up and launches a Lily-based MapReduce job.
 *
 * <p>The job reads records of type {mrsample}Document from the Lily repository
 * (via {@code LilyInputFormat}) and lets the reducer write its results directly
 * back to Lily, so no Hadoop output is produced.
 *
 * <p>Usage: {@code MyJob -z <zookeeper-connection-string>} (plus the generic
 * Hadoop options handled by {@link ToolRunner}).
 */
public class MyJob extends Configured implements Tool {

    /** ZooKeeper connection string, set by {@link #parseArgs(String[])} from the -z/--zookeeper option. */
    private String zkConnectString;

    public static void main(String[] args) throws Exception {
        // Let ToolRunner handle generic command-line options (-D, -conf, -fs, ...)
        int res = ToolRunner.run(new Configuration(), new MyJob(), args);
        System.exit(res);
    }

    /**
     * Configures and runs the MapReduce job.
     *
     * @param args command-line arguments; must contain the -z/--zookeeper option
     * @return 0 on success, non-zero when argument parsing fails
     * @throws IOException if the job finishes unsuccessfully
     */
    @Override
    public int run(String[] args) throws Exception {
        int result = parseArgs(args);
        if (result != 0) {
            return result;
        }

        Configuration config = getConf();
        Job job = new Job(config, "MyJob");
        job.setJarByClass(MyJob.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(1);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // The reducer writes directly to Lily, so for Hadoop there is no output to produce
        job.setOutputFormatClass(NullOutputFormat.class);

        // The RecordScan defines what subset of the records will be offered as input
        // to the map task.
        RecordScan scan = new RecordScan();
        scan.setRecordFilter(new RecordTypeFilter(new QName("mrsample", "Document")));

        // Need LilyClient here just to be able to serialize the RecordScan.
        // This is a bit lame, will improve in the future.
        LilyClient lilyClient = new LilyClient(zkConnectString, 30000);
        try {
            LRepository repository = lilyClient.getDefaultRepository();

            // Utility method will configure everything related to LilyInputFormat
            LilyMapReduceUtil.initMapperJob(scan, zkConnectString, repository, job);
        } finally {
            // Close even when job setup throws, so the ZooKeeper connection is not leaked
            Closer.close(lilyClient);
        }

        // Launch the job
        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("error executing job!");
        }

        return 0;
    }

    /**
     * Parses the command line, storing the ZooKeeper connection string in
     * {@link #zkConnectString}. Prints usage help when parsing fails.
     *
     * @param args command-line arguments
     * @return 0 on success, 1 when the arguments are invalid
     */
    @SuppressWarnings("static-access")
    protected int parseArgs(String[] args) {
        Options cliOptions = new Options();

        Option zkOption = OptionBuilder
                .isRequired()
                .withArgName("connection-string")
                .hasArg()
                .withDescription("ZooKeeper connection string: hostname1:port,hostname2:port,...")
                .withLongOpt("zookeeper")
                .create("z");
        cliOptions.addOption(zkOption);

        CommandLineParser parser = new PosixParser();
        CommandLine cmd;
        try {
            cmd = parser.parse(cliOptions, args);
        } catch (ParseException e) {
            System.out.println(e.getMessage());
            System.out.println();
            HelpFormatter help = new HelpFormatter();
            help.printHelp(getClass().getSimpleName(), cliOptions, true);
            return 1;
        }

        zkConnectString = cmd.getOptionValue(zkOption.getOpt());

        return 0;
    }
}