/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.server.master;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
import org.apache.accumulo.core.client.mapreduce.InputFormatBase;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.accumulo.server.client.HdfsZooInstance;
import org.apache.accumulo.server.logger.IdentityReducer;
import org.apache.accumulo.server.logger.LogFileKey;
import org.apache.accumulo.server.logger.LogFileValue;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/**
 * Sorts write-ahead log files with a map/reduce job so they can be used for tablet recovery.
 * Borrows from the Apache sort example program.
 */
public class LogSort extends Configured implements Tool {
private static final Logger log = Logger.getLogger(LogSort.class);
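// configuration property used to pass the Accumulo instance id to the map/reduce tasks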
public static final String INSTANCE_ID_PROPERTY = "accumulo.instance.id";
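// created by run(); submitted or waited on by startSort()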
private Job job = null;
public static String getJobName() {
return "LogSort_" + HdfsZooInstance.getInstance().getInstanceID();
}
private void printUsage() {
System.out.println("accumulo " + this.getClass().getName() + " [-r <reducers>] [-q queue] [-p pool] <input> <output>");
ToolRunner.printGenericCommandUsage(System.out);
}
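/**
 * Output committer that cleans up after failures: aborted tasks remove partial output, and the
 * job output is only marked finished when every reducer's part file is present.
 */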
public static class SortCommit extends FileOutputCommitter {
private final Path outputPath;
private final FileSystem outputFileSystem;
public SortCommit(Path outputPath, TaskAttemptContext context) throws IOException {
super(outputPath, context);
this.outputPath = outputPath;
outputFileSystem = outputPath.getFileSystem(InputFormatBase.getConfiguration(context));
}
@Override
public void abortTask(TaskAttemptContext context) {
try {
super.abortTask(context);
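// remove any partial output the failed attempt left behind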
outputFileSystem.delete(outputPath, true);
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
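/**
 * Runs at job completion: if every reducer produced a part file, mark the output with a
 * "finished" flag file; otherwise remove the incomplete output.
 */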
@Deprecated
@Override
public void cleanupJob(JobContext context) throws IOException {
super.cleanupJob(context);
// count the part files the reducers produced
int parts = 0;
if (outputFileSystem.exists(outputPath)) {
for (FileStatus status : outputFileSystem.listStatus(outputPath)) {
if (status.getPath().getName().startsWith("part")) {
parts++;
}
}
}
// only mark the output finished if every reducer wrote a part file and the flag file can be created
if (parts != context.getNumReduceTasks() || !outputFileSystem.createNewFile(new Path(outputPath, "finished"))) {
log.error("Found " + parts + " of " + context.getNumReduceTasks() + " part files, or could not create the finished flag file; deleting " + outputPath);
outputFileSystem.delete(outputPath, true);
}
}
}
/**
 * The main driver for the sort program: parses the arguments and configures the map/reduce job.
 * The job is submitted by {@link #startSort(boolean, String[])}.
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
if (job != null)
throw new RuntimeException("job has already run");
// parse options
int numReduces = 1;
String queueName = "default";
String poolName = "recovery";
List<String> otherArgs = new ArrayList<String>();
for (int i = 0; i < args.length; ++i) {
try {
if ("-r".equals(args[i]))
numReduces = Integer.parseInt(args[++i]);
else if ("-q".equals(args[i]))
queueName = args[++i];
else if ("-p".equals(args[i]))
poolName = args[++i];
else
otherArgs.add(args[i]);
} catch (NumberFormatException e) {
log.error("Integer expected instead of " + args[i], e);
printUsage();
return 1;
} catch (ArrayIndexOutOfBoundsException e) {
log.error("Required parameter missing from " + args[i - 1], e);
printUsage();
return 1;
}
}
// validate arguments
if (otherArgs.size() != 2) {
log.error("Wrong number of parameters: " + otherArgs.size() + " instead of 2.", new Exception());
printUsage();
return 1;
}
// create job
job = new Job(getConf(), getJobName());
job.setJarByClass(this.getClass());
// set input
job.setInputFormatClass(SequenceFileInputFormat.class);
SequenceFileInputFormat.setInputPaths(job, otherArgs.get(0));
// set identity mappers
job.setMapperClass(Mapper.class);
job.setOutputKeyClass(LogFileKey.class);
job.setOutputValueClass(LogFileValue.class);
// set custom partitioner
job.setPartitionerClass(RoundRobinPartitioner.class);
// set identity reducer
job.setReducerClass(IdentityReducer.class);
job.setNumReduceTasks(numReduces);
// set output
job.setOutputFormatClass(LoggerMapFileOutputFormat.class);
LoggerMapFileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
// configure the scheduler queue and pool the job will be submitted to
job.getConfiguration().set("mapred.job.queue.name", queueName);
job.getConfiguration().set("mapred.job.pool.name", poolName);
job.getConfiguration().set(INSTANCE_ID_PROPERTY, HdfsZooInstance.getInstance().getInstanceID());
log.info("Running on some nodes to sort from " + SequenceFileInputFormat.getInputPaths(job)[0] + " into " + AccumuloFileOutputFormat.getOutputPath(job)
+ " with " + num_reduces + " reduces.");
return 0;
}
public static Job startSort(boolean background, String[] args) throws Exception {
LogSort sort = new LogSort();
// run() only configures the job; a non-zero exit code means the arguments were rejected
if (ToolRunner.run(CachedConfiguration.getInstance(), sort, args) != 0 || sort.job == null)
throw new RuntimeException("Failed to configure the sort job");
if (background)
sort.job.submit();
else
sort.job.waitForCompletion(true);
return sort.job;
}
public static void main(String[] args) throws Exception {
long startTime = System.currentTimeMillis();
log.info("Job started");
Job job = startSort(false, args);
log.info("The job finished after " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds.");
if (!job.isSuccessful())
System.exit(1);
}
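/**
 * Writes the sorted log entries as a Hadoop MapFile, a sorted, indexed container that lets log
 * recovery seek to the entries it needs instead of scanning the whole file.
 */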
private static class LoggerMapFileOutputFormat extends FileOutputFormat<WritableComparable<?>,Writable> {
@Override
public RecordWriter<WritableComparable<?>,Writable> getRecordWriter(final TaskAttemptContext job) throws IOException, InterruptedException {
// get the path of the temporary output file
Path file = getDefaultWorkFile(job, "");
FileSystem fs = file.getFileSystem(InputFormatBase.getConfiguration(job));
CompressionCodec codec = null;
CompressionType compressionType = CompressionType.NONE;
if (getCompressOutput(job)) {
// find the kind of compression to do
compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);
// find the right codec
Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
codec = ReflectionUtils.newInstance(codecClass, InputFormatBase.getConfiguration(job));
}
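// report progress so the framework does not kill the task during long MapFile writes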
Progressable progress = new Progressable() {
@Override
public void progress() {
job.progress();
}
};
final MapFile.Writer out = new MapFile.Writer(InputFormatBase.getConfiguration(job), fs, file.toString(),
job.getOutputKeyClass().asSubclass(WritableComparable.class), job.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, progress);
return new RecordWriter<WritableComparable<?>,Writable>() {
@Override
public void write(WritableComparable<?> key, Writable value) throws IOException {
out.append(key, value);
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
out.close();
}
};
}
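// use the committer that validates the output and writes the "finished" marker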
@Override
public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
return new SortCommit(getOutputPath(context), context);
}
}
}