Package org.apache.accumulo.server.master

Source Code of org.apache.accumulo.server.master.LogSort$SortCommit
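LogSort is the map/reduce tool the Accumulo master uses to sort tablet server write-ahead logs for recovery: an identity mapper and reducer pass LogFileKey/LogFileValue pairs through a round-robin partitioner into sorted MapFile output. The inner SortCommit committer marks a completed sort with a "finished" flag file and removes partial output on failure.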

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.server.master;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
import org.apache.accumulo.core.client.mapreduce.InputFormatBase;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.accumulo.server.client.HdfsZooInstance;
import org.apache.accumulo.server.logger.IdentityReducer;
import org.apache.accumulo.server.logger.LogFileKey;
import org.apache.accumulo.server.logger.LogFileValue;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/* Borrows from the Apache sort example program */
public class LogSort extends Configured implements Tool {

  private static final Logger log = Logger.getLogger(LogSort.class);
  public static final String INSTANCE_ID_PROPERTY = "accumulo.instance.id";
  private Job job = null;

  public static String getJobName() {
    return "LogSort_" + HdfsZooInstance.getInstance().getInstanceID();
  }

  private void printUsage() {
    System.out.println("accumulo " + this.getClass().getName() + " [-r <reducers>] [-q <queue>] [-p <pool>] <input> <output>");
    ToolRunner.printGenericCommandUsage(System.out);
  }

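  /**
   * Committer that finalizes a sort: on cleanup it verifies that every reducer
   * produced an output file and then creates a "finished" flag file; if either
   * step fails, or a task aborts, the partial output is deleted.
   */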
  public static class SortCommit extends FileOutputCommitter {

    final private Path outputPath;
    final private FileSystem outputFileSystem;

    public SortCommit(Path outputPath, TaskAttemptContext context) throws IOException {
      super(outputPath, context);
      this.outputPath = outputPath;
      outputFileSystem = outputPath.getFileSystem(InputFormatBase.getConfiguration(context));
    }

    @Override
    public void abortTask(TaskAttemptContext context) {
      try {
        super.abortTask(context);
        outputFileSystem.delete(outputPath, true);
      } catch (IOException ex) {
        throw new RuntimeException(ex);
      }
    }

    @Deprecated
    @Override
    public void cleanupJob(JobContext context) throws IOException {
      super.cleanupJob(context);
      int parts = 0;
      if (outputFileSystem.exists(outputPath)) {
        for (FileStatus status : outputFileSystem.listStatus(outputPath)) {
          if (status.getPath().getName().startsWith("part")) {
            parts++;
          }
        }
      }
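      // expect one "part" file per reducer; mark success with a "finished"
      // flag file, or remove the partial output so the sort will be retried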
      if (parts != context.getNumReduceTasks() || !outputFileSystem.createNewFile(new Path(outputPath, "finished"))) {
        log.error("Missing reducer output or unable to create finished flag file; removing partial output");
        outputFileSystem.delete(outputPath, true);
      }
    }
  }

  /**
   * The main driver for the sort program. Invoke this method to configure the map/reduce job;
   * {@link #startSort(boolean, String[])} submits it.
   */
  @Override
  public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (job != null)
      throw new RuntimeException("job has already run");

    // parse options
    int num_reduces = 1;
    String queueName = "default";
    String poolName = "recovery";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-r".equals(args[i]))
          num_reduces = Integer.parseInt(args[++i]);
        else if ("-q".equals(args[i]))
          queueName = args[++i];
        else if ("-p".equals(args[i]))
          poolName = args[++i];
        else
          otherArgs.add(args[i]);
      } catch (NumberFormatException e) {
        log.error("Integer expected instead of " + args[i], e);
        printUsage();
        return 1;
      } catch (ArrayIndexOutOfBoundsException e) {
        log.error("Required parameter missing from " + args[i - 1], e);
        printUsage();
        return 1;
      }
    }

    // validate arguments
    if (otherArgs.size() != 2) {
      log.error("Wrong number of parameters: " + otherArgs.size() + " instead of 2.", new Exception());
      printUsage();
      return 1;
    }

    // create job
    job = new Job(getConf(), getJobName());
    job.setJarByClass(this.getClass());

    // set input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, otherArgs.get(0));

    // set the identity mapper
    job.setMapperClass(Mapper.class);
    job.setOutputKeyClass(LogFileKey.class);
    job.setOutputValueClass(LogFileValue.class);

    // set a custom partitioner to spread entries evenly across the reducers
    job.setPartitionerClass(RoundRobinPartitioner.class);

    // set identity reducer
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(num_reduces);

    // set output
    job.setOutputFormatClass(LoggerMapFileOutputFormat.class);
    LoggerMapFileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

    // configure the scheduler queue and pool, and record the instance id
    job.getConfiguration().set("mapred.job.queue.name", queueName);
    job.getConfiguration().set("mapred.job.pool.name", poolName);
    job.getConfiguration().set(INSTANCE_ID_PROPERTY, HdfsZooInstance.getInstance().getInstanceID());
    log.info("Sorting " + SequenceFileInputFormat.getInputPaths(job)[0] + " into " + AccumuloFileOutputFormat.getOutputPath(job) + " with " + num_reduces
        + " reducers.");
    return 0;
  }

  public static Job startSort(boolean background, String[] args) throws Exception {
    LogSort sort = new LogSort();
    if (ToolRunner.run(CachedConfiguration.getInstance(), sort, args) != 0 || sort.job == null)
      throw new Exception("Unable to configure the log sort job; check the arguments");
    if (background)
      sort.job.submit();
    else
      sort.job.waitForCompletion(true);
    return sort.job;
  }

  public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();
    log.info("Job started");
    Job job = startSort(false, args);
    log.info("The job finished after " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds.");
    if (!job.isSuccessful())
      System.exit(1);
  }

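  /**
   * Output format that writes each reducer's sorted key/value pairs as a Hadoop
   * MapFile, so recovered log entries can later be looked up by key, and that
   * uses SortCommit to finalize the job.
   */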
  private static class LoggerMapFileOutputFormat extends FileOutputFormat<WritableComparable<?>,Writable> {
    @Override
    public RecordWriter<WritableComparable<?>,Writable> getRecordWriter(final TaskAttemptContext job) throws IOException, InterruptedException {
      // get the path of the temporary output file
      Path file = getDefaultWorkFile(job, "");

      FileSystem fs = file.getFileSystem(InputFormatBase.getConfiguration(job));
      CompressionCodec codec = null;
      CompressionType compressionType = CompressionType.NONE;
      if (getCompressOutput(job)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);

        // find the right codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, InputFormatBase.getConfiguration(job));
      }

      Progressable progress = new Progressable() {
        @Override
        public void progress() {
          job.progress();
        }
      };
      final MapFile.Writer out = new MapFile.Writer(job.getConfiguration(), fs, file.toString(), job.getOutputKeyClass().asSubclass(WritableComparable.class),
          job.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, progress);
      return new RecordWriter<WritableComparable<?>,Writable>() {

        @Override
        public void write(WritableComparable<?> key, Writable value) throws IOException {
          out.append(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
          out.close();
        }
      };

    }

    @Override
    public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
      return new SortCommit(getOutputPath(context), context);
    }
  }
}
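
For illustration, a minimal sketch of invoking the tool programmatically, assuming LogSort and its Hadoop/Accumulo dependencies are on the classpath; the paths and reducer count below are hypothetical placeholders, not Accumulo defaults:

// A hypothetical caller; the paths below are placeholders.
package org.apache.accumulo.server.master;

import org.apache.hadoop.mapreduce.Job;

public class LogSortExample {
  public static void main(String[] args) throws Exception {
    // Sort one write-ahead log into a recovery directory using two reducers,
    // blocking until the job completes (background = false).
    Job job = LogSort.startSort(false, new String[] {
        "-r", "2",                      // number of reduce tasks
        "/logs/example-wal",            // input: unsorted log file (placeholder)
        "/recovery/example-wal.sorted"  // output: sorted MapFile directory (placeholder)
    });
    System.exit(job.isSuccessful() ? 0 : 1);
  }
}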