Package com.manning.hip.ch8

Source Code of com.manning.hip.ch8.BinaryFilenameInputFormat

package com.manning.hip.ch8;

import com.manning.hip.ch3.passwd.Passwd;
import com.manning.hip.common.HadoopCompat;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

import java.io.IOException;

/**
* An {@link org.apache.hadoop.mapreduce.InputFormat} for
* working with binary files in Streaming.
* <p/>
* Keys are the filename ostensibly byte offsets in the file, and values
* are {@link com.manning.hip.ch3.passwd.Passwd} objects.
*/
public class BinaryFilenameInputFormat extends
    FileInputFormat<LongWritable, Passwd> {

  @Override
  public RecordReader<LongWritable, Passwd>
  createRecordReader(InputSplit split,
                     TaskAttemptContext context) {
    return new PasswdRecordReader();
  }

  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec =
        new CompressionCodecFactory(HadoopCompat.getConfiguration(context))
            .getCodec(file);
    return codec == null;
  }

  public static class PasswdRecordReader
      extends RecordReader<LongWritable, Passwd> {
    public final static String PASSWD_LINE_SEPARATOR = ":";
    private LineRecordReader reader = new LineRecordReader();
    private Passwd value;

    @Override
    public void initialize(InputSplit split,
                           TaskAttemptContext context)
        throws IOException, InterruptedException {
      reader.initialize(split, context);
    }

    @Override
    public boolean nextKeyValue()
        throws IOException, InterruptedException {
      if (reader.nextKeyValue()) {
        parseLine();
        return true;
      } else {
        value = null;
        return false;
      }
    }

    private void parseLine() {
      String line = reader.getCurrentValue().toString();
      String[] tokens =
          StringUtils.splitPreserveAllTokens(line,
              PASSWD_LINE_SEPARATOR);
      value = new Passwd(
          StringUtils.trimToNull(tokens[0]),
          StringUtils.trimToNull(tokens[1]),
          StringUtils.trimToNull(tokens[2]) == null ?
              null : Long.valueOf(tokens[2]),
          StringUtils.trimToNull(tokens[3]) == null ?
              null : Long.valueOf(tokens[3]),
          StringUtils.trimToNull(tokens[4]),
          StringUtils.trimToNull(tokens[5]),
          StringUtils.trimToNull(tokens[6])
      );
    }

    @Override
    public LongWritable getCurrentKey()
        throws IOException, InterruptedException {
      return reader.getCurrentKey();
    }

    @Override
    public Passwd getCurrentValue()
        throws IOException, InterruptedException {
      return value;
    }

    @Override
    public float getProgress()
        throws IOException, InterruptedException {
      return reader.getProgress();
    }

    @Override
    public void close() throws IOException {
      reader.close();
    }
  }

}
TOP

Related Classes of com.manning.hip.ch8.BinaryFilenameInputFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.