Package org.honu.inputtools.converter

Source Code of org.honu.inputtools.converter.CmdLineConverter

package org.honu.inputtools.converter;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.UnknownHostException;

import org.apache.hadoop.chukwa.ChukwaArchiveKey;
import org.apache.hadoop.chukwa.Chunk;
import org.apache.hadoop.chukwa.ChunkBuilder;
import org.apache.hadoop.chukwa.ChunkImpl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CmdLineConverter {

  static String localHostAddr = null;
 
  static {
    try {
      localHostAddr = InetAddress.getLocalHost().getHostName();
    } catch (UnknownHostException e) {
      localHostAddr = "N/A";
    }
  }
 
  /**
   * @param args
   * @throws ClassNotFoundException
   */
  @SuppressWarnings("unchecked")
  public static void main(String[] args) throws ClassNotFoundException {
   
    if (args.length != 3) {
      System.out.println("java org.honu.inputtools.converter.CmdLineConverter <dataType> <codec> <outputFile>");
      System.out.println("codec: NONE , for uncompressed seqFile");
      System.out.println("codec: org.apache.hadoop.io.compress.GzipCodec , for GZIP compressed seqFile");
      System.out.println("codec: org.apache.hadoop.io.compress.LzoCodec , for LZO compressed seqFile");
     
      System.exit(-1);
    }
   
    String dataType = args[0];
    String codecClass = args[1];
    String outpFileName = args[2];
   
    if (codecClass.equalsIgnoreCase("none")) {
      codecClass = null;
    }
    int lineCount = 0;
    Path newOutputPath = null;
   
    try {
      BufferedReader in = new BufferedReader(new InputStreamReader(System.in));

      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.getLocal(conf);
      newOutputPath = new Path(outpFileName);

      CompressionCodec codec = null;
      if (codecClass != null) {
        Class classDefinition =  Class.forName(codecClass);
        codec =  (CompressionCodec) ReflectionUtils.newInstance(classDefinition, conf);       
      }
     
        FSDataOutputStream newOutputStr = fs.create(newOutputPath);
      SequenceFile.Writer  seqFileWriter = null;
     
      if (codec != null) {
        seqFileWriter = SequenceFile.createWriter(conf, newOutputStr,
            ChukwaArchiveKey.class, ChunkImpl.class,
            SequenceFile.CompressionType.BLOCK, codec);
      } else {
        seqFileWriter = SequenceFile.createWriter(conf, newOutputStr,
            ChukwaArchiveKey.class, ChunkImpl.class,
            SequenceFile.CompressionType.NONE, codec);
      }
     
     
      String str = null;
      ChunkBuilder cb = null;
    
      do
      {
        str = in.readLine();
       
        if (str != null) {
          lineCount ++;
          if (cb == null) {
            cb = new ChunkBuilder();
          }
          cb.addRecord(str.getBytes());
          if (lineCount%300 == 0) {
            append(seqFileWriter,getChunk(cb,dataType));
            cb = null;
          }
        }  
      } while (str != null);
     
      if (cb != null) {
        append(seqFileWriter,getChunk(cb,dataType));
      }
     
      seqFileWriter.close();
      newOutputStr.close();
     
    } catch (Throwable e) {
      e.printStackTrace();
      System.exit(-1);
    }
    System.out.println(new java.util.Date() + ", CmdLineConverter [" + dataType + "] [" +  newOutputPath + "], Total lineCount: " + lineCount);
    System.exit(0);
  }
 
  public static void append(SequenceFile.Writer  seqFileWriter, Chunk chunk) throws Throwable {
    ChukwaArchiveKey archiveKey = new ChukwaArchiveKey();
    archiveKey.setTimePartition(System.currentTimeMillis());
    archiveKey.setDataType(chunk.getDataType());
    archiveKey.setStreamName("CmdLineConverter");
    archiveKey.setSeqId(chunk.getSeqID());
    seqFileWriter.append(archiveKey, chunk);
  }
 
  public static Chunk getChunk(ChunkBuilder cb,String dataType) {
    Chunk c = cb.getChunk();
    c.setApplication("CmdLineConverter");
    c.setDataType(dataType);
    c.setSeqID(System.currentTimeMillis());
    c.setSource(localHostAddr);
    return c;
  }
}
TOP

Related Classes of org.honu.inputtools.converter.CmdLineConverter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.