Package com.twitter.elephantbird.mapreduce.io

Examples of com.twitter.elephantbird.mapreduce.io.RawBlockWriter


*/
public class LzoBinaryBlockOutputFormat extends LzoOutputFormat<byte[], RawBytesWritable> {
  @Override
  public RecordWriter<byte[], RawBytesWritable> getRecordWriter(TaskAttemptContext job)
      throws IOException, InterruptedException {
    return new LzoBinaryBlockRecordWriter<byte[], RawBytesWritable>(new RawBlockWriter(
        getOutputStream(job)));
  }
View Full Code Here


    final File inDir = new File(testDir, "in");
    inDir.mkdirs();

    // block writer
    RawBlockWriter blk_writer = new RawBlockWriter(createLzoOut(new File(inDir, "1-block.lzo"), conf));
    //b64 writer
    OutputStream b64_writer = createLzoOut(new File(inDir, "2-b64.lzo"), conf);

    Base64 base64 = Codecs.createStandardBase64();

    for (TestPerson rec : records) {
      //write a regular record and an empty record
      byte[] bytes = tConverter.toBytes(rec);
      blk_writer.write(bytes);
      blk_writer.write(new byte[0]);
      b64_writer.write(base64.encode(bytes));
      b64_writer.write(Protobufs.NEWLINE_UTF8_BYTE);
      b64_writer.write(Protobufs.NEWLINE_UTF8_BYTE); // empty line.
    }
    blk_writer.close();
    b64_writer.close();
    // end of initialization.

    pigServer.registerQuery(String.format(
        "A = load '%s' using %s('%s');\n",
View Full Code Here

    final File inDir = new File(testDir, "in");
    inDir.mkdirs();

    // create input with 100 records with 10% of records with errors.

    RawBlockWriter blk_writer = new RawBlockWriter(createLzoOut(new File(inDir, "1-block.lzo"), conf));

    TestPerson person = records[records.length - 1];
    String expectedStr = personToString(person);
    byte[] properRec = tConverter.toBytes(person);
    byte[] truncatedRec = Arrays.copyOfRange(properRec, 0, properRec.length*3/4);

    final int totalRecords = 100;
    final int pctErrors = 10;
    final int totalErrors = totalRecords * pctErrors / 100;
    final int goodRecords = totalRecords - totalErrors;

    int corruptIdx = new Random().nextInt(10);
    for(int i=0; i<totalRecords; i++) {
      blk_writer.write((i%10 == corruptIdx) ? truncatedRec : properRec);
    }
    blk_writer.close();

    String[] expectedRows = new String[goodRecords];
    for (int i=0; i<goodRecords; i++){
      expectedRows[i] = expectedStr;
    }
View Full Code Here

TOP

Related Classes of com.twitter.elephantbird.mapreduce.io.RawBlockWriter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.