ObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = OrcStruct.createObjectInspector(0, types);
}
HiveDecimal maxValue = new HiveDecimal("100000000000000000000");
Writer writer = OrcFile.createWriter(testFilePath,
OrcFile.writerOptions(conf)
.inspector(inspector)
.stripeSize(1000)
.compress(CompressionKind.NONE)
.bufferSize(100)
.blockPadding(false));
OrcStruct row = new OrcStruct(3);
OrcUnion union = new OrcUnion();
row.setFieldValue(1, union);
row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00"));
HiveDecimal value = new HiveDecimal("12345678.6547456");
row.setFieldValue(2, value);
union.set((byte) 0, new IntWritable(42));
writer.addRow(row);
row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789"));
union.set((byte) 1, new Text("hello"));
value = new HiveDecimal("-5643.234");
row.setFieldValue(2, value);
writer.addRow(row);
row.setFieldValue(0, null);
row.setFieldValue(1, null);
row.setFieldValue(2, null);
writer.addRow(row);
row.setFieldValue(1, union);
union.set((byte) 0, null);
writer.addRow(row);
union.set((byte) 1, null);
writer.addRow(row);
union.set((byte) 0, new IntWritable(200000));
row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00"));
value = new HiveDecimal("100000000000000000000");
row.setFieldValue(2, value);
writer.addRow(row);
Random rand = new Random(42);
for(int i=1900; i < 2200; ++i) {
row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." + i));
if ((i & 1) == 0) {
union.set((byte) 0, new IntWritable(i*i));
} else {
union.set((byte) 1, new Text(new Integer(i*i).toString()));
}
value = new HiveDecimal(new BigInteger(118, rand),
rand.nextInt(36));
row.setFieldValue(2, value);
if (maxValue.compareTo(value) < 0) {
maxValue = value;
}
writer.addRow(row);
}
// Add 5000 identical rows (null timestamp, constant union int, null decimal) to exercise run-length encoding (RLE).
row.setFieldValue(0, null);
union.set((byte) 0, new IntWritable(1732050807));
row.setFieldValue(2, null);
for(int i=0; i < 5000; ++i) {
writer.addRow(row);
}
union.set((byte) 0, new IntWritable(0));
writer.addRow(row);
union.set((byte) 0, new IntWritable(10));
writer.addRow(row);
union.set((byte) 0, new IntWritable(138));
writer.addRow(row);
writer.close();
Reader reader = OrcFile.createReader(fs, testFilePath);
assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
assertEquals(5309, reader.getNumberOfRows());
DecimalColumnStatistics stats =
(DecimalColumnStatistics) reader.getStatistics()[5];
assertEquals(303, stats.getNumberOfValues());
assertEquals(new HiveDecimal("-5643.234"), stats.getMinimum());
assertEquals(maxValue, stats.getMaximum());
assertEquals(null, stats.getSum());
int stripeCount = 0;
int rowCount = 0;
long currentOffset = -1;
for(StripeInformation stripe: reader.getStripes()) {
stripeCount += 1;
rowCount += stripe.getNumberOfRows();
if (currentOffset < 0) {
currentOffset = stripe.getOffset() + stripe.getIndexLength() +
stripe.getDataLength() + stripe.getFooterLength();
} else {
assertEquals(currentOffset, stripe.getOffset());
currentOffset += stripe.getIndexLength() +
stripe.getDataLength() + stripe.getFooterLength();
}
}
assertEquals(reader.getNumberOfRows(), rowCount);
assertEquals(2, stripeCount);
assertEquals(reader.getContentLength(), currentOffset);
RecordReader rows = reader.rows(null);
assertEquals(0, rows.getRowNumber());
assertEquals(0.0, rows.getProgress(), 0.000001);
assertEquals(true, rows.hasNext());
row = (OrcStruct) rows.next(null);
assertEquals(1, rows.getRowNumber());
inspector = reader.getObjectInspector();
assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal>",
inspector.getTypeName());
assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
row.getFieldValue(0));
union = (OrcUnion) row.getFieldValue(1);
assertEquals(0, union.getTag());
assertEquals(new IntWritable(42), union.getObject());
assertEquals(new HiveDecimal("12345678.6547456"), row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(2, rows.getRowNumber());
assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
row.getFieldValue(0));
assertEquals(1, union.getTag());
assertEquals(new Text("hello"), union.getObject());
assertEquals(new HiveDecimal("-5643.234"), row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(null, row.getFieldValue(0));
assertEquals(null, row.getFieldValue(1));
assertEquals(null, row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(null, row.getFieldValue(0));
union = (OrcUnion) row.getFieldValue(1);
assertEquals(0, union.getTag());
assertEquals(null, union.getObject());
assertEquals(null, row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(null, row.getFieldValue(0));
assertEquals(1, union.getTag());
assertEquals(null, union.getObject());
assertEquals(null, row.getFieldValue(2));
row = (OrcStruct) rows.next(row);
assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"),
row.getFieldValue(0));
assertEquals(new IntWritable(200000), union.getObject());
assertEquals(new HiveDecimal("100000000000000000000"),
row.getFieldValue(2));
rand = new Random(42);
for(int i=1900; i < 2200; ++i) {
row = (OrcStruct) rows.next(row);
assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
row.getFieldValue(0));
if ((i & 1) == 0) {
assertEquals(0, union.getTag());
assertEquals(new IntWritable(i*i), union.getObject());
} else {
assertEquals(1, union.getTag());
assertEquals(new Text(new Integer(i*i).toString()), union.getObject());
}
assertEquals(new HiveDecimal(new BigInteger(118, rand),
rand.nextInt(36)), row.getFieldValue(2));
}
for(int i=0; i < 5000; ++i) {
row = (OrcStruct) rows.next(row);
assertEquals(new IntWritable(1732050807), union.getObject());
}
row = (OrcStruct) rows.next(row);
assertEquals(new IntWritable(0), union.getObject());
row = (OrcStruct) rows.next(row);
assertEquals(new IntWritable(10), union.getObject());
row = (OrcStruct) rows.next(row);
assertEquals(new IntWritable(138), union.getObject());
assertEquals(false, rows.hasNext());
assertEquals(1.0, rows.getProgress(), 0.00001);
assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
rows.seekToRow(1);
row = (OrcStruct) rows.next(row);
assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
row.getFieldValue(0));
assertEquals(1, union.getTag());
assertEquals(new Text("hello"), union.getObject());
assertEquals(new HiveDecimal("-5643.234"), row.getFieldValue(2));
rows.close();
}