Package com.facebook.hive.orc

Source Code of com.facebook.hive.orc.TestOrcFile

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.facebook.hive.orc;

import static com.facebook.hive.orc.OrcTestUtils.byteBuf;
import static com.facebook.hive.orc.OrcTestUtils.bytes;
import static com.facebook.hive.orc.OrcTestUtils.inner;
import static com.facebook.hive.orc.OrcTestUtils.list;
import static com.facebook.hive.orc.OrcTestUtils.map;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertNotNull;
import static junit.framework.Assert.assertNull;
import static junit.framework.Assert.assertTrue;

import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;

import com.facebook.hive.orc.compression.CompressionKind;
import com.facebook.hive.orc.statistics.BooleanColumnStatistics;
import com.facebook.hive.orc.statistics.ColumnStatistics;
import com.facebook.hive.orc.statistics.DoubleColumnStatistics;
import com.facebook.hive.orc.statistics.IntegerColumnStatistics;
import com.facebook.hive.orc.statistics.StringColumnStatistics;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;

import com.facebook.hive.orc.OrcTestUtils.BigRow;
import com.facebook.hive.orc.OrcTestUtils.DoubleRow;
import com.facebook.hive.orc.OrcTestUtils.InnerStruct;
import com.facebook.hive.orc.OrcTestUtils.IntStruct;
import com.facebook.hive.orc.OrcTestUtils.MiddleStruct;
import com.facebook.hive.orc.OrcTestUtils.ReallyBigRow;
import com.facebook.hive.orc.OrcTestUtils.StringListWithId;
import com.facebook.hive.orc.OrcTestUtils.StringStruct;
import com.facebook.hive.orc.lazy.LazyTreeReader;
import com.facebook.hive.orc.lazy.OrcLazyBinary;
import com.facebook.hive.orc.lazy.OrcLazyBoolean;
import com.facebook.hive.orc.lazy.OrcLazyByte;
import com.facebook.hive.orc.lazy.OrcLazyDouble;
import com.facebook.hive.orc.lazy.OrcLazyFloat;
import com.facebook.hive.orc.lazy.OrcLazyInt;
import com.facebook.hive.orc.lazy.OrcLazyList;
import com.facebook.hive.orc.lazy.OrcLazyLong;
import com.facebook.hive.orc.lazy.OrcLazyMap;
import com.facebook.hive.orc.lazy.OrcLazyObject;
import com.facebook.hive.orc.lazy.OrcLazyObjectInspectorUtils;
import com.facebook.hive.orc.lazy.OrcLazyRow;
import com.facebook.hive.orc.lazy.OrcLazyShort;
import com.facebook.hive.orc.lazy.OrcLazyString;
import com.facebook.hive.orc.lazy.OrcLazyStruct;
import com.facebook.hive.orc.lazy.OrcLazyTimestamp;
import com.facebook.hive.orc.lazy.OrcLazyUnion;

/**
* Tests for the top level reader/streamFactory of ORC files.
*/
public class TestOrcFile {

  Path workDir = new Path(System.getProperty("test.tmp.dir",
      "target" + File.separator + "test" + File.separator + "tmp"));

  Configuration conf;
  FileSystem fs;
  Path testFilePath, testFilePath2;

  @Rule
  public TestName testCaseName = new TestName();

  @Before
  public void openFileSystem () throws Exception {
    conf = new Configuration();
    fs = FileSystem.getLocal(conf);
    testFilePath = new Path(workDir, "TestOrcFile." +
        testCaseName.getMethodName() + ".orc");
    testFilePath2 = new Path(workDir, "TestOrcFile2." +
        testCaseName.getMethodName() + ".orc");
    fs.delete(testFilePath, false);
    fs.delete(testFilePath2, false);
  }

  @Test
  public void testHash() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        100000, CompressionKind.ZLIB, 10000, 10000);
    writer.addRow(new BigRow(false, (byte) 1, (short) 1, 1,
        1L, (float) 1.0, 1.0, bytes(1), "1",
        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
        list(inner(3, "good"), inner(4, "bad")),
        map(inner(3, "good"), inner(4, "bad"))));
    writer.addRow(new BigRow(null, null, null, null,
        null, null, null, null, null, null, null, null));
    writer.close();
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(1, row.getFieldValue(0).hashCode());
    assertEquals(1, row.getFieldValue(1).hashCode());
    assertEquals(1, row.getFieldValue(2).hashCode());
    assertEquals(1, row.getFieldValue(3).hashCode());
    assertEquals(1, row.getFieldValue(4).hashCode());
    assertEquals(1065353216, row.getFieldValue(5).hashCode());
    assertEquals(1072693248, row.getFieldValue(6).hashCode());
    assertEquals(32, row.getFieldValue(7).hashCode());
    assertEquals(80, row.getFieldValue(8).hashCode());
    assertEquals(8417130, row.getFieldValue(9).hashCode());
    assertEquals(127296452, row.getFieldValue(10).hashCode());
    assertEquals(7, row.getFieldValue(11).hashCode());

    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(0, row.getFieldValue(0).hashCode());
    assertEquals(0, row.getFieldValue(1).hashCode());
    assertEquals(0, row.getFieldValue(2).hashCode());
    assertEquals(0, row.getFieldValue(3).hashCode());
    assertEquals(0, row.getFieldValue(4).hashCode());
    assertEquals(0, row.getFieldValue(5).hashCode());
    assertEquals(0, row.getFieldValue(6).hashCode());
    assertEquals(0, row.getFieldValue(7).hashCode());
    assertEquals(0, row.getFieldValue(8).hashCode());
    assertEquals(0, row.getFieldValue(9).hashCode());
    assertEquals(0, row.getFieldValue(10).hashCode());
    assertEquals(0, row.getFieldValue(11).hashCode());
  }

  @Test
  public void testDeepCopy() throws Exception {
    // Create a table and write a row to it
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        100000, CompressionKind.ZLIB, 10000, 10000);
    writer.addRow(new BigRow(false, (byte) 1, (short) 1, 1,
        1L, (float) 1.0, 1.0, bytes(1), "1",
        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
        list(inner(3, "good"), inner(4, "bad")),
        map(inner(3, "good"), inner(4, "bad"))));

    writer.close();

    // Prepare to tread back the row
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();

    // Check that the object read equals what is expected, then copy the object, and make the same
    // check
    OrcLazyObject obj;
    assertEquals(false,
        ((BooleanWritable) ((OrcLazyBoolean) row.getFieldValue(0)).materialize()).get());
    obj = new OrcLazyBoolean((OrcLazyBoolean) row.getFieldValue(0));
    assertEquals(false, ((BooleanWritable) obj.materialize()).get());

    assertEquals(1, ((ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize()).get());
    obj = new OrcLazyByte((OrcLazyByte) row.getFieldValue(1));
    assertEquals(1, ((ByteWritable) obj.materialize()).get());

    assertEquals(1, ((ShortWritable) ((OrcLazyShort) row.getFieldValue(2)).materialize()).get());
    obj = new OrcLazyShort((OrcLazyShort) row.getFieldValue(2));
    assertEquals(1, ((ShortWritable) obj.materialize()).get());

    assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(3)).materialize()).get());
    obj = new OrcLazyInt((OrcLazyInt) row.getFieldValue(3));
    assertEquals(1, ((IntWritable) obj.materialize()).get());

    assertEquals(1, ((LongWritable) ((OrcLazyLong) row.getFieldValue(4)).materialize()).get());
    obj = new OrcLazyLong((OrcLazyLong) row.getFieldValue(4));
    assertEquals(1, ((LongWritable) obj.materialize()).get());

    assertEquals(1.0f,
        ((FloatWritable) ((OrcLazyFloat) row.getFieldValue(5)).materialize()).get());
    obj = new OrcLazyFloat((OrcLazyFloat) row.getFieldValue(5));
    assertEquals(1.0f, ((FloatWritable) obj.materialize()).get());

    assertEquals(1.0,
        ((DoubleWritable) ((OrcLazyDouble) row.getFieldValue(6)).materialize()).get());
    obj = new OrcLazyDouble((OrcLazyDouble) row.getFieldValue(6));
    assertEquals(1.0, ((DoubleWritable) obj.materialize()).get());

    assertEquals(bytes(1), ((OrcLazyBinary) row.getFieldValue(7)).materialize());
    obj = new OrcLazyBinary((OrcLazyBinary) row.getFieldValue(7));
    assertEquals(bytes(1), obj.materialize());

    assertEquals("1", ((Text) ((OrcLazyString) row.getFieldValue(8)).materialize()).toString());
    obj = new OrcLazyString((OrcLazyString) row.getFieldValue(8));
    assertEquals("1", ((Text) obj.materialize()).toString());

    // Currently copies are not supported for complex types
  }

  @Test
  public void testSeekAcrossChunks() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (DoubleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }

    // Create a table consisting of a single column of doubles
    // Add enough values to it to get 3 index strides (doubles are 8 bytes) more is ok
    // Note that the compression buffer size and index stride length are very important
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 2097152,
        CompressionKind.ZLIB, 262144, 10000);
    Random rand = new Random(42);
    double[] values = new double[131702];

    // The first compression block is all 0's
    for (int i = 0; i < 32768; i++) {
      values[i] = 0;
      writer.addRow(new DoubleRow(values[i]));
    }

    // The second compression block is random doubles
    for (int i = 0; i < 32768; i++) {
      values[i + 32768] = rand.nextDouble();
      writer.addRow(new DoubleRow(values[i + 32768]));
    }

    // The third compression block is all 0's
    // (important so it compresses to the same size as the first)
    for (int i = 0; i < 32768; i++) {
      values[i + 32768 + 32768] = 0;
      writer.addRow(new DoubleRow(values[i + 32768 + 32768]));
    }

    // The fourth compression block is random
    for (int i = 0; i < 32768; i++) {
      values[i + 32768 + 32768 + 32768] = rand.nextDouble();
      writer.addRow(new DoubleRow(values[i + 32768 + 32768 + 32768]));
    }

    writer.close();
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_READ_COMPRESSION_STRIDES, 2);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, false);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);

    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    DoubleObjectInspector columnInspector =
        (DoubleObjectInspector) fields.get(0).getFieldObjectInspector();

    RecordReader rows = reader.rows(null);
    Object row = null;

    // Skip enough values to get to the 2nd index stride in the first chunk
    for (int i = 0; i < 40001; i++) {
      row = rows.next(row);
    }

    // This will set previousOffset to be the size of the first compression block and the
    // compressionOffset to some other value (doesn't matter what point is it's different from the
    // start of the compression block)
    assertEquals(values[40000], columnInspector.get(readerInspector.getStructFieldData(row,
        fields.get(0))));

    // Skip enough values to get to the 2nd index stride of the second chunk
    for (int i = 0; i < 80000; i++) {
      rows.next(row);
    }

    // When seek is called, previousOffset will equal newCompressedOffset since the former is the
    // the length of the first compression block and the latter is the length of the third
    // compression block (remember the chunks contain 2 index strides), so if we only check this
    // (or for some other reason) we will not adjust compressedIndex, we will read the wrong data
    assertEquals(values[120000], columnInspector.get(readerInspector.getStructFieldData(row, fields.get(0))));

    rows.close();
  }

  @Test
  public void test1() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        100000, CompressionKind.ZLIB, 10000, 10000);
    writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536,
        Long.MAX_VALUE, (float) 1.0, -15.0, bytes(0,1,2,3,4), "hi",
        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
        list(inner(3, "good"), inner(4, "bad")),
        map()));
    writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536,
        Long.MAX_VALUE, (float) 2.0, -5.0, bytes(), "bye",
        new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
        list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
        map(inner(5,"chani"), inner(1,"mauddib"))));
    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);

    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(2, stats[1].getNumberOfValues());
    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getFalseCount());
    assertEquals(1, ((BooleanColumnStatistics) stats[1]).getTrueCount());
    assertEquals("count: 2 true: 1", stats[1].toString());

    assertEquals(2048, ((IntegerColumnStatistics) stats[3]).getMaximum());
    assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum());
    assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined());
    assertEquals(3072, ((IntegerColumnStatistics) stats[3]).getSum());
    assertEquals("count: 2 min: 1024 max: 2048 sum: 3072",
        stats[3].toString());

    assertEquals(Long.MAX_VALUE,
        ((IntegerColumnStatistics) stats[5]).getMaximum());
    assertEquals(Long.MAX_VALUE,
        ((IntegerColumnStatistics) stats[5]).getMinimum());
    assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined());
    assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807",
        stats[5].toString());

    assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum());
    assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());
    assertEquals(-20.0, ((DoubleColumnStatistics) stats[7]).getSum(), 0.00001);
    assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0",
        stats[7].toString());

    assertEquals("count: 2 min: bye max: hi", stats[9].toString());

    // check the inspectors
    StructObjectInspector readerInspector =
        (StructObjectInspector) reader.getObjectInspector();
    assertEquals(ObjectInspector.Category.STRUCT,
        readerInspector.getCategory());
    assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
        + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
        + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
        + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
        + "map:map<string,struct<int1:int,string1:string>>>",
        readerInspector.getTypeName());
    List<? extends StructField> fields =
        readerInspector.getAllStructFieldRefs();
    BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector.
        getStructFieldRef("boolean1").getFieldObjectInspector();
    ByteObjectInspector by = (ByteObjectInspector) readerInspector.
        getStructFieldRef("byte1").getFieldObjectInspector();
    ShortObjectInspector sh = (ShortObjectInspector) readerInspector.
        getStructFieldRef("short1").getFieldObjectInspector();
    IntObjectInspector in = (IntObjectInspector) readerInspector.
        getStructFieldRef("int1").getFieldObjectInspector();
    LongObjectInspector lo = (LongObjectInspector) readerInspector.
        getStructFieldRef("long1").getFieldObjectInspector();
    FloatObjectInspector fl = (FloatObjectInspector) readerInspector.
        getStructFieldRef("float1").getFieldObjectInspector();
    DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector.
        getStructFieldRef("double1").getFieldObjectInspector();
    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.
        getStructFieldRef("bytes1").getFieldObjectInspector();
    StringObjectInspector st = (StringObjectInspector) readerInspector.
        getStructFieldRef("string1").getFieldObjectInspector();
    StructObjectInspector mid = (StructObjectInspector) readerInspector.
        getStructFieldRef("middle").getFieldObjectInspector();
    List<? extends StructField> midFields =
        mid.getAllStructFieldRefs();
    ListObjectInspector midli =
        (ListObjectInspector) midFields.get(0).getFieldObjectInspector();
    StructObjectInspector inner = (StructObjectInspector)
        midli.getListElementObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector inner_in = (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
    StringObjectInspector inner_st = (StringObjectInspector) inFields.get(1).getFieldObjectInspector();
    ListObjectInspector li = (ListObjectInspector) readerInspector.
        getStructFieldRef("list").getFieldObjectInspector();
    MapObjectInspector ma = (MapObjectInspector) readerInspector.
        getStructFieldRef("map").getFieldObjectInspector();
    StructObjectInspector lc = (StructObjectInspector)
        li.getListElementObjectInspector();
    StringObjectInspector mk = (StringObjectInspector)
        ma.getMapKeyObjectInspector();
    StructObjectInspector mv = (StructObjectInspector)
        ma.getMapValueObjectInspector();
    RecordReader rows = reader.rows(null);
    Object row = rows.next(null);
    assertNotNull(row);
    // check the contents of the first row
    assertEquals(false,
        bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(1, by.get(readerInspector.getStructFieldData(row,
        fields.get(1))));
    assertEquals(1024, sh.get(readerInspector.getStructFieldData(row,
        fields.get(2))));
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
        fields.get(3))));
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
        getStructFieldData(row, fields.get(4))));
    assertEquals(1.0, fl.get(readerInspector.getStructFieldData(row,
        fields.get(5))), 0.00001);
    assertEquals(-15.0, dbl.get(readerInspector.getStructFieldData(row,
        fields.get(6))), 0.00001);
    assertEquals(bytes(0,1,2,3,4), bi.getPrimitiveWritableObject(
        readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.
        getStructFieldData(row, fields.get(8))));
    List<?> midRow = midli.getList(mid.getStructFieldData(readerInspector.
        getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertNotNull(midRow);
    assertEquals(2, midRow.size());
    assertEquals(1, inner_in.get(inner.getStructFieldData(midRow.get(0),
        inFields.get(0))));
    assertEquals("bye", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (midRow.get(0), inFields.get(1))));
    assertEquals(2, inner_in.get(inner.getStructFieldData(midRow.get(1),
        inFields.get(0))));
    assertEquals("sigh", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (midRow.get(1), inFields.get(1))));
    List<?> list = li.getList(readerInspector.getStructFieldData(row,
        fields.get(10)));
    assertEquals(2, list.size());
    assertEquals(3, inner_in.get(inner.getStructFieldData(list.get(0),
        inFields.get(0))));
    assertEquals("good", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(0), inFields.get(1))));
    assertEquals(4, inner_in.get(inner.getStructFieldData(list.get(1),
        inFields.get(0))));
    assertEquals("bad", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(1), inFields.get(1))));
    Map<?,?> map = ma.getMap(readerInspector.getStructFieldData(row,
        fields.get(11)));
    assertEquals(0, map.size());

    // check the contents of second row
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
    assertEquals(true,
        bo.get(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals(100, by.get(readerInspector.getStructFieldData(row,
        fields.get(1))));
    assertEquals(2048, sh.get(readerInspector.getStructFieldData(row,
        fields.get(2))));
    assertEquals(65536, in.get(readerInspector.getStructFieldData(row,
        fields.get(3))));
    assertEquals(Long.MAX_VALUE, lo.get(readerInspector.
        getStructFieldData(row, fields.get(4))));
    assertEquals(2.0, fl.get(readerInspector.getStructFieldData(row,
        fields.get(5))), 0.00001);
    assertEquals(-5.0, dbl.get(readerInspector.getStructFieldData(row,
        fields.get(6))), 0.00001);
    assertEquals(bytes(), bi.getPrimitiveWritableObject(
        readerInspector.getStructFieldData(row, fields.get(7))));
    assertEquals("bye", st.getPrimitiveJavaObject(readerInspector.
        getStructFieldData(row, fields.get(8))));
    midRow = midli.getList(mid.getStructFieldData(readerInspector.
        getStructFieldData(row, fields.get(9)), midFields.get(0)));
    assertNotNull(midRow);
    assertEquals(2, midRow.size());
    assertEquals(1, inner_in.get(inner.getStructFieldData(midRow.get(0),
        inFields.get(0))));
    assertEquals("bye", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (midRow.get(0), inFields.get(1))));
    assertEquals(2, inner_in.get(inner.getStructFieldData(midRow.get(1),
        inFields.get(0))));
    assertEquals("sigh", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (midRow.get(1), inFields.get(1))));
    list = li.getList(readerInspector.getStructFieldData(row,
        fields.get(10)));
    assertEquals(3, list.size());
    assertEquals(100000000, inner_in.get(inner.getStructFieldData(list.get(0),
        inFields.get(0))));
    assertEquals("cat", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(0), inFields.get(1))));
    assertEquals(-100000, inner_in.get(inner.getStructFieldData(list.get(1),
        inFields.get(0))));
    assertEquals("in", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(1), inFields.get(1))));
    assertEquals(1234, inner_in.get(inner.getStructFieldData(list.get(2),
        inFields.get(0))));
    assertEquals("hat", inner_st.getPrimitiveJavaObject(inner.getStructFieldData
        (list.get(2), inFields.get(1))));
    map = ma.getMap(readerInspector.getStructFieldData(row,
        fields.get(11)));
    assertEquals(2, map.size());
    boolean[] found = new boolean[2];
    for(Object key: map.keySet()) {
      String str = mk.getPrimitiveJavaObject(key);
      if (str.equals("chani")) {
        assertEquals(false, found[0]);
        assertEquals(5, inner_in.get(inner.getStructFieldData(map.get(key),
            inFields.get(0))));
        assertEquals(str, inner_st.getPrimitiveJavaObject(
            inner.getStructFieldData(map.get(key), inFields.get(1))));
        found[0] = true;
      } else if (str.equals("mauddib")) {
        assertEquals(false, found[1]);
        assertEquals(1, inner_in.get(inner.getStructFieldData(map.get(key),
            inFields.get(0))));
        assertEquals(str, inner_st.getPrimitiveJavaObject(
            inner.getStructFieldData(map.get(key), inFields.get(1))));
        found[1] = true;
      } else {
        throw new IllegalArgumentException("Unknown key " + str);
      }
    }
    assertEquals(true, found[0]);
    assertEquals(true, found[1]);

    // handle the close up
    assertEquals(false, rows.hasNext());
    rows.close();
  }

  @Test
  public void testColumnProjection() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (InnerStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        1000, CompressionKind.NONE, 100, 1000);
    Random r1 = new Random(1);
    Random r2 = new Random(2);
    int x;
    int minInt=0, maxInt=0;
    String y;
    String minStr = null, maxStr = null;
    for(int i=0; i < 21000; ++i) {
      x = r1.nextInt();
      y = Long.toHexString(r2.nextLong());
      if (i == 0 || x < minInt) {
        minInt = x;
      }
      if (i == 0 || x > maxInt) {
        maxInt = x;
      }
      if (i == 0 || y.compareTo(minStr) < 0) {
        minStr = y;
      }
      if (i == 0 || y.compareTo(maxStr) > 0) {
        maxStr = y;
      }
      writer.addRow(inner(x, y));
    }
    writer.close();
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);

    // check out the statistics
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(3, stats.length);
    for(ColumnStatistics s: stats) {
      assertEquals(21000, s.getNumberOfValues());
      if (s instanceof IntegerColumnStatistics) {
        assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
        assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
      } else if (s instanceof StringColumnStatistics) {
        assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
        assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());
      }
    }

    // check out the types
    List<OrcProto.Type> types = reader.getTypes();
    assertEquals(3, types.size());
    assertEquals(OrcProto.Type.Kind.STRUCT, types.get(0).getKind());
    assertEquals(2, types.get(0).getSubtypesCount());
    assertEquals(1, types.get(0).getSubtypes(0));
    assertEquals(2, types.get(0).getSubtypes(1));
    assertEquals(OrcProto.Type.Kind.INT, types.get(1).getKind());
    assertEquals(0, types.get(1).getSubtypesCount());
    assertEquals(OrcProto.Type.Kind.STRING, types.get(2).getKind());
    assertEquals(0, types.get(2).getSubtypesCount());

    // read the contents and make sure they match
    RecordReader rows1 = reader.rows(new boolean[]{true, true, false});
    RecordReader rows2 = reader.rows(new boolean[]{true, false, true});
    r1 = new Random(1);
    r2 = new Random(2);
    OrcLazyStruct row1 = null;
    OrcLazyStruct row2 = null;
    for(int i = 0; i < 21000; ++i) {
      assertEquals(true, rows1.hasNext());
      assertEquals(true, rows2.hasNext());
      row1 = (OrcLazyStruct) rows1.next(row1);
      row2 = (OrcLazyStruct) rows2.next(row2);
      assertEquals(r1.nextInt(), ((IntWritable) ((OrcLazyInt) ((OrcStruct) row1.materialize()).getFieldValue(0)).materialize()).get());
      assertEquals(Long.toHexString(r2.nextLong()),
          ((OrcLazyString) ((OrcStruct) row2.materialize()).getFieldValue(1)).materialize().toString());
    }
    assertEquals(false, rows1.hasNext());
    assertEquals(false, rows2.hasNext());
    rows1.close();
    rows2.close();
  }

  @Test
  public void testEmptyFile() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        1000, CompressionKind.NONE, 100, 10000);
    writer.close();
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(false, reader.rows(null).hasNext());
    assertEquals(CompressionKind.NONE, reader.getCompression());
    assertEquals(0, reader.getNumberOfRows());
    assertEquals(0, reader.getCompressionSize());
    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    assertEquals(3, reader.getContentLength());
    assertEquals(false, reader.getStripes().iterator().hasNext());
  }

  @Test
  public void testMetaData() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        1000, CompressionKind.NONE, 100, 10000);
    writer.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128));
    writer.addUserMetadata("clobber", byteBuf(1,2,3));
    writer.addUserMetadata("clobber", byteBuf(4,3,2,1));
    ByteBuffer bigBuf = ByteBuffer.allocate(40000);
    Random random = new Random(0);
    random.nextBytes(bigBuf.array());
    writer.addUserMetadata("big", bigBuf);
    bigBuf.position(0);
    writer.addRow(new BigRow(true, (byte) 127, (short) 1024, 42,
        42L * 1024 * 1024 * 1024, (float) 3.1415, -2.713, null,
        null, null, null, null));
    writer.addUserMetadata("clobber", byteBuf(5,7,11,13,17,19));
    writer.close();
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(byteBuf(5,7,11,13,17,19), reader.getMetadataValue("clobber"));
    assertEquals(byteBuf(1,2,3,4,5,6,7,-1,-2,127,-128),
        reader.getMetadataValue("my.meta"));
    assertEquals(bigBuf, reader.getMetadataValue("big"));
    try {
      reader.getMetadataValue("unknown");
      assertTrue(false);
    } catch (IllegalArgumentException iae) {
      // PASS
    }
    int i = 0;
    for(String key: reader.getMetadataKeys()) {
      if ("my.meta".equals(key) ||
          "clobber".equals(key) ||
          "big".equals(key)) {
        i += 1;
      } else {
        throw new IllegalArgumentException("unknown key " + key);
      }
    }
    assertEquals(3, i);
  }

  /**
   * We test union and timestamp separately since we need to make the
   * object inspector manually. (The Hive reflection-based doesn't handle
   * them properly.)
   */
  @Test
  public void testUnionAndTimestamp() throws Exception {
    final List<OrcProto.Type> types = ImmutableList.of(
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT)
                                  .addFieldNames("time")
                                  .addFieldNames("union")
                                  .addSubtypes(1)
                                  .addSubtypes(2)
                                  .build(),
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP)
                                  .build(),
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION)
                                  .addSubtypes(3)
                                  .addSubtypes(4)
                                  .build(),
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT)
                                  .build(),
        OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING)
                                  .build()
    );

    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = OrcLazyObjectInspectorUtils.createWritableObjectInspector(0, types);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        15 * 1024, CompressionKind.NONE, 100, 10000);
    OrcStruct row = new OrcStruct(types.get(0).getFieldNamesList());
    OrcUnion union = new OrcUnion();
    row.setFieldValue(1, union);
    row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00"));
    union.set((byte) 0, new IntWritable(42));
    writer.addRow(row);
    row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789"));
    union.set((byte)1, new Text("hello"));
    writer.addRow(row);
    row.setFieldValue(0, null);
    row.setFieldValue(1, null);
    writer.addRow(row);
    row.setFieldValue(1, union);
    union.set((byte) 0, null);
    writer.addRow(row);
    union.set((byte) 1, null);
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(200000));
    row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00"));
    writer.addRow(row);
    for(int i=1900; i < 2200; ++i) {
      row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." + i));
      if ((i & 1) == 0) {
        union.set((byte) 0, new IntWritable(i*i));
      } else {
        union.set((byte) 1, new Text(new Integer(i*i).toString()));
      }
      writer.addRow(row);
    }
    // let's add a lot of constant rows to test the rle
    row.setFieldValue(0, null);
    union.set((byte) 0, new IntWritable(1732050807));
    for(int i=0; i < 5000; ++i) {
      writer.addRow(row);
    }
    union.set((byte) 0, new IntWritable(0));
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(10));
    writer.addRow(row);
    union.set((byte) 0, new IntWritable(138));
    writer.addRow(row);
    writer.close();
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    assertEquals(5309, reader.getNumberOfRows());
    int stripeCount = 0;
    int rowCount = 0;
    long currentOffset = -1;
    for(StripeInformation stripe: reader.getStripes()) {
      stripeCount += 1;
      rowCount += stripe.getNumberOfRows();
      if (currentOffset < 0) {
        currentOffset = stripe.getOffset() + stripe.getIndexLength() +
            stripe.getDataLength() + stripe.getFooterLength();
      } else {
        assertEquals(currentOffset, stripe.getOffset());
        currentOffset += stripe.getIndexLength() +
            stripe.getDataLength() + stripe.getFooterLength();
      }
    }
    assertEquals(reader.getNumberOfRows(), rowCount);
    assertEquals(2, stripeCount);
    assertEquals(reader.getContentLength(), currentOffset);
    RecordReader rows = reader.rows(null);
    assertEquals(0, rows.getRowNumber());
    assertEquals(0.0, rows.getProgress(), 0.000001);
    assertEquals(true, rows.hasNext());
    OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
    row = (OrcStruct) lazyRow.materialize();
    inspector = reader.getObjectInspector();
    assertEquals("struct<time:timestamp,union:uniontype<int,string>>",
        inspector.getTypeName());
    assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
        ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
    union = (OrcUnion) ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(0, union.getTag());
    assertEquals(new IntWritable(42), union.getObject());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
        ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(1, union.getTag());
    assertEquals(new Text("hello"), union.getObject());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
    assertEquals(null, ((OrcLazyObject) row.getFieldValue(1)).materialize());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
    union = (OrcUnion) ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(0, union.getTag());
    assertEquals(null, union.getObject());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(1, union.getTag());
    assertEquals(null, union.getObject());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"),
        ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(new IntWritable(200000), union.getObject());
    for(int i=1900; i < 2200; ++i) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
          ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
      ((OrcLazyUnion) row.getFieldValue(1)).materialize();
      if ((i & 1) == 0) {
        assertEquals(0, union.getTag());
        assertEquals(new IntWritable(i*i), union.getObject());
      } else {
        assertEquals(1, union.getTag());
        assertEquals(new Text(new Integer(i*i).toString()), union.getObject());
      }
    }
    for(int i=0; i < 5000; ++i) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      ((OrcLazyUnion) row.getFieldValue(1)).materialize();
      assertEquals(new IntWritable(1732050807), union.getObject());
    }
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(new IntWritable(0), union.getObject());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(new IntWritable(10), union.getObject());
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    ((OrcLazyUnion) row.getFieldValue(1)).materialize();
    assertEquals(new IntWritable(138), union.getObject());
    assertEquals(false, rows.hasNext());
    assertEquals(1.0, rows.getProgress(), 0.00001);
    assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
    rows.close();
  }

  /**
   * Read and write a randomly generated snappy file.
   * @throws Exception
   */
  @Test
  public void testSnappy() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (InnerStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        1000, CompressionKind.SNAPPY, 100, 10000);
    Random rand = new Random(12);
    for(int i=0; i < 10000; ++i) {
      writer.addRow(new InnerStruct(rand.nextInt(),
          Integer.toHexString(rand.nextInt())));
    }
    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    rand = new Random(12);
    OrcLazyStruct row = null;
    for(int i=0; i < 10000; ++i) {
      assertEquals(true, rows.hasNext());
      row = (OrcLazyStruct) rows.next(row);
      assertEquals(rand.nextInt(), ((IntWritable) ((OrcLazyInt) ((OrcStruct) row.materialize()).getFieldValue(0)).materialize()).get());
      assertEquals(Integer.toHexString(rand.nextInt()),
          ((OrcLazyString) ((OrcStruct) row.materialize()).getFieldValue(1)).materialize().toString());
    }
    assertEquals(false, rows.hasNext());
    rows.close();
  }

  /**
   * Read and write a randomly generated snappy file.
   * @throws Exception
   */
  @Test
  public void testWithoutIndex() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (InnerStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }

    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        5000, CompressionKind.SNAPPY, 1000, 0);
    Random rand = new Random(24);
    for(int i=0; i < 10000; ++i) {
      InnerStruct row = new InnerStruct(rand.nextInt(),
          Integer.toBinaryString(rand.nextInt()));
      for(int j=0; j< 5; ++j) {
        writer.addRow(row);
      }
    }
    writer.close();
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(50000, reader.getNumberOfRows());
    assertEquals(0, reader.getRowIndexStride());
    StripeInformation stripe = reader.getStripes().iterator().next();
    assertEquals(true, stripe.getDataLength() != 0);
    assertEquals(0, stripe.getIndexLength());
    RecordReader rows = reader.rows(null);
    rand = new Random(24);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for(int i=0; i < 10000; ++i) {
      int intVal = rand.nextInt();
      String strVal = Integer.toBinaryString(rand.nextInt());
      for(int j=0; j < 5; ++j) {
        assertEquals(true, rows.hasNext());
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        assertEquals(intVal, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
        assertEquals(strVal, ((OrcLazyString) row.getFieldValue(1)).materialize().toString());
      }
    }
    assertEquals(false, rows.hasNext());
    rows.close();
  }

  private static class RandomRowInputs {
    long[] intValues;
    double[] doubleValues;
    String[] stringValues;
    BytesWritable[] byteValues;
    String[] words = new String[128];

    public RandomRowInputs(int count) {
      intValues= new long[count];
      doubleValues = new double[count];
      stringValues = new String[count];
      byteValues = new BytesWritable[count];
    }
  }

  private RandomRowInputs writeRandomRows(int count, boolean lowMemoryMode) throws IOException {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (ReallyBigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }

    ReaderWriterProfiler.setProfilerOptions(conf);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_FILE_ENABLE_LOW_MEMORY_MODE, lowMemoryMode);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        8000000, CompressionKind.ZLIB, 65536, 1000,
        new MemoryManager(conf));
    Random rand = new Random(42);
    RandomRowInputs inputs = new RandomRowInputs(count);
    long[] intValues = inputs.intValues;
    double[] doubleValues = inputs.doubleValues;
    String[] stringValues = inputs.stringValues;
    BytesWritable[] byteValues = inputs.byteValues;
    String[] words = inputs.words;
    for(int i=0; i < words.length; ++i) {
      words[i] = Integer.toHexString(rand.nextInt());
    }
    for(int i=0; i < count/2; ++i) {
      intValues[2*i] = rand.nextLong();
      intValues[2*i+1] = rand.nextLong();
      stringValues[2*i] = words[rand.nextInt(words.length)];
      stringValues[2*i+1] = words[rand.nextInt(words.length)];
    }
    for(int i=0; i < count; ++i) {
      doubleValues[i] = rand.nextDouble();
      byte[] buf = new byte[20];
      rand.nextBytes(buf);
      byteValues[i] = new BytesWritable(buf);
    }
    for(int i=0; i < count; ++i) {
      ReallyBigRow bigrow = createRandomRow(intValues, doubleValues, stringValues,
          byteValues, words, i);
      writer.addRow(bigrow);
    }
    writer.close();
    writer = null;
    return inputs;
  }

  private static enum NumberOfNulls {
    // No nulls
    NONE,
    // Every nth value is null
    SOME,
    // Every nth value is NOT null
    MANY
  }

  private void compareRowsUsingPrimitives(ReallyBigRow expected, OrcLazyBoolean boolean1,
      OrcLazyShort short1, OrcLazyInt int1, OrcLazyLong long1, OrcLazyShort short2,
      OrcLazyInt int2, OrcLazyLong long2, OrcLazyShort short3, OrcLazyInt int3, OrcLazyLong long3,
      OrcLazyFloat float1, OrcLazyDouble double1) throws IOException {
    try {
      boolean b1 = boolean1.materializeBoolean();
      assertEquals(expected.boolean1.booleanValue(), b1);
    }
    catch(IOException e) {
      assert(boolean1.nextIsNull());
      assertNull(expected.boolean1);
    }

    if (short1.nextIsNull()) {
      assertNull(expected.short1);
    } else {
      assertEquals(expected.short1.shortValue(),
          ((ShortWritable) short1.materialize()).get());
    }

    try {
      int i1 = int1.materializeInt();
      assertEquals(expected.int1.intValue(), i1);
    }
    catch(IOException e) {
      assert(int1.nextIsNull());
      assertNull(expected.int1);
    }

    try {
      long l1 =  long1.materializeLong();
      assertEquals(expected.long1.longValue(), l1);
    }
    catch(IOException e) {
      assert(long1.nextIsNull());
      assertNull(expected.long1);
    }

    try {
      short s2 = short2.materializeShort();
      assertEquals(expected.short2.shortValue(), s2);
    }
    catch(IOException e) {
      assert(short2.nextIsNull());
      assertNull(expected.short2);
    }

    try {
      int i2 = int2.materializeInt();
      assertEquals(expected.int2.intValue(), i2);
    }
    catch(IOException e) {
      assert(int2.nextIsNull());
      assertNull(expected.int2);
    }

    try {
      long l2 = long2.materializeLong();
      assertEquals(expected.long2.longValue(), l2);
    }
    catch(IOException e) {
      assert(long2.nextIsNull());
      assertNull(expected.long2);
    }

    try {
      short s3 = short3.materializeShort();
      assertEquals(expected.short3.shortValue(), s3);
    }
    catch(IOException e) {
      assert(short3.nextIsNull());
      assertNull(expected.short3);
    }

    try {
      int i3 = int3.materializeInt();
      assertEquals(expected.int3.intValue(), i3);
    }
    catch(IOException e) {
      assert(int3.nextIsNull());
      assertNull(expected.int3);
    }

    try {
      long l3 = long3.materializeLong();
      assertEquals(expected.long3.longValue(), l3);
    }
    catch(IOException e) {
      assert(long3.nextIsNull());
      assertNull(expected.long3);
    }

    try {
      float f1 = float1.materializeFloat();
      assertEquals(expected.float1.floatValue(), f1, 0.0001);
    }
    catch(IOException e) {
      assert(float1.nextIsNull());
      assertNull(expected.float1);
    }

    try {
      double d1 = double1.materializeDouble();
      assertEquals(expected.double1.doubleValue(), d1, 0.0001);
    }
    catch(IOException e) {
      assert(double1.nextIsNull());
      assertNull(expected.double1);
    }
  }

  private void compareRows(OrcStruct row, RandomRowInputs inputs, int rowNumber,
      NumberOfNulls numNulls, boolean testPrimitives) throws Exception {
    ReallyBigRow expected = null ;
    switch (numNulls) {
    case MANY:
    case SOME:
      expected = createRandomRowWithNulls(inputs.intValues, inputs.doubleValues,
          inputs.stringValues, inputs.byteValues, inputs.words, rowNumber, numNulls);
      break;
    case NONE:
      expected = createRandomRow(inputs.intValues, inputs.doubleValues,
          inputs.stringValues, inputs.byteValues, inputs.words, rowNumber);
      break;
    }
    OrcLazyBoolean boolean1 = (OrcLazyBoolean) row.getFieldValue(0);
    if (boolean1.nextIsNull()) {
      assertNull(expected.boolean1);
    } else {
      assertEquals(expected.boolean1.booleanValue(),
          ((BooleanWritable) boolean1.materialize()).get());
    }

    if (((OrcLazyObject) row.getFieldValue(1)).nextIsNull()) {
      assertNull(expected.byte1);
    } else {
      assertEquals(expected.byte1.byteValue(),
          ((ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize()).get());
    }

    OrcLazyShort short1 = (OrcLazyShort) row.getFieldValue(2);
    try
    {
      short s1 = short1.materializeShort();
      assertEquals(expected.short1.shortValue(), s1);
    }
    catch(IOException e) {
      assert(short1.nextIsNull());
      assertNull(expected.short1);
    }

    OrcLazyInt int1 = (OrcLazyInt)row.getFieldValue(3);
    if (int1.nextIsNull()) {
      assertNull(expected.int1);
    } else {
      assertEquals(expected.int1.intValue(),
          ((IntWritable) int1.materialize()).get());
    }

    OrcLazyLong long1 = (OrcLazyLong)row.getFieldValue(4);
    if (long1.nextIsNull()) {
      assertNull(expected.long1);
    } else {
      assertEquals(expected.long1.longValue(),
          ((LongWritable) long1.materialize()).get());
    }

    OrcLazyShort short2 = (OrcLazyShort)row.getFieldValue(5);
    if (short2.nextIsNull()) {
      assertNull(expected.short2);
    } else {
      assertEquals(expected.short2.shortValue(),
          ((ShortWritable) short2.materialize()).get());
    }

    OrcLazyInt int2 = (OrcLazyInt) row.getFieldValue(6);
    if (int2.nextIsNull()) {
      assertNull(expected.int2);
    } else {
      assertEquals(expected.int2.intValue(),
          ((IntWritable) int2.materialize()).get());
    }

    OrcLazyLong long2 = (OrcLazyLong) row.getFieldValue(7);
    if (long2.nextIsNull()) {
      assertNull(expected.long2);
    } else {
      assertEquals(expected.long2.longValue(),
          ((LongWritable) long2.materialize()).get());
    }

    OrcLazyShort short3 = (OrcLazyShort)row.getFieldValue(8);
    if (short3.nextIsNull()) {
      assertNull(expected.short3);
    } else {
      assertEquals(expected.short3.shortValue(),
          ((ShortWritable) short3.materialize()).get());
    }

    OrcLazyInt int3 = (OrcLazyInt) row.getFieldValue(9);
    if (int3.nextIsNull()) {
      assertNull(expected.int3);
    } else {
      assertEquals(expected.int3.intValue(),
          ((IntWritable) int3.materialize()).get());
    }

    OrcLazyLong long3 = (OrcLazyLong) row.getFieldValue(10);
    if (long3.nextIsNull()) {
      assertNull(expected.long3);
    } else {
      assertEquals(expected.long3.longValue(),
          ((LongWritable) long3.materialize()).get());
    }

    OrcLazyFloat float1 = (OrcLazyFloat) row.getFieldValue(11);
    if (float1.nextIsNull()) {
      assertNull(expected.float1);
    } else {
      assertEquals(expected.float1.floatValue(),
          ((FloatWritable) float1.materialize()).get(), 0.0001);
    }

    OrcLazyDouble double1 = (OrcLazyDouble) row.getFieldValue(12);
    if (double1.nextIsNull()) {
      assertNull(expected.double1);
    } else {
      assertEquals(expected.double1.doubleValue(),
          ((DoubleWritable) double1.materialize()).get(), 0.0001);
    }

    if (((OrcLazyObject) row.getFieldValue(13)).nextIsNull()) {
      assertNull(expected.bytes1);
    } else {
      assertEquals(expected.bytes1, ((OrcLazyBinary) row.getFieldValue(13)).materialize());
    }

    if (((OrcLazyObject) row.getFieldValue(14)).nextIsNull()) {
      assertNull(expected.string1);
    } else {
      assertEquals(expected.string1, ((OrcLazyString) row.getFieldValue(14)).materialize());
    }

    if (((OrcLazyString) row.getFieldValue(15)).nextIsNull()) {
      assertNull(expected.string2);
    } else {
      assertEquals(expected.string2, ((OrcLazyString) row.getFieldValue(15)).materialize());
    }

    if (((OrcLazyString) row.getFieldValue(16)).nextIsNull()) {
      assertNull(expected.string3);
    } else {
      assertEquals(expected.string3, ((OrcLazyString) row.getFieldValue(16)).materialize());
    }

    if (((OrcLazyObject) row.getFieldValue(17)).nextIsNull()) {
      assertNull(expected.middle);
    } else {
      final List<InnerStruct> expectedList = expected.middle.list;
      final OrcStruct actualMiddle = (OrcStruct) ((OrcLazyStruct) row.getFieldValue(17)).materialize();
      final List<OrcStruct> actualList =
          (List) actualMiddle.getFieldValue(0);
      compareListOfStructs(expectedList, actualList);
      final List<String> actualFieldNames = actualMiddle.getFieldNames();
      final List<String> expectedFieldNames = ImmutableList.of("list");
      compareLists(expectedFieldNames, actualFieldNames);
    }
    if (((OrcLazyObject) row.getFieldValue(18)).nextIsNull()) {
      assertNull(expected.list);
    } else {
      compareListOfStructs(expected.list, (List) ((OrcLazyList) row.getFieldValue(18)).materialize());
    }
    if (((OrcLazyObject) row.getFieldValue(19)).nextIsNull()) {
      assertNull(expected.map);
    } else {
      compareMap(expected.map, (Map) ((OrcLazyMap) row.getFieldValue(19)).materialize());
    }

    if (testPrimitives) {
      compareRowsUsingPrimitives(expected, boolean1, short1, int1, long1, short2, int2, long2,
          short3, int3, long3, float1, double1);
    }
  }

  private void compareRowsWithoutNextIsNull(OrcStruct row, RandomRowInputs inputs, int rowNumber,
      NumberOfNulls numNulls, boolean usingPrimitives) throws Exception {

    ReallyBigRow expected = null;
    switch (numNulls) {
    case MANY:
    case SOME:
      expected = createRandomRowWithNulls(inputs.intValues, inputs.doubleValues,
          inputs.stringValues, inputs.byteValues, inputs.words, rowNumber, numNulls);
      break;
    case NONE:
      expected = createRandomRow(inputs.intValues, inputs.doubleValues,
          inputs.stringValues, inputs.byteValues, inputs.words, rowNumber);
      break;
    }

    OrcLazyBoolean lazyBoolean1 = (OrcLazyBoolean) row.getFieldValue(0);
    BooleanWritable boolean1 = (BooleanWritable) lazyBoolean1.materialize();
    if (boolean1 == null) {
      assertNull(expected.boolean1);
    } else {
      assertEquals(expected.boolean1.booleanValue(), boolean1.get());
    }

    ByteWritable byte1 = (ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize();
    if (byte1 == null) {
      assertNull(expected.byte1);
    } else {
      assertEquals(expected.byte1.byteValue(), byte1.get());
    }

    OrcLazyShort lazyShort1 = (OrcLazyShort) row.getFieldValue(2);
    ShortWritable short1 = (ShortWritable) lazyShort1.materialize();
    if (short1 == null) {
      assertNull(expected.short1);
    } else {
      assertEquals(expected.short1.shortValue(), short1.get());
    }

    OrcLazyInt lazyInt1 = (OrcLazyInt) row.getFieldValue(3);
    IntWritable int1 = (IntWritable) lazyInt1.materialize();
    if (int1 == null) {
      assertNull(expected.int1);
    } else {
      assertEquals(expected.int1.intValue(), int1.get());
    }

    OrcLazyLong lazyLong1 = (OrcLazyLong) row.getFieldValue(4);
    LongWritable long1 = (LongWritable) lazyLong1.materialize();
    if (long1 == null) {
      assertNull(expected.long1);
    } else {
      assertEquals(expected.long1.longValue(), long1.get());
    }

    OrcLazyShort lazyShort2 = (OrcLazyShort) row.getFieldValue(5);
    ShortWritable short2 = (ShortWritable) lazyShort2.materialize();
    if (short2 == null) {
      assertNull(expected.short2);
    } else {
      assertEquals(expected.short2.shortValue(), short2.get());
    }

    OrcLazyInt lazyInt2 = (OrcLazyInt) row.getFieldValue(6);
    IntWritable int2 = (IntWritable) lazyInt2.materialize();
    if (int2 == null) {
      assertNull(expected.int2);
    } else {
      assertEquals(expected.int2.intValue(), int2.get());
    }

    OrcLazyLong lazyLong2 = (OrcLazyLong) row.getFieldValue(7);
    LongWritable long2 = (LongWritable) lazyLong2.materialize();
    if (long2 == null) {
      assertNull(expected.long2);
    } else {
      assertEquals(expected.long2.longValue(), long2.get());
    }

    OrcLazyShort lazyShort3 = (OrcLazyShort) row.getFieldValue(8);
    ShortWritable short3 = (ShortWritable) lazyShort3.materialize();
    if (short3 == null) {
      assertNull(expected.short3);
    } else {
      assertEquals(expected.short3.shortValue(), short3.get());
    }

    OrcLazyInt lazyInt3 = (OrcLazyInt) row.getFieldValue(9);
    IntWritable int3 = (IntWritable) lazyInt3.materialize();
    if (int3 == null) {
      assertNull(expected.int3);
    } else {
      assertEquals(expected.int3.intValue(), int3.get());
    }

    OrcLazyLong lazyLong3 = (OrcLazyLong) row.getFieldValue(10);
    LongWritable long3 = (LongWritable) lazyLong3.materialize();
    if (long3 == null) {
      assertNull(expected.long3);
    } else {
      assertEquals(expected.long3.longValue(), long3.get());
    }

    OrcLazyFloat lazyFloat1 = (OrcLazyFloat) row.getFieldValue(11);
    FloatWritable float1 = (FloatWritable) lazyFloat1.materialize();
    if (float1 == null) {
      assertNull(expected.float1);
    } else {
      assertEquals(expected.float1.floatValue(), float1.get(), 0.0001);
    }

    OrcLazyDouble lazyDouble1 = (OrcLazyDouble) row.getFieldValue(12);
    DoubleWritable double1 = (DoubleWritable) lazyDouble1.materialize();
    if (double1 == null) {
      assertNull(expected.double1);
    } else {
      assertEquals(expected.double1.doubleValue(), double1.get(), 0.0001);
    }

    BytesWritable bytes1 = (BytesWritable) ((OrcLazyBinary) row.getFieldValue(13)).materialize();
    if (bytes1 == null) {
      assertNull(expected.bytes1);
    } else {
      assertEquals(expected.bytes1, bytes1);
    }

    Text string1 = (Text) ((OrcLazyString) row.getFieldValue(14)).materialize();
    if (string1 == null) {
      assertNull(expected.string1);
    } else {
      assertEquals(expected.string1, string1);
    }

    Text string2 = (Text) ((OrcLazyString) row.getFieldValue(15)).materialize();
    if (string2 == null) {
      assertNull(expected.string2);
    } else {
      assertEquals(expected.string2, string2);
    }

    Text string3 = (Text) ((OrcLazyString) row.getFieldValue(16)).materialize();
    if (string3 == null) {
      assertNull(expected.string3);
    } else {
      assertEquals(expected.string3, string3);
    }

    OrcStruct middle = (OrcStruct) ((OrcLazyStruct) row.getFieldValue(17)).materialize();
    if (middle == null) {
      assertNull(expected.middle);
    } else {
      final List<InnerStruct> expectedList = expected.middle.list;
      final List<OrcStruct> actualList = (List) middle.getFieldValue(0);
      compareListOfStructs(expectedList, actualList);
      final List<String> actualFieldNames = middle.getFieldNames();
      final List<String> expectedFieldNames = ImmutableList.of("list");
      compareLists(expectedFieldNames, actualFieldNames);
    }

    List list = (List) ((OrcLazyList) row.getFieldValue(18)).materialize();
    if (list == null) {
      assertNull(expected.list);
    } else {
      compareListOfStructs(expected.list, list);
    }

    Map map = (Map) ((OrcLazyMap) row.getFieldValue(19)).materialize();
    if (map == null) {
      assertNull(expected.map);
    } else {
      compareMap(expected.map, map);
    }

    if (usingPrimitives) {
      compareRowsUsingPrimitives(expected, lazyBoolean1, lazyShort1, lazyInt1, lazyLong1,
          lazyShort2, lazyInt2, lazyLong2, lazyShort3, lazyInt3, lazyLong3, lazyFloat1,
          lazyDouble1);
    }
  }

  @Test
  public void testSeek() throws Exception {
    testSeek(false, true, false);
  }

  @Test
  public void testSeekLowMemory() throws Exception {
    testSeek(true, false, false);
  }

  @Test
  public void testSeekLazyHdfsReads() throws Exception {
    testSeek(false, false, true);
  }

  private void testSeek(boolean lowMemory, boolean testPrimitives, boolean lazyHdfsReads)
      throws Exception {
    final int COUNT=32768;
    RandomRowInputs inputs = writeRandomRows(COUNT, lowMemory);
    ReaderWriterProfiler.setProfilerOptions(conf);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, !lazyHdfsReads);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(COUNT, reader.getNumberOfRows());
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for(int i=COUNT-1; i >= 0; --i) {
      rows.seekToRow(i);
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      compareRows(row, inputs, i, NumberOfNulls.NONE, testPrimitives);
    }
    rows.close();
  }

  private void readEveryNthRow(int n, boolean withoutNextIsNull, NumberOfNulls numNulls) throws Exception {
    final int COUNT=32768;
    RandomRowInputs inputs = null;
    switch (numNulls) {
    case NONE:
      inputs = writeRandomRows(COUNT, false);
      break;
    case SOME:
    case MANY:
      inputs = writeRandomRowsWithNulls(COUNT, numNulls, false);
      break;
    }

    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(COUNT, reader.getNumberOfRows());
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for(int i = 0; i < COUNT / n; i++) {
      rows.seekToRow(i * n);
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      if (withoutNextIsNull) {
        compareRowsWithoutNextIsNull(row, inputs, i * n, numNulls, true);
      } else {
        compareRows(row, inputs, i * n, numNulls, true);
      }
    }
    rows.close();
  }

  @Test
  public void testEveryRow() throws Exception {
    readEveryNthRow(1, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryOtherRow() throws Exception {
    readEveryNthRow(2, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryThirdRow() throws Exception {
    readEveryNthRow(3, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryFourthRow() throws Exception {
    readEveryNthRow(4, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryFifthRow() throws Exception {
    readEveryNthRow(5, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEverySixthRow() throws Exception {
    readEveryNthRow(6, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEverySeventhRow() throws Exception {
    readEveryNthRow(7, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryEighthRow() throws Exception {
    readEveryNthRow(8, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryNinthRow() throws Exception {
    readEveryNthRow(9, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryTenthRow() throws Exception {
    readEveryNthRow(10, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryHundredthRow() throws Exception {
    readEveryNthRow(100, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryThousandthRow() throws Exception {
    readEveryNthRow(1000, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryTenThousandthRow() throws Exception {
    readEveryNthRow(10000, false, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(1, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryOtherRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(2, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryThirdRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(3, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryFourthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(4, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryFifthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(5, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEverySixthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(6, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEverySeventhRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(7, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryEighthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(8, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryNinthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(9, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryTenthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(10, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryHundredthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(100, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryThousandthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(1000, true, NumberOfNulls.NONE);
  }

  @Test
  public void testEveryTenThousandthRowWithoutNextIsNull() throws Exception {
    readEveryNthRow(10000, true, NumberOfNulls.NONE);
  }

  private RandomRowInputs writeRandomRowsWithNulls(int count, NumberOfNulls numNulls,
      boolean lowMemoryMode) throws IOException {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (ReallyBigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        lowMemoryMode ? 200000 : 4000000, CompressionKind.ZLIB, 65536, 1000,
            new MemoryManager(conf));
    Random rand = new Random(42);
    RandomRowInputs inputs = new RandomRowInputs(count);
    long[] intValues = inputs.intValues;
    double[] doubleValues = inputs.doubleValues;
    String[] stringValues = inputs.stringValues;
    BytesWritable[] byteValues = inputs.byteValues;
    String[] words = inputs.words;
    for(int i=0; i < words.length; ++i) {
      words[i] = Integer.toHexString(rand.nextInt());
    }
    for(int i=0; i < count/2; ++i) {
      intValues[2*i] = rand.nextLong();
      intValues[2*i+1] = rand.nextLong();
      stringValues[2*i] = words[rand.nextInt(words.length)];
      stringValues[2*i+1] = words[rand.nextInt(words.length)];
    }
    for(int i=0; i < count; ++i) {
      doubleValues[i] = rand.nextDouble();
      byte[] buf = new byte[20];
      rand.nextBytes(buf);
      byteValues[i] = new BytesWritable(buf);
    }
    for(int i=0; i < count; ++i) {
      ReallyBigRow bigrow = createRandomRowWithNulls(intValues, doubleValues, stringValues,
          byteValues, words, i, numNulls);
      writer.addRow(bigrow);
    }
    writer.close();
    writer = null;
    return inputs;
  }

  @Test
  public void testEveryRowWithNulls() throws Exception {
    readEveryNthRow(1, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryOtherRowWithNulls() throws Exception {
    readEveryNthRow(2, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryThirdRowWithNulls() throws Exception {
    readEveryNthRow(3, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryFourthRowWithNulls() throws Exception {
    readEveryNthRow(4, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryFifthRowWithNulls() throws Exception {
    readEveryNthRow(5, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEverySixthRowWithNulls() throws Exception {
    readEveryNthRow(6, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEverySeventhRowWithNulls() throws Exception {
    readEveryNthRow(7, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryEighthRowWithNulls() throws Exception {
    readEveryNthRow(8, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryNinthRowWithNulls() throws Exception {
    readEveryNthRow(9, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryTenthRowWithNulls() throws Exception {
    readEveryNthRow(10, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryHundredthRowWithNulls() throws Exception {
    readEveryNthRow(100, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryThousandthRowWithNulls() throws Exception {
    readEveryNthRow(1000, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryTenThousandthRowWithNulls() throws Exception {
    readEveryNthRow(10000, false, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(1, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryOtherRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(2, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryThirdRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(3, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryFourthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(4, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryFifthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(5, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEverySixthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(6, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEverySeventhRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(7, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryEighthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(8, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryNinthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(9, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryTenthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(10, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryHundredthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(100, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryThousandthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(1000, true, NumberOfNulls.SOME);
  }

  @Test
  public void testEveryTenThousandthRowWithNullsWithoutNextIsNull() throws Exception {
    readEveryNthRow(10000, true, NumberOfNulls.SOME);
  }

  private void skipEveryNthRow(int n, boolean withoutNextIsNull, NumberOfNulls numNulls) throws Exception {
    final int COUNT=32768;
    RandomRowInputs inputs = null;
    switch (numNulls) {
    case NONE:
      inputs = writeRandomRows(COUNT, false);
      break;
    case SOME:
    case MANY:
      inputs = writeRandomRowsWithNulls(COUNT, numNulls, false);
      break;
    }

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    assertEquals(COUNT, reader.getNumberOfRows());
    RecordReader rows = reader.rows(null);
    OrcLazyRow lazyRow = null;
    OrcStruct row = null;
    for(int i=0; i < COUNT; i++) {
      lazyRow = (OrcLazyRow) rows.next(lazyRow);
      if (i % n != 0) {
        row = (OrcStruct) lazyRow.materialize();
        if (withoutNextIsNull) {
          compareRowsWithoutNextIsNull(row, inputs, i, numNulls, false);
        } else {
          compareRows(row, inputs, i, numNulls, false);
        }
      }
    }
    rows.close();
  }

  @Test
  public void testEveryRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(1, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryOtherRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(2, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryThirdRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(3, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryFourthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(4, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryFifthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(5, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEverySixthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(6, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEverySeventhRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(7, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryEighthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(8, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryNinthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(9, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryTenthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(10, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryHundredthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(100, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryThousandthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(1000, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryTenThousandthRowWithLotsOfNulls() throws Exception {
    skipEveryNthRow(10000, false, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(1, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryOtherRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(2, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryThirdRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(3, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryFourthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(4, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryFifthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(5, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEverySixthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(6, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEverySeventhRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(7, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryEighthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(8, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryNinthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(9, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryTenthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(10, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryHundredthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(100, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryThousandthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(1000, true, NumberOfNulls.MANY);
  }

  @Test
  public void testEveryTenThousandthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
    skipEveryNthRow(10000, true, NumberOfNulls.MANY);
  }

  private void compareInner(InnerStruct expect,
      OrcStruct actual) throws Exception {
    if (expect == null || actual == null) {
      assertEquals(expect, actual);
    } else {
      if (actual.getFieldValue(0) == null) {
        assertNull(expect.int1);
      } else {
        assertEquals(expect.int1.intValue(), ((IntWritable) actual.getFieldValue(0)).get());
      }
      assertEquals(expect.string1, actual.getFieldValue(1));
    }
  }

  private void compareListOfStructs(List<InnerStruct> expect,
      List<OrcStruct> actual) throws Exception {
    assertEquals(expect.size(), actual.size());
    for(int j=0; j < expect.size(); ++j) {
      compareInner(expect.get(j), actual.get(j));
    }
  }

  private void compareLists(List expect, List actual) throws Exception {
    assertEquals(expect.size(), actual.size());
    assertTrue(expect.containsAll(actual));
  }

  private void compareMap(Map<Text, InnerStruct> expect, Map<Text, OrcStruct> actual)
      throws Exception {
    assertEquals(expect.size(), actual.size());
    for (Text key : expect.keySet()) {
      compareInner(expect.get(key), actual.get(key));
    }
  }

  private ReallyBigRow createRandomRow(long[] intValues, double[] doubleValues,
      String[] stringValues,
      BytesWritable[] byteValues,
      String[] words, int i) {
    InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
    InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32),
        words[i % words.length] + "-x");
    // Every 10th value of this string should be unique-ish in the file
    String stringWithUniques = i % 10 == 0 ? Integer.toHexString(i) : stringValues[i];
    Short shortWithUniques = i % 10 == 0 ? (short) i :  (short) (intValues[i] % 10);
    Integer intWithUniques = i % 10 == 0 ? (int) (Short.MAX_VALUE + i) :
      (int) (Short.MAX_VALUE + (intValues[i] % 10));
    Long longWithUniques = i % 10 == 0 ? (long) (Integer.MAX_VALUE + i) :
      (long) (Integer.MAX_VALUE + (intValues[i] % 10));
    return new ReallyBigRow((intValues[i] & 1) == 0, (byte) intValues[i],
        (short) intValues[i], (int) (Short.MAX_VALUE + intValues[i]),
        (long) (Integer.MAX_VALUE + intValues[i]), (short) (intValues[i] % 10),
        (int) (Short.MAX_VALUE + (intValues[i] % 10)),
        (long) (Integer.MAX_VALUE + (intValues[i] % 10)),
        shortWithUniques, intWithUniques, longWithUniques, (float) doubleValues[i],
        doubleValues[i], byteValues[i], stringValues[i], Integer.toHexString(i),
        stringWithUniques, new MiddleStruct(inner, inner2), list(), map(inner,inner2));
  }

  private ReallyBigRow createRandomRowWithNulls(long[] intValues, double[] doubleValues,
      String[] stringValues, BytesWritable[] byteValues, String[] words, int i, NumberOfNulls numNulls) {
    boolean lotsOfNulls = numNulls == NumberOfNulls.MANY;
    Boolean booleanVal = intValues[i] % 10 == 0 ^ lotsOfNulls ? null : (intValues[i] & 1) == 0;
    Byte byteVal = intValues[i] % 11 == 0 ^ lotsOfNulls ? null : (byte) intValues[i];
    Short shortVal = intValues[i] % 12 == 0 ^ lotsOfNulls  ? null : (short) intValues[i];
    Integer intVal = intValues[i] % 13 == 0 ^ lotsOfNulls  ? null : (int) (Short.MAX_VALUE + i);
    Long longVal = intValues[i] % 14 == 0 ^ lotsOfNulls  ? null : (long) (Integer.MAX_VALUE + i);
    Float floatVal = intValues[i] % 15 == 0 ^ lotsOfNulls  ? null : (float) doubleValues[i];
    Double doubleVal = intValues[i] % 16 == 0 ^ lotsOfNulls  ? null : doubleValues[i];
    BytesWritable bytesVal = intValues[i] % 17 == 0 ^ lotsOfNulls  ? null : byteValues[i];
    String strVal = intValues[i] % 18 == 0 ^ lotsOfNulls  ? null : stringValues[i];
    InnerStruct inner = intValues[i] % 19 == 0 ^ lotsOfNulls  ? null : new InnerStruct(
        intValues[i] % 10 == 0 ^ lotsOfNulls  ? null : (int) intValues[i],
            intValues[i] % 11 == 0 ^ lotsOfNulls  ? null : stringValues[i]);
    InnerStruct inner2 = intValues[i] % 12 == 0 ^ lotsOfNulls  ? null : new InnerStruct(
        intValues[i] % 13 == 0 ^ lotsOfNulls  ? null : (int) (intValues[i] >> 32),
            intValues[i] % 14 == 0 ^ lotsOfNulls  ? null : words[i % words.length] + "-x");
    MiddleStruct middle = intValues[i] % 15 == 0 ^ lotsOfNulls  ? null :
      new MiddleStruct(inner, inner2);
    List<InnerStruct> list = intValues[i] % 16 == 0 ^ lotsOfNulls  ? null : list(inner, inner2);
    Map<Text, InnerStruct> map = intValues[i] % 17 == 0 ^ lotsOfNulls  ? null : map(inner, inner2);
    String strVal2 = intValues[i] % 18 == 0 ^ lotsOfNulls  ? null : Integer.toHexString(i);
    Short shortVal2 = intValues[i] % 19 == 0 ^ lotsOfNulls  ? null : (short) (intValues[i] % 10);
    Integer intVal2 = intValues[i] % 10 == 0 ^ lotsOfNulls  ? null :
      (int) (Short.MAX_VALUE + (intValues[i] % 10));
    Long longVal2 = intValues[i] % 11 == 0 ^ lotsOfNulls  ? null :
      (long) (Integer.MAX_VALUE + (intValues[i] % 10));
    String strVal3 = intValues[i] % 12 == 0 ^ lotsOfNulls ? null :
      (intValues[i] % 10 == 0 ? Integer.toHexString(i) : stringValues[i]);
    Short shortVal3 = intValues[i] % 13 == 0 ^ lotsOfNulls  ? null :
      (intValues[i] % 10 == 0 ? (short) i : (short) (intValues[i] % 10));
    Integer intVal3 = intValues[i] % 14 == 0 ^ lotsOfNulls  ? null :
      (intValues[i] % 10 == 0 ? (int) (Short.MAX_VALUE + i) :
        (int) (Short.MAX_VALUE + (intValues[i] % 10)));
    Long longVal3 = intValues[i] % 15 == 0 ^ lotsOfNulls  ? null :
      (intValues[i] % 10 == 0 ? (long) (Integer.MAX_VALUE + i) :
        (long) (Integer.MAX_VALUE + (intValues[i] % 10)));
    return new ReallyBigRow(booleanVal, byteVal, shortVal, intVal, longVal, shortVal2, intVal2,
        longVal2, shortVal3, intVal3, longVal3, floatVal, doubleVal, bytesVal, strVal, strVal2,
        strVal3, middle, list, map);
  }

  private static class MyMemoryManager extends MemoryManager {
    final long totalSpace;
    double rate;
    Path path = null;
    long lastAllocation = 0;
    int rows = 0;
    MemoryManager.Callback callback;

    MyMemoryManager(Configuration conf, long totalSpace, double rate) {
      super(conf);
      this.totalSpace = totalSpace;
      this.rate = rate;
    }

    @Override
    void addWriter(Path path, long requestedAllocation,
        MemoryManager.Callback callback, long initialAllocation) {
      this.path = path;
      this.lastAllocation = requestedAllocation;
      this.callback = callback;
    }

    @Override
    synchronized void removeWriter(Path path) {
      this.path = null;
      this.lastAllocation = 0;
    }

    @Override
    long getTotalMemoryPool() {
      return totalSpace;
    }

    @Override
    double getAllocationScale() {
      return rate;
    }

    @Override
    void addedRow() throws IOException {

    }

    @Override
    boolean shouldFlush(MemoryEstimate memoryEstimate, Path path, long stripeSize,
        long maxDictSize) {
      long limit = Math.round(stripeSize * rate);
      return memoryEstimate.getTotalMemory() > limit ||
          (maxDictSize > 0 && memoryEstimate.getDictionaryMemory() > maxDictSize);
    }
  }

  @Test
  public void testMemoryManagement() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (InnerStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        50000, CompressionKind.NONE, 100, 0, memory);
    assertEquals(testFilePath, memory.path);
    for(int i=0; i < 2500; ++i) {
      writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));
    }
    writer.close();
    assertEquals(null, memory.path);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    int i = 0;
    for(StripeInformation stripe: reader.getStripes()) {
      i += 1;
      assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
          stripe.getDataLength() < 6000);
    }
    assertEquals(5, i);
    assertEquals(2500, reader.getNumberOfRows());
  }

  @Test
  /**
   * Test a stride dictionary that contains only the empty string
   */
  public void testEmptyStringStrideDictionary() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);
    writer.addRow(new StringStruct(""));
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    }
    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals("", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 999; i++) {
      rows.next(lazyRow);
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    }
  }

  @Test
  /**
   * Tests writing a stripe containing a string column, which is not dictionary encoded in the
   * first stripe, this is carried over to the third stripe, then dictionary encoding is turned
   * back on.  This will cause the dictionary to be nulled out, then reinitialized.
   */
  public void testStrideDictionariesWithoutStripeCarryover() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS, true);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
    // Write a stripe which is not dictionary encoded
    for (int i = 0; i < 2000; i++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }
    writer.forceFlushStripe();
    // Write another stripe (doesn't matter what)
    for (int i = 0; i < 2000; i++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }
    writer.forceFlushStripe();
    // Write a stripe which will be dictionary encoded
    // Note: it is important that this string is lexicographically after the string in the next
    // index stride.  This way, if sorting by index strides is not working, this value will appear
    // after the next one, though it should appear before, yielding incorrect results.
    writer.addRow(new StringStruct("b"));
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    }
    writer.addRow(new StringStruct("a"));
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    }
    writer.forceFlushStripe();
    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    for (int i =0; i < 4000; i++) {
      assertEquals(Integer.toString(i % 2000), ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
      rows.next(lazyRow);
    }
    assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 999; i++) {
      rows.next(lazyRow);
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    }
    rows.next(lazyRow);
    assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 999; i++) {
      rows.next(lazyRow);
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    }
  }

  @Test
  /**
   * Tests writing a stripe that contains a single string column across two index strides where
   * the column is dictionary encoded with a stride dictionary in both strides.
   * When reading, all rows in the first stride whose values are in the stride dictionary are
   * skipped, and in the second stride the values in the stride dictionary are read.
   * This can cause problems if seeking across strides is broken for stride dictionary streams.
   * @throws Exception
   */
  public void testSeekAcrossStrideDictionaries() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    // Set configs so the column is dictionary encoded and stride dictioanries are used
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);
    // Write this value once, so it's added to a stride dictionary
    writer.addRow(new StringStruct("a"));
    // Fill out the rest of the stride
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    }
    // Write this value once, so it's added to a stride dictionary
    writer.addRow(new StringStruct("b"));
    // Fill out the rest of the stride
    for (int i = 0; i < 999; i++) {
      writer.addRow(new StringStruct("123"));
    }
    writer.close();

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    // Skip the one row in the stride dictionary in the first stride ("a")
    rows.next(lazyRow);
    // Read the rest of the values in the stride
    for (int i =0; i < 999; i++) {
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
      rows.next(lazyRow);
    }
    // Read the row in the stride dictionary in the second stride (note that seek won't be called
    // because we read the previous row
    assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    rows.close();
  }

  @Test
  /**
   * Tests a writing a stripe with a stride dictionary, followed by a stripe without
   * followed by a stripe with.
   */
  public void testEmptyInStringDictionaryStream() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
    writer.addRow(new StringStruct("a"));
    writer.addRow(new StringStruct("b"));
    writer.addRow(new StringStruct("c"));
    for (int i = 0; i < 997; i++) {
      writer.addRow(new StringStruct("123"));
    }
    writer.forceFlushStripe();
    for (int i = 0; i < 1000; i++) {
      writer.addRow(new StringStruct("123"));
    }
    writer.forceFlushStripe();
    writer.addRow(new StringStruct("a"));
    writer.addRow(new StringStruct("b"));
    writer.addRow(new StringStruct("c"));
    for (int i = 0; i < 997; i++) {
      writer.addRow(new StringStruct("123"));
    }
    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    rows.next(lazyRow);
    assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    rows.next(lazyRow);
    assertEquals("c", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 997; i++) {
      rows.next(lazyRow);
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    }
    for (int i =0; i < 1000; i++) {
      rows.next(lazyRow);
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    }
    rows.next(lazyRow);
    assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    rows.next(lazyRow);
    assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    rows.next(lazyRow);
    assertEquals("c", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    for (int i =0; i < 997; i++) {
      rows.next(lazyRow);
      assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
    }
  }

  @Test
  /**
   * Tests a writing a stripe with a stride dictionary, followed by a stripe without
   * followed by a stripe with.
   */
  public void testEmptyInIntDictionaryStream() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (IntStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
    writer.addRow(new IntStruct(1));
    writer.addRow(new IntStruct(2));
    writer.addRow(new IntStruct(3));
    for (int i = 0; i < 997; i++) {
      writer.addRow(new IntStruct(123));
    }
    writer.forceFlushStripe();
    for (int i = 0; i < 1000; i++) {
      writer.addRow(new IntStruct(123));
    }
    writer.forceFlushStripe();
    writer.addRow(new IntStruct(1));
    writer.addRow(new IntStruct(2));
    writer.addRow(new IntStruct(3));
    for (int i = 0; i < 997; i++) {
      writer.addRow(new IntStruct(123));
    }
    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    rows.next(lazyRow);
    assertEquals(2, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    rows.next(lazyRow);
    assertEquals(3, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    for (int i =0; i < 997; i++) {
      rows.next(lazyRow);
      assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
    for (int i =0; i < 1000; i++) {
      rows.next(lazyRow);
      assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
    rows.next(lazyRow);
    assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    rows.next(lazyRow);
    assertEquals(2, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    rows.next(lazyRow);
    assertEquals(3, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    for (int i =0; i < 997; i++) {
      rows.next(lazyRow);
      assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
  }

  /**
   * Verifies the scenario when {@link com.facebook.hive.orc.BitFieldReader#skip(long)} skips to
   * the last value and doesn't load the next value if it has reached the end of the stream.
   *
   * @throws Exception
   */
  @Test
  public void testSkipWithEmptyArrayInEnd() throws Exception {
    ObjectInspector inspector;
    List<String> emptyList = Collections.emptyList();
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringListWithId.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }

    ReaderWriterProfiler.setProfilerOptions(conf);
    OrcConf.setFloatVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_KEY_STRING_SIZE_THRESHOLD, 0.01f);
    OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_FILE_ENABLE_LOW_MEMORY_MODE, false);
    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 1000000, CompressionKind.ZLIB, 1000, 1000);

    int numNulls = 4;
    int numNonNulls = 8;
    for(int i = 0; i < numNonNulls; i++) {
      List<String> filledList = new ArrayList<String>(2);
      filledList.add("SomeText");
      filledList.add("SomeMoreText" + i);
      writer.addRow(new StringListWithId(i, filledList));
    }
    for(int j = 0; j < numNulls; j++) {
      writer.addRow(new StringListWithId(numNonNulls+j, emptyList));
    }

    writer.close();

    // Prepare to read back the data
    ReaderWriterProfiler.setProfilerOptions(conf);
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
    OrcStruct row = (OrcStruct) lazyRow.materialize();
    OrcLazyList list = ((OrcLazyList) row.getFieldValue(1));
    LazyTreeReader lazyReader = list.getLazyTreeReader();

    Object prev = lazyReader.get(numNonNulls - 1, null);

    boolean gotException = false;
    String expectedExceptionMessage = "Read past end of buffer RLE byte from compressed stream Stream for column 3 " +
        "kind IN_DICTIONARY base: 60 limit: 66 current stride: 1 compressed offset: 66 uncompressed: 66 to 66";
    try {
      lazyReader.get(numNonNulls + 1, prev);
    } catch (EOFException e) {
      if(e.getMessage().compareTo(expectedExceptionMessage) == 0) {
        gotException = true;
      } else {
        throw e;
      }
    }

    assertFalse("Got EOFException for reading past end of buffer RLE byte", gotException);
  }

  @Test
  /**
   * Tests a writing a stripe with an integer column, which enters low memory mode before the first
   * index stride is complete.
   */
  public void testIntEnterLowMemoryModeInFirstStride() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (IntStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 10000, memory);

    // Write 500 rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i));
    }

    // Force the writer to enter low memory mode, note since the stride length was set to 10000
    // we're still in the first stride
    memory.forceEnterLowMemoryMode();

    // Write 500 more rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i + 500));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1000; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
    rows.close();
  }

  @Test
  /**
   * Tests a writing a stripe with a string column, which enters low memory mode before the first
   * index stride is complete.
   */
  public void testStringEnterLowMemoryModeInFirstStride() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 10000, memory);

    // Write 500 rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }

    // Force the writer to enter low memory mode, note since the stride length was set to 10000
    // we're still in the first stride
    memory.forceEnterLowMemoryMode();

    // Write 500 more rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i + 500)));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1000; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
    }
    rows.close();
  }

  @Test
  /**
   * Tests a writing a stripe with a string column, which enters low memory mode before the second
   * index stride is complete, and does not complete that stride.
   */
  public void testStringEnterLowMemoryModeInSecondStride() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }

    // Write 250 more rows (a portion of the second stride)
    for (int i = 0; i < 250; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're still in the second stride
    memory.forceEnterLowMemoryMode();

    // Write 250 more rows (which still gets written to the second stride, but not enough to fill
    // it)
    for (int i = 0; i < 250; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i + 250)));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i % 1000),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
    }
    rows.close();
  }

  @Test
  /**
   * Tests a writing a stripe with an int column, which enters low memory mode before the second
   * index stride is complete, and does not complete that stride.
   */
  public void testIntEnterLowMemoryModeInSecondStride() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (IntStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new IntStruct(i));
    }

    // Write 250 more rows (a portion of the second stride)
    for (int i = 0; i < 250; i ++) {
      writer.addRow(new IntStruct(i));
    }

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're still in the second stride
    memory.forceEnterLowMemoryMode();

    // Write 250 more rows (which still gets written to the second stride, but not enough to fill
    // it)
    for (int i = 0; i < 250; i ++) {
      writer.addRow(new IntStruct(i + 250));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i % 1000,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
    rows.close();
  }

  @Test
  /**
   * Tests a writing a stripe with a string column, which enters low memory mode just before the
   * second stride starts
   */
  public void testStringEnterLowMemoryModeAtStrideStart() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're just starting the second stride
    memory.forceEnterLowMemoryMode();

    // Write 500 more rows (a portion of the second stride)
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i % 1000),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
    }
    rows.close();
  }

  @Test
  /**
   * Tests a writing a stripe with an int column, which enters low memory mode just before the
   * second stride starts
   */
  public void testIntEnterLowMemoryModeAtStrideStart() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (IntStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new IntStruct(i));
    }

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're just starting the second stride
    memory.forceEnterLowMemoryMode();

    // Write 500 more rows (a portion of the second stride)
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i % 1000,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
    rows.close();
  }

  @Test
  /**
   * Tests a writing a stripe with a string column, which enters low memory mode just after the
   * second stride starts
   */
  public void testStringEnterLowMemoryModeAfterStrideStart() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1001; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i % 1000)));
    }

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're just after starting the second stride
    memory.forceEnterLowMemoryMode();

    // Write 499 more rows (a portion of the second stride)
    for (int i = 1; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i % 1000),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
    }
    rows.close();
  }

  @Test
  /**
   * Tests a writing a stripe with an int column, which enters low memory mode just after the
   * second stride starts
   */
  public void testIntEnterLowMemoryModeAfterStrideStart() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (IntStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
        1000000, CompressionKind.NONE, 100, 1000, memory);

    // Write 1000 rows (the first stride)
    for (int i = 0; i < 1001; i ++) {
      writer.addRow(new IntStruct(i % 1000));
    }

    // Force the writer to enter low memory mode, note since the stride length was set to 1000
    // we're just after starting the second stride
    memory.forceEnterLowMemoryMode();

    // Write 499 more rows (a portion of the second stride)
    for (int i = 1; i < 500; i ++) {
      writer.addRow(new IntStruct(i));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 1500; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i % 1000,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
    rows.close();
  }

  @Test
  /**
   * Tests writing a stripe with a string column, which doesn't do dictionary encoding, then
   * re-evaluates whether it should do dictionary encoding or not.  While it's re-evaluating, it
   * enters low memory mode.
   */
  public void testStringEnterLowMemoryModeAndOnNotCarriedOverStripe() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (StringStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // Reevaluate if we should use dictionary encoding on every stripe
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 10000, memory);

    // Write 500 rows, they wil be directly encoded
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }

    // Flush the first stripe
    writer.forceFlushStripe();

    // Write 500 more rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i)));
    }

    // Force the writer to enter low memory mode
    memory.forceEnterLowMemoryMode();

    // Write 500 more rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new StringStruct(Integer.toString(i + 500)));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 2000; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(Integer.toString(i % 1000),
          ((Text) ((OrcLazyString) row.getFieldValue(0)).materialize()).toString());
    }
    rows.close();
  }

  @Test
  /**
   * Tests writing a stripe with an int column, which doesn't do dictionary encoding, then
   * re-evaluates whether it should do dictionary encoding or not.  While it's re-evaluating, it
   * enters low memory mode
   */
  public void testIntegerEnterLowMemoryModeAndOnNotCarriedOverStripe() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (IntStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // Reevaluate if we should use dictionary encoding on every stripe
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
    MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
    ReaderWriterProfiler.setProfilerOptions(conf);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 10000, memory);

    // Write 500 rows
    for (int i = 0; i < 1000; i ++) {
      writer.addRow(new IntStruct(i));
    }

    // Flush the first stripe
    writer.forceFlushStripe();

    // Write 500 more rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i));
    }

    // Force the writer to enter low memory mode
    memory.forceEnterLowMemoryMode();

    // Write 500 more rows
    for (int i = 0; i < 500; i ++) {
      writer.addRow(new IntStruct(i + 500));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    for (int i = 0; i < 2000; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i % 1000,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
    rows.close();
  }

  @Test
  /**
   * Tests calling seekToRow to make sure it updates the stripe accordingly
   */
  public void testSeekToRow() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (IntStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    ReaderWriterProfiler.setProfilerOptions(conf);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 10000, new MemoryManager(conf));

    // Write 100 rows
    for (int i = 0; i < 100; i++) {
      writer.addRow(new IntStruct(i));
    }

    // Flush the first stripe
    writer.forceFlushStripe();

    // Write 100 more rows
    for (int i = 0; i < 100; i++) {
      writer.addRow(new IntStruct(i + 100));
    }

    writer.close();
    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    RecordReader rows = reader.rows(null);
    OrcLazyStruct lazyRow = null;
    OrcStruct row = null;
    lazyRow = (OrcLazyStruct) rows.next(lazyRow);
    row = (OrcStruct) lazyRow.materialize();
    assertEquals(0, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    // Seek to row 98 which is almost at the end of a stripe, this way it stays in the current
    // stripe, and if the row is not updated correctly will read off the end of a stream.
    rows.seekToRow(98);
    for (int i = 98; i < 200; i++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }
    rows.close();
  }

  @Test
  /**
   * Tests that when a reader is initialized using offset, length the stripes included are
   * those that start in the range [offset, offset + length)
   */
  public void testSplitStripe() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (IntStruct.class,
              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // Reevaluate if we should use dictionary encoding on every stripe
    OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
    ReaderWriterProfiler.setProfilerOptions(conf);
    WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf,
        inspector, 1000000, CompressionKind.NONE, 100, 10000, new MemoryManager(conf));

    // Write 100 rows
    for (int i = 0; i < 100; i ++) {
      writer.addRow(new IntStruct(i));
    }

    // Flush the first stripe
    writer.forceFlushStripe();

    // Write 100 more rows
    for (int i = 0; i < 100; i ++) {
      writer.addRow(new IntStruct(i + 100));
    }

    writer.close();

    Reader reader = OrcFile.createReader(fs, testFilePath, conf);
    Iterator<StripeInformation> stripes = reader.getStripes().iterator();

    StripeInformation firstStripe = stripes.next();
    StripeInformation secondStripe = stripes.next();

    // Create a record reader that has the offset and length of the first stripe
    RecordReader rows = reader.rows(firstStripe.getOffset(),
        secondStripe.getOffset() - firstStripe.getOffset(), null);

    // Read what we wrote for the first stripe
    OrcLazyStruct lazyRow = null;
    OrcStruct row;
    for (int i = 0; i < 100; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }

    // Make sure that there's no additional data
    assertFalse(rows.hasNext());
    rows.close();

    // Create a record reader that has the offset and length of the second stripe
    // Since this is the last stripe it has length equal to the length of the file containing
    // stripes - the offset of the second stripe
    rows = reader.rows(secondStripe.getOffset(),
        reader.getContentLength() - secondStripe.getOffset(), null);

    // Read what we wrote for the first stripe
    for (int i = 0; i < 100; i ++) {
      lazyRow = (OrcLazyStruct) rows.next(lazyRow);
      row = (OrcStruct) lazyRow.materialize();
      assertEquals(i + 100,
          ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
    }

    // Make sure that there's no additional data
    assertFalse(rows.hasNext());
    rows.close();
  }
}
TOP

Related Classes of com.facebook.hive.orc.TestOrcFile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.