Package org.apache.hadoop.hive.ql.io.orc

Source Code of org.apache.hadoop.hive.ql.io.orc.TestVectorizedORCReader$MyRecord

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.io.orc;

import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.junit.Before;
import org.junit.Test;

import java.io.File;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.Calendar;
import java.util.Random;

/**
*
* Class that tests ORC reader vectorization by comparing records that are
* returned by "row by row" reader with batch reader.
*
*/
public class TestVectorizedORCReader {

  private Configuration conf;
  private FileSystem fs;
  private Path testFilePath;

  @Before
  public void openFileSystem() throws Exception {
    conf = new Configuration();
    fs = FileSystem.getLocal(conf);
    Path workDir = new Path(System.getProperty("test.tmp.dir",
        "target" + File.separator + "test" + File.separator + "tmp"));
    fs.setWorkingDirectory(workDir);
    testFilePath = new Path("TestVectorizedORCReader.testDump.orc");
    fs.delete(testFilePath, false);
  }

  @SuppressWarnings("unused")
  static class MyRecord {
    private final Boolean bo;
    private final Byte by;
    private final Integer i;
    private final Long l;
    private final Short s;
    private final Double d;
    private final String k;
    private final Timestamp t;
    private final Date dt;
    private final HiveDecimal hd;

    MyRecord(Boolean bo, Byte by, Integer i, Long l, Short s, Double d, String k, Timestamp t,
             Date dt, HiveDecimal hd) {
      this.bo = bo;
      this.by = by;
      this.i = i;
      this.l = l;
      this.s = s;
      this.d = d;
      this.k = k;
      this.t = t;
      this.dt = dt;
      this.hd = hd;
    }
  }

  @Test
  public void createFile() throws Exception {
    ObjectInspector inspector;
    synchronized (TestVectorizedORCReader.class) {
      inspector = ObjectInspectorFactory.getReflectionObjectInspector
          (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }

    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
        100000, CompressionKind.ZLIB, 10000, 10000);
    Random r1 = new Random(1);
    String[] words = new String[] {"It", "was", "the", "best", "of", "times,",
        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
        "before", "us,", "we", "were", "all", "going", "direct", "to",
        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
        "way"};
    String[] dates = new String[] {"1991-02-28", "1970-01-31", "1950-04-23"};
    String[] decimalStrings = new String[] {"234.443", "10001000", "0.3333367", "67788798.0", "-234.443",
        "-10001000", "-0.3333367", "-67788798.0", "0"};
    for (int i = 0; i < 21000; ++i) {
      if ((i % 7) != 0) {
        writer.addRow(new MyRecord(((i % 3) == 0), (byte)(i % 5), i, (long) 200, (short) (300 + i), (double) (400 + i),
            words[r1.nextInt(words.length)], new Timestamp(Calendar.getInstance().getTime().getTime()),
            Date.valueOf(dates[i % 3]), HiveDecimal.create(decimalStrings[i % decimalStrings.length])));
      } else {
        writer.addRow(new MyRecord(null, null, i, (long) 200, null, null, null, null, null, null));
      }
    }
    writer.close();
    checkVectorizedReader();
  }

  private void checkVectorizedReader() throws Exception {

    Reader vreader = OrcFile.createReader(testFilePath,
        OrcFile.readerOptions(conf));
    Reader reader = OrcFile.createReader(testFilePath,
        OrcFile.readerOptions(conf));
    RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
    RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
    VectorizedRowBatch batch = null;
    OrcStruct row = null;

    // Check Vectorized ORC reader against ORC row reader
    while (vrr.hasNext()) {
      batch = vrr.nextBatch(batch);
      for (int i = 0; i < batch.size; i++) {
        row = (OrcStruct) rr.next(row);
        for (int j = 0; j < batch.cols.length; j++) {
          Object a = (row.getFieldValue(j));
          Object b = batch.cols[j].getWritableObject(i);
          // Boolean values are stores a 1's and 0's, so convert and compare
          if (a instanceof BooleanWritable) {
            Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
            Assert.assertEquals(true, temp.toString().equals(b.toString()));
            continue;
          }
          // Timestamps are stored as long, so convert and compare
          if (a instanceof Timestamp) {
            Timestamp t = ((Timestamp) a);
            // Timestamp.getTime() is overriden and is
            // long time = super.getTime();
            // return (time + (nanos / 1000000));
            Long timeInNanoSec = (t.getTime() * 1000000) + (t.getNanos() % 1000000);
            Assert.assertEquals(true, timeInNanoSec.toString().equals(b.toString()));
            continue;
          }

          // Dates are stored as long, so convert and compare
          if (a instanceof Date) {
            Date adt = (Date) a;
            Assert.assertEquals(adt.getTime(), DateWritable.daysToMillis((int) ((LongWritable) b).get()));
            continue;
          }

          // Decimals are stored as BigInteger, so convert and compare
          if (a instanceof HiveDecimal) {
            HiveDecimalWritable dec = (HiveDecimalWritable) b;
            Assert.assertEquals(a, dec.getHiveDecimal());
          }

          if (null == a) {
            Assert.assertEquals(true, (b == null || (b instanceof NullWritable)));
          } else {
            Assert.assertEquals(true, b.toString().equals(a.toString()));
          }
        }
      }

      // Check repeating
      Assert.assertEquals(false, batch.cols[0].isRepeating);
      Assert.assertEquals(false, batch.cols[1].isRepeating);
      Assert.assertEquals(false, batch.cols[2].isRepeating);
      Assert.assertEquals(true, batch.cols[3].isRepeating);
      Assert.assertEquals(false, batch.cols[4].isRepeating);
      Assert.assertEquals(false, batch.cols[5].isRepeating);
      Assert.assertEquals(false, batch.cols[6].isRepeating);
      Assert.assertEquals(false, batch.cols[7].isRepeating);
      Assert.assertEquals(false, batch.cols[8].isRepeating);
      Assert.assertEquals(false, batch.cols[9].isRepeating);

      // Check non null
      Assert.assertEquals(false, batch.cols[0].noNulls);
      Assert.assertEquals(false, batch.cols[1].noNulls);
      Assert.assertEquals(true, batch.cols[2].noNulls);
      Assert.assertEquals(true, batch.cols[3].noNulls);
      Assert.assertEquals(false, batch.cols[4].noNulls);
      Assert.assertEquals(false, batch.cols[5].noNulls);
      Assert.assertEquals(false, batch.cols[6].noNulls);
      Assert.assertEquals(false, batch.cols[7].noNulls);
      Assert.assertEquals(false, batch.cols[8].noNulls);
      Assert.assertEquals(false, batch.cols[9].noNulls);
    }
    Assert.assertEquals(false, rr.hasNext());
  }
}
TOP

Related Classes of org.apache.hadoop.hive.ql.io.orc.TestVectorizedORCReader$MyRecord

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.