/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store;

import com.beust.jcommander.internal.Lists;
import com.google.common.base.Charsets;
import com.google.common.io.Files;

import com.google.common.util.concurrent.SettableFuture;
import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.util.FileUtils;
import org.apache.drill.exec.client.DrillClient;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.physical.impl.OutputMutator;

import org.apache.drill.exec.proto.UserProtos;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.RecordBatchLoader;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.rpc.RpcException;
import org.apache.drill.exec.rpc.user.QueryResultBatch;
import org.apache.drill.exec.rpc.user.UserResultsListener;
import org.apache.drill.exec.server.Drillbit;
import org.apache.drill.exec.server.RemoteServiceSet;

import org.apache.drill.exec.store.json.JsonSchemaProvider;
import org.apache.drill.exec.vector.BaseDataValueVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Ignore;
import org.junit.Test;
import parquet.bytes.BytesInput;
import parquet.column.ColumnDescriptor;

import parquet.hadoop.ParquetFileWriter;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

import java.util.*;

import static org.junit.Assert.*;
import static parquet.column.Encoding.PLAIN;

public class ParquetRecordReaderTest {
  org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetRecordReaderTest.class);

  private static final boolean VERBOSE_DEBUG = false;

  // { 00000001, 00000010, 00000100, 00001000, 00010000, ... }
  byte[] bitFields = {1, 2, 4, 8, 16, 32, 64, -128};
  byte allBitsTrue = -1;
  byte allBitsFalse = 0;
  int DEFAULT_BYTES_PER_PAGE = 1024 * 1024 * 1;
  static Object[] intVals = {-200, 100, Integer.MAX_VALUE };
  static Object[] longVals = { -5000l, 5000l, Long.MAX_VALUE};
  static Object[] floatVals = { 1.74f, Float.MAX_VALUE, Float.MIN_VALUE};
  static Object[] doubleVals = {100.45d, Double.MAX_VALUE, Double.MIN_VALUE,};
  static Object[] boolVals = {false, false, true};
  static byte[] varLen1 = {50, 51, 52, 53, 54, 55, 56, 57, 58, 59};
  static byte[] varLen2 = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
  static byte[] varLen3 = {100, 99, 98};
  static Object[] binVals = { varLen3, varLen2, varLen3};
  static Object[] bin2Vals = { varLen3, varLen2, varLen1};

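  // Defines the test columns (Parquet primitive type, Drill minor type, bit width, and the three sample
  // values cycled through the file) used both to generate the test file and to verify query results.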
  private void populateFieldInfoMap(ParquetTestProperties props){
    props.fields.put("integer", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props));
    props.fields.put("bigInt", new FieldInfo("int64", "bigInt", 64, longVals, TypeProtos.MinorType.BIGINT, props));
    props.fields.put("f", new FieldInfo("float", "f", 32, floatVals, TypeProtos.MinorType.FLOAT4, props));
    props.fields.put("d", new FieldInfo("double", "d", 64, doubleVals, TypeProtos.MinorType.FLOAT8, props));
    props.fields.put("b", new FieldInfo("boolean", "b", 1, boolVals, TypeProtos.MinorType.BIT, props));
    props.fields.put("bin", new FieldInfo("binary", "bin", -1, binVals, TypeProtos.MinorType.VARBINARY, props));
    props.fields.put("bin2", new FieldInfo("binary", "bin2", -1, bin2Vals, TypeProtos.MinorType.VARBINARY, props));
  }

  private void populatePigTPCHCustomerFields(ParquetTestProperties props){
    // the values supplied to the FieldInfo constructors do not matter here because the file is generated outside of the test
    props.fields.put("C_CUSTKEY", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props));
    props.fields.put("C_NATIONKEY", new FieldInfo("int64", "bigInt", 64, longVals, TypeProtos.MinorType.BIGINT, props));
    props.fields.put("C_ACCTBAL", new FieldInfo("float", "f", 32, floatVals, TypeProtos.MinorType.FLOAT4, props));
    props.fields.put("C_NAME", new FieldInfo("double", "d", 64, doubleVals, TypeProtos.MinorType.FLOAT8, props));
    props.fields.put("C_ADDRESS", new FieldInfo("boolean", "b", 1, boolVals, TypeProtos.MinorType.BIT, props));
    props.fields.put("C_PHONE", new FieldInfo("binary", "bin", -1, binVals, TypeProtos.MinorType.VARBINARY, props));
    props.fields.put("C_MKTSEGMENT", new FieldInfo("binary", "bin2", -1, bin2Vals, TypeProtos.MinorType.VARBINARY, props));
    props.fields.put("C_COMMENT", new FieldInfo("binary", "bin2", -1, bin2Vals, TypeProtos.MinorType.VARBINARY, props));
  }

  private void populatePigTPCHSupplierFields(ParquetTestProperties props){
    // the values supplied to the FieldInfo constructors do not matter here because the file is generated outside of the test
    props.fields.put("S_SUPPKEY", new FieldInfo("int32", "integer", 32, intVals, TypeProtos.MinorType.INT, props));
    props.fields.put("S_NATIONKEY", new FieldInfo("int64", "bigInt", 64, longVals, TypeProtos.MinorType.BIGINT, props));
    props.fields.put("S_ACCTBAL", new FieldInfo("float", "f", 32, floatVals, TypeProtos.MinorType.FLOAT4, props));
    props.fields.put("S_NAME", new FieldInfo("double", "d", 64, doubleVals, TypeProtos.MinorType.FLOAT8, props));
    props.fields.put("S_ADDRESS", new FieldInfo("boolean", "b", 1, boolVals, TypeProtos.MinorType.BIT, props));
    props.fields.put("S_PHONE", new FieldInfo("binary", "bin", -1, binVals, TypeProtos.MinorType.VARBINARY, props));
    props.fields.put("S_COMMENT", new FieldInfo("binary", "bin2", -1, bin2Vals, TypeProtos.MinorType.VARBINARY, props));
  }

  @Test
  public void testMultipleRowGroups() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(3, 3000, DEFAULT_BYTES_PER_PAGE, fields);
    populateFieldInfoMap(props);
    testParquetFullEngine(true, "/parquet_scan_screen.json", "/tmp/test.parquet", 1, props);
  }

  // TODO - Test is currently marked @Ignore to avoid breaking the build; it requires a binary file that was
  // generated using pig. Will need to find a good place to keep files like this.
  // For now I will upload it to the JIRA as an attachment.
  @Ignore
  @Test
  public void testNullableColumns() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(1, 3000000, DEFAULT_BYTES_PER_PAGE, fields);
    Object[] boolVals = {true, null, null};
    props.fields.put("a", new FieldInfo("boolean", "a", 1, boolVals, TypeProtos.MinorType.BIT, props));
    testParquetFullEngine(false, "/parquet_nullable.json", "/tmp/nullable.parquet", 1, props);
  }

  @Ignore
  @Test
  public void testNullableColumnsVarLen() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(1, 3000000, DEFAULT_BYTES_PER_PAGE, fields);
    byte[] val = {'b'};
//    Object[] boolVals = { val, null, null};
//    Object[] boolVals = { null, null, null};
    Object[] boolVals = { val, val, val};
    props.fields.put("a", new FieldInfo("boolean", "a", 1, boolVals, TypeProtos.MinorType.BIT, props));
    testParquetFullEngine(false, "/parquet_nullable_varlen.json", "/tmp/nullable.parquet", 1, props);
  }

  @Test
  public void testMultipleRowGroupsAndReads() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(4, 3000, DEFAULT_BYTES_PER_PAGE, fields);
    populateFieldInfoMap(props);
    String readEntries = "";
    // number of times to read the file
    int i = 3;
    for (int j = 0; j < i; j++){
      readEntries += "{path: \"/tmp/test.parquet\"}";
      if (j < i - 1)
        readEntries += ",";
    }
    testParquetFullEngineEventBased(true, "/parquet_scan_screen_read_entry_replace.json", readEntries,
        "/tmp/test.parquet", i, props);
  }

  // requires a binary file generated by pig from TPC-H data; the assert on incoming data also has to be disabled
  @Ignore
  @Test
  public void testMultipleRowGroupsAndReadsPigError() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(4, 3000, DEFAULT_BYTES_PER_PAGE, fields);
    populatePigTPCHCustomerFields(props);
//    populatePigTPCHSupplierFields(props);
    String readEntries = "";
    // number of times to read the file
    int i = 1;
    for (int j = 0; j < i; j++){
      readEntries += "{path: \"/tmp/tpc-h/customer\"}";
      if (j < i - 1)
        readEntries += ",";
    }
    testParquetFullEngineEventBased(false, "/parquet_scan_screen_read_entry_replace.json", readEntries,
        "/tmp/test.parquet", i, props);
  }

  @Test
  public void testMultipleRowGroupsEvent() throws Exception {
    HashMap<String, FieldInfo> fields = new HashMap<>();
    ParquetTestProperties props = new ParquetTestProperties(4, 3000, DEFAULT_BYTES_PER_PAGE, fields);
    populateFieldInfoMap(props);
    testParquetFullEngineEventBased(true, "/parquet_scan_screen.json", "/tmp/test.parquet", 1, props);
  }


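  // Holds the shape of a generated test file: number of row groups, records per row group,
  // page size in bytes, and the map of field definitions.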
  private class ParquetTestProperties{
    int numberRowGroups;
    int recordsPerRowGroup;
    int bytesPerPage = 1024 * 1024 * 1;
    HashMap<String, FieldInfo> fields = new HashMap<>();

    public ParquetTestProperties(int numberRowGroups, int recordsPerRowGroup, int bytesPerPage,
                                 HashMap<String, FieldInfo> fields){
      this.numberRowGroups = numberRowGroups;
      this.recordsPerRowGroup = recordsPerRowGroup;
      this.bytesPerPage = bytesPerPage;
      this.fields = fields;
    }

  }

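  // Describes a single test column: its Parquet primitive type, name, bit width (-1 for variable-length
  // binary), the three values cycled through the file, its Drill minor type, and the number of pages
  // needed to hold one row group's worth of values.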
  private static class FieldInfo {

    String parquetType;
    String name;
    int bitLength;
    int numberOfPages;
    Object[] values;
    TypeProtos.MinorType type;

    FieldInfo(String parquetType, String name, int bitLength, Object[] values, TypeProtos.MinorType type, ParquetTestProperties props){
      this.parquetType = parquetType;
      this.name = name;
      this.bitLength  = bitLength;
      this.numberOfPages = Math.max(1, (int) Math.ceil( ((long) props.recordsPerRowGroup) * bitLength / 8.0 / props.bytesPerPage));
      this.values = values;
      // generator is designed to use 3 values
      assert values.length == 3;
      this.type = type;
    }
  }

  private String getResource(String resourceName) {
    return "resource:" + resourceName;
  }

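  // Writes a Parquet test file directly with ParquetFileWriter: a message schema is built from the
  // configured fields, then each row group writes one column chunk per field, cycling through the three
  // sample values (booleans are bit-packed, variable-length binaries are length-prefixed).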
  public void generateParquetFile(String filename, ParquetTestProperties props) throws Exception {

    int currentBooleanByte = 0;
    WrapAroundCounter booleanBitCounter = new WrapAroundCounter(7);

    Configuration configuration = new Configuration();
    configuration.set(JsonSchemaProvider.HADOOP_DEFAULT_NAME, "file:///");
    //"message m { required int32 integer; required int64 integer64; required boolean b; required float f; required double d;}"

    FileSystem fs = FileSystem.get(configuration);
    Path path = new Path(filename);
    if (fs.exists(path)) fs.delete(path, false);


    String messageSchema = "message m {";
    for (FieldInfo fieldInfo : props.fields.values()) {
      messageSchema += " required " + fieldInfo.parquetType + " " + fieldInfo.name + ";";
    }
    // note: the Parquet message syntax requires a semicolon after every field declaration,
    // so the trailing semicolon is intentionally kept
    messageSchema += "}";

    MessageType schema = MessageTypeParser.parseMessageType(messageSchema);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
    w.start();
    HashMap<String, Integer> columnValuesWritten = new HashMap<>();
    int valsWritten;
    for (int k = 0; k < props.numberRowGroups; k++){
      w.startBlock(1);
      currentBooleanByte = 0;
      booleanBitCounter.reset();

      for (FieldInfo fieldInfo : props.fields.values()) {

        if ( ! columnValuesWritten.containsKey(fieldInfo.name)){
          columnValuesWritten.put((String) fieldInfo.name, 0);
          valsWritten = 0;
        } else {
          valsWritten = columnValuesWritten.get(fieldInfo.name);
        }

        String[] path1 = {(String) fieldInfo.name};
        ColumnDescriptor c1 = schema.getColumnDescription(path1);

        w.startColumn(c1, props.recordsPerRowGroup, codec);
        int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages);
        byte[] bytes;
        // for variable length binary fields
        int bytesNeededToEncodeLength = 4;
        if ((int) fieldInfo.bitLength > 0) {
          bytes = new byte[(int) Math.ceil(valsPerPage * (int) fieldInfo.bitLength / 8.0)];
        } else {
          // the 3 * bytesNeededToEncodeLength term accounts for the 4-byte length stored with each of the three values
          int totalValLength = ((byte[]) fieldInfo.values[0]).length + ((byte[]) fieldInfo.values[1]).length + ((byte[]) fieldInfo.values[2]).length + 3 * bytesNeededToEncodeLength;
          // used for the case where there is a number of values in this row group that is not divisible by 3
          int leftOverBytes = 0;
          if ( valsPerPage % 3 > 0 ) leftOverBytes += ((byte[])fieldInfo.values[1]).length + bytesNeededToEncodeLength;
          if ( valsPerPage % 3 > 1 ) leftOverBytes += ((byte[])fieldInfo.values[2]).length + bytesNeededToEncodeLength;
          bytes = new byte[valsPerPage / 3 * totalValLength + leftOverBytes];
        }
        int bytesPerPage = (int) (valsPerPage * ((int) fieldInfo.bitLength / 8.0));
        int bytesWritten = 0;
        for (int z = 0; z < (int) fieldInfo.numberOfPages; z++, bytesWritten = 0) {
          for (int i = 0; i < valsPerPage; i++) {
            //System.out.print(i + ", " + (i % 25 == 0 ? "\n gen " + fieldInfo.name + ": " : ""));
            if (fieldInfo.values[0] instanceof Boolean) {
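              // booleans are bit-packed eight per byte; bitFields[n] masks the n-th bit of the current byte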

              bytes[currentBooleanByte] |= bitFields[booleanBitCounter.val] & ((boolean) fieldInfo.values[valsWritten % 3]
                  ? allBitsTrue : allBitsFalse);
              booleanBitCounter.increment();
              if (booleanBitCounter.val == 0) {
                currentBooleanByte++;
              }
              valsWritten++;
              if (currentBooleanByte > bytesPerPage) break;
            } else {
              if (fieldInfo.values[valsWritten % 3] instanceof byte[]){
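                // variable-length binary values are stored as a 4-byte length prefix followed by the raw bytes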
                System.arraycopy(ByteArrayUtil.toByta(((byte[])fieldInfo.values[valsWritten % 3]).length),
                    0, bytes, bytesWritten, bytesNeededToEncodeLength);
                try {
                  System.arraycopy(fieldInfo.values[valsWritten % 3],
                      0, bytes, bytesWritten + bytesNeededToEncodeLength, ((byte[])fieldInfo.values[valsWritten % 3]).length);
                } catch (Exception ex){
                  logger.warn("Failed to copy a variable-length value into the page buffer.", ex);
                }
                bytesWritten += ((byte[])fieldInfo.values[valsWritten % 3]).length + bytesNeededToEncodeLength;
              }
              else{
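                // fixed-width values are copied directly at their byte offset within the page buffer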
                System.arraycopy( ByteArrayUtil.toByta(fieldInfo.values[valsWritten % 3]),
                    0, bytes, i * ((int) fieldInfo.bitLength / 8), (int) fieldInfo.bitLength / 8);
              }
              valsWritten++;
            }

          }
          w.writeDataPage((int) (props.recordsPerRowGroup / (int) fieldInfo.numberOfPages), bytes.length, BytesInput.from(bytes), PLAIN, PLAIN, PLAIN);
          currentBooleanByte = 0;
        }
        w.endColumn();
        columnValuesWritten.put(fieldInfo.name, valsWritten);
      }

      w.endBlock();
    }
    w.end(new HashMap<String, String>());
    logger.debug("Finished generating parquet file.");
  }

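  // Asynchronous results listener: loads each incoming QueryResultBatch into a RecordBatchLoader,
  // checks every value of every vector against the expected cyclic values, and completes the future
  // once the last chunk has arrived.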
  private class ParquetResultListener implements UserResultsListener {
    private SettableFuture<Void> future = SettableFuture.create();
    int count = 0;
    RecordBatchLoader batchLoader;

    int batchCounter = 1;
    HashMap<String, Integer> valuesChecked = new HashMap<>();
    ParquetTestProperties props;

    ParquetResultListener(RecordBatchLoader batchLoader, ParquetTestProperties props){
      this.batchLoader = batchLoader;
      this.props = props;
    }

    @Override
    public void submissionFailed(RpcException ex) {
      logger.debug("Submission failed.", ex);
      future.setException(ex);
    }

    @Override
    public void resultArrived(QueryResultBatch result) {
      logger.debug("result arrived in test batch listener.");
      if(result.getHeader().getIsLastChunk()){
        future.set(null);
      }
      int columnValCounter = 0;
      int i = 0;
      FieldInfo currentField;
      count += result.getHeader().getRowCount();
      boolean schemaChanged = false;
      try {
        schemaChanged = batchLoader.load(result.getHeader().getDef(), result.getData());
      } catch (SchemaChangeException e) {
        throw new RuntimeException(e);
      }

      int recordCount = 0;
      if (schemaChanged) {
        // no change is believed to be needed when the schema changes, with the current mock scan use case
      }

      for (VectorWrapper vw : batchLoader) {
        ValueVector vv = vw.getValueVector();
        currentField = props.fields.get(vv.getField().getName());
        if (VERBOSE_DEBUG){
          System.out.println("\n" + (String) currentField.name);
        }
        if ( ! valuesChecked.containsKey(vv.getField().getName())){
          valuesChecked.put(vv.getField().getName(), 0);
          columnValCounter = 0;
        } else {
          columnValCounter = valuesChecked.get(vv.getField().getName());
        }
        for (int j = 0; j < ((BaseDataValueVector)vv).getAccessor().getValueCount(); j++) {
          if (VERBOSE_DEBUG){
            System.out.print(vv.getAccessor().getObject(j) + ", " + (j % 25 == 0 ? "\n batch:" + batchCounter + " v:" + j + " - " : ""));
          }
          assertField(vv, j, (TypeProtos.MinorType) currentField.type,
              currentField.values[columnValCounter % 3], (String) currentField.name + "/");
          columnValCounter++;
        }
        if (VERBOSE_DEBUG){
          System.out.println("\n" + ((BaseDataValueVector)vv).getAccessor().getValueCount());
        }
        valuesChecked.put(vv.getField().getName(), columnValCounter);
      }

      if (VERBOSE_DEBUG){
        for (i = 0; i < batchLoader.getRecordCount(); i++) {
          recordCount++;
          if (i % 50 == 0){
            System.out.println();
            for (VectorWrapper vw : batchLoader) {
              ValueVector v = vw.getValueVector();
              System.out.print(pad(v.getField().getName(), 20) + " ");

            }
            System.out.println();
            System.out.println();
          }

          for (VectorWrapper vw : batchLoader) {
            ValueVector v = vw.getValueVector();
            System.out.print(pad(v.getAccessor().getObject(i).toString(), 20) + " ");
          }
          System.out.println();
        }
      }
      batchCounter++;
      if(result.getHeader().getIsLastChunk()){
        future.set(null);
      }
    }

    public void getResults() throws RpcException{
      try{
        future.get();
      }catch(Throwable t){
        throw RpcException.mapException(t);
      }
    }
  }

  // specific tests should call this method, but it is not marked as a test itself intentionally
  public void testParquetFullEngineEventBased(boolean generateNew, String plan, String filename, int numberOfTimesRead /* specified in json plan */, ParquetTestProperties props) throws Exception{
    testParquetFullEngineEventBased(generateNew, plan, null, filename, numberOfTimesRead, props);
  }

  // specific tests should call this method, but it is not marked as a test itself intentionally
  public void testParquetFullEngineEventBased(boolean generateNew, String plan, String readEntries, String filename,
                                              int numberOfTimesRead /* specified in json plan */, ParquetTestProperties props) throws Exception{
    RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();

    if (generateNew) generateParquetFile(filename, props);

    DrillConfig config = DrillConfig.create();

    try(Drillbit bit1 = new Drillbit(config, serviceSet); DrillClient client = new DrillClient(config, serviceSet.getCoordinator());){
      bit1.run();
      client.connect();
      RecordBatchLoader batchLoader = new RecordBatchLoader(bit1.getContext().getAllocator());
      ParquetResultListener resultListener = new ParquetResultListener(batchLoader, props);
      long C = System.nanoTime();
      if (readEntries != null){
        client.runQuery(UserProtos.QueryType.LOGICAL, (Files.toString(FileUtils.getResourceAsFile(plan), Charsets.UTF_8).replaceFirst( "&REPLACED_IN_PARQUET_TEST&", readEntries)), resultListener);
      }
      else{
        client.runQuery(UserProtos.QueryType.LOGICAL, Files.toString(FileUtils.getResourceAsFile(plan), Charsets.UTF_8), resultListener);
      }
      resultListener.getResults();
      for (String s : resultListener.valuesChecked.keySet()) {
        assertEquals("Record count incorrect for column: " + s,
            props.recordsPerRowGroup * props.numberRowGroups * numberOfTimesRead, (long) resultListener.valuesChecked.get(s));
        logger.debug("Column {}, Values read:{}", s, resultListener.valuesChecked.get(s));
      }
      long D = System.nanoTime();
      System.out.println(String.format("Took %f s to run query", (float)(D-C) / 1E9));
    }
  }

  // specific tests should call this method, but it is not marked as a test itself intentionally
  public void testParquetFullEngine(boolean generateNew, String plan, String filename, int numberOfTimesRead /* specified in json plan */, ParquetTestProperties props) throws Exception{
    testParquetFullEngine(generateNew, plan, null, filename, numberOfTimesRead, props);
  }

  // specific tests should call this method, but it is not marked as a test itself intentionally
  public void testParquetFullEngine(boolean generateNew, String plan, String readEntries, String filename,
                                    int numberOfTimesRead /* specified in json plan */, ParquetTestProperties props) throws Exception{
    RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();

    if (generateNew) generateParquetFile(filename, props);

    DrillConfig config = DrillConfig.create();

    try(Drillbit bit1 = new Drillbit(config, serviceSet); DrillClient client = new DrillClient(config, serviceSet.getCoordinator())) {
      long A = System.nanoTime();
      bit1.run();
      long B = System.nanoTime();
      client.connect();
      long C = System.nanoTime();
      List<QueryResultBatch> results;
      // insert a variable number of reads
      if (readEntries != null){
        results = client.runQuery(UserProtos.QueryType.LOGICAL, (Files.toString(FileUtils.getResourceAsFile(plan), Charsets.UTF_8).replaceFirst( "&REPLACED_IN_PARQUET_TEST&", readEntries)));
      }
      else{
        results = client.runQuery(UserProtos.QueryType.LOGICAL, Files.toString(FileUtils.getResourceAsFile(plan), Charsets.UTF_8));
      }
//      List<QueryResultBatch> results = client.runQuery(UserProtos.QueryType.PHYSICAL, Files.toString(FileUtils.getResourceAsFile("/parquet_scan_union_screen_physical.json"), Charsets.UTF_8));
      long D = System.nanoTime();
      System.out.println(String.format("Took %f s to start drillbit", (float)(B-A) / 1E9));
      System.out.println(String.format("Took %f s to connect", (float)(C-B) / 1E9));
      System.out.println(String.format("Took %f s to run query", (float)(D-C) / 1E9));
      //List<QueryResultBatch> results = client.runQuery(UserProtos.QueryType.PHYSICAL, Files.toString(FileUtils.getResourceAsFile("/parquet_scan_union_screen_physical.json"), Charsets.UTF_8));
      int count = 0;
//      RecordBatchLoader batchLoader = new RecordBatchLoader(new BootStrapContext(config).getAllocator());
      RecordBatchLoader batchLoader = new RecordBatchLoader(bit1.getContext().getAllocator());
      byte[] bytes;

      int batchCounter = 1;
      int columnValCounter = 0;
      int i = 0;
      FieldInfo currentField;
      HashMap<String, Integer> valuesChecked = new HashMap<>();
      for(QueryResultBatch b : results){

        count += b.getHeader().getRowCount();
        boolean schemaChanged = batchLoader.load(b.getHeader().getDef(), b.getData());

        int recordCount = 0;
        if (schemaChanged) {
          // no change is believed to be needed when the schema changes, with the current mock scan use case
        }

        for (VectorWrapper vw : batchLoader) {
          ValueVector vv = vw.getValueVector();
          currentField = props.fields.get(vv.getField().getName());
          if (VERBOSE_DEBUG){
            System.out.println("\n" + (String) currentField.name);
          }
          if ( ! valuesChecked.containsKey(vv.getField().getName())){
            valuesChecked.put(vv.getField().getName(), 0);
            columnValCounter = 0;
          } else {
            columnValCounter = valuesChecked.get(vv.getField().getName());
          }
          for (int j = 0; j < vv.getAccessor().getValueCount(); j++) {
            if (VERBOSE_DEBUG){
              System.out.print(vv.getAccessor().getObject(j) + ", " + (j % 25 == 0 ? "\n batch:" + batchCounter + " v:" + j + " - " : ""));
            }
            assertField(vv, j, currentField.type,
                currentField.values[columnValCounter % 3], currentField.name + "/");
            columnValCounter++;
          }
          if (VERBOSE_DEBUG){
            System.out.println("\n" + vv.getAccessor().getValueCount());
          }
          valuesChecked.put(vv.getField().getName(), columnValCounter);
        }

        if (VERBOSE_DEBUG){
          for (i = 0; i < batchLoader.getRecordCount(); i++) {
            recordCount++;
            if (i % 50 == 0){
              System.out.println();
              for (VectorWrapper vw : batchLoader) {
                ValueVector v = vw.getValueVector();
                System.out.print(pad(v.getField().getName(), 20) + " ");

              }
              System.out.println();
              System.out.println();
            }

            for (VectorWrapper vw : batchLoader) {
              ValueVector v = vw.getValueVector();
              System.out.print(pad(v.getAccessor().getObject(i) + "", 20) + " ");
            }
            System.out.println();
          }
        }
        batchCounter++;
      }
      for (String s : valuesChecked.keySet()) {
        assertEquals("Record count incorrect for column: " + s, props.recordsPerRowGroup * props.numberRowGroups * numberOfTimesRead, (long) valuesChecked.get(s));
      }
      assertTrue("At least one column should have been checked.", valuesChecked.size() > 0);
    }
  }

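  // Left-pads a value to the given length; used to align the verbose debug output.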
  public String pad(String value, int length) {
    return pad(value, length, " ");
  }

  public String pad(String value, int length, String with) {
    StringBuilder result = new StringBuilder(length);
    result.append(value);

    while (result.length() < length) {
      result.insert(0, with);
    }

    return result.toString();
  }

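  // OutputMutator stub that records the fields added and removed by a record reader so tests can inspect them.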
  class MockOutputMutator implements OutputMutator {
    List<MaterializedField> removedFields = Lists.newArrayList();
    List<ValueVector> addFields = Lists.newArrayList();

    @Override
    public void removeField(MaterializedField field) throws SchemaChangeException {
      removedFields.add(field);
    }

    @Override
    public void addField(ValueVector vector) throws SchemaChangeException {
      addFields.add(vector);
    }

    @Override
    public void removeAllFields() {
      addFields.clear();
    }

    @Override
    public void setNewSchema() throws SchemaChangeException {
    }

    List<MaterializedField> getRemovedFields() {
      return removedFields;
    }

    List<ValueVector> getAddFields() {
      return addFields;
    }
  }

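  // Compares a single value in a vector against the expected value; byte arrays are compared by content.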
  private <T> void assertField(ValueVector valueVector, int index, TypeProtos.MinorType expectedMinorType, Object value, String name) {
    assertField(valueVector, index, expectedMinorType, value, name, 0);
  }

  private <T> void assertField(ValueVector valueVector, int index, TypeProtos.MinorType expectedMinorType, T value, String name, int parentFieldId) {
//    UserBitShared.FieldMetadata metadata = valueVector.getMetadata();
//    SchemaDefProtos.FieldDef def = metadata.getDef();
//    assertEquals(expectedMinorType, def.getMajorType().getMinorType());
//    assertEquals(name, def.getNameList().get(0).getName());
//    assertEquals(parentFieldId, def.getParentId());

    if (expectedMinorType == TypeProtos.MinorType.MAP) {
      return;
    }

    T val = (T) valueVector.getAccessor().getObject(index);
    if (val instanceof String){
      assertEquals(value, val);
    }
    else if (val instanceof byte[]) {
      assertTrue(Arrays.equals((byte[]) value, (byte[]) val));
    } else {
      assertEquals(value, val);
    }
  }

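  // Counter that runs from 0 to maxVal inclusive and then wraps back to 0; used to track the bit
  // position within the current byte when packing boolean values.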
  private class WrapAroundCounter {

    int maxVal;
    int val;

    public WrapAroundCounter(int maxVal) {
      this.maxVal = maxVal;
    }

    public int increment() {
      val++;
      if (val > maxVal) {
        val = 0;
      }
      return val;
    }

    public void reset() {
      val = 0;
    }

  }
}