Package org.apache.drill.exec.store.hive

Source Code of org.apache.drill.exec.store.hive.HiveTextRecordReader

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.hive;

import java.io.IOException;
import java.util.List;

import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.vector.NullableBigIntVector;
import org.apache.drill.exec.vector.NullableIntVector;
import org.apache.drill.exec.vector.NullableVarCharVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.allocator.VectorAllocator;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputSplit;

import com.google.common.collect.Lists;

/**
* Note: Native hive text record reader is not complete in implementation. For now use
* {@link org.apache.drill.exec.store.hive.HiveRecordReader}.
*/
public class HiveTextRecordReader extends HiveRecordReader {

  public final byte delimiter;
  public final List<Integer> columnIds;
  private final int numCols;

  public HiveTextRecordReader(Table table, Partition partition, InputSplit inputSplit, List<SchemaPath> projectedColumns, FragmentContext context) throws ExecutionSetupException {
    super(table, partition, inputSplit, projectedColumns, context, null);
    String d = table.getSd().getSerdeInfo().getParameters().get("field.delim");
    if (d != null) {
      delimiter = d.getBytes()[0];
    } else {
      delimiter = (byte) 1;
    }
    assert delimiter > 0;
    List<Integer> ids = Lists.newArrayList();
    for (int i = 0; i < tableColumns.size(); i++) {
      if (selectedColumnNames.contains(tableColumns.get(i))) {
        ids.add(i);
      }
    }
    columnIds = ids;
    numCols = tableColumns.size();
  }

  public boolean setValue(PrimitiveObjectInspector.PrimitiveCategory pCat, ValueVector vv, int index, byte[] bytes, int start) {
    switch(pCat) {
      case BINARY:
        throw new UnsupportedOperationException();
      case BOOLEAN:
        throw new UnsupportedOperationException();
      case BYTE:
        throw new UnsupportedOperationException();
      case DECIMAL:
        throw new UnsupportedOperationException();
      case DOUBLE:
        throw new UnsupportedOperationException();
      case FLOAT:
        throw new UnsupportedOperationException();
      case INT: {
        int value = 0;
        byte b;
        for (int i = start; (b = bytes[i]) != delimiter; i++) {
          value = (value * 10) + b - 48;
        }
        return ((NullableIntVector) vv).getMutator().setSafe(index, value);
      }
      case LONG: {
        long value = 0;
        byte b;
        for (int i = start; (b = bytes[i]) != delimiter; i++) {
          value = (value * 10) + b - 48;
        }
        return ((NullableBigIntVector) vv).getMutator().setSafe(index, value);
      }
      case SHORT:
        throw new UnsupportedOperationException();
      case STRING: {
        int end = start;
        for (int i = start; i < bytes.length; i++) {
          if (bytes[i] == delimiter) {
            end = i;
            break;
          }
          end = bytes.length;
        }
        return ((NullableVarCharVector) vv).getMutator().setSafe(index, bytes, start, end - start);
      }
      case TIMESTAMP:
        throw new UnsupportedOperationException();

      default:
        throw new UnsupportedOperationException("Could not determine type");
    }
  }


  @Override
  public int next() {
    for (ValueVector vv : vectors) {
      VectorAllocator.getAllocator(vv, 50).alloc(TARGET_RECORD_COUNT);
    }
    try {
      int recordCount = 0;
      if (redoRecord != null) {
        int length = ((Text) value).getLength();
        byte[] bytes = ((Text) value).getBytes();
        int[] delimPositions = new int[numCols];
        delimPositions[0] = -1;
        int p = 0;
        for (int i = 0; i < length; i++) {
          if (bytes[i] == delimiter) {
            delimPositions[p++] = i;
          }
        }
        for (int id : columnIds) {
          boolean success = false; // setValue(primitiveCategories.get(id), vectors.get(id), recordCount, bytes, delimPositions[id]);
          if (!success) {
            throw new DrillRuntimeException(String.format("Failed to write value for column %s", selectedColumnNames.get(id)));
          }

        }
        redoRecord = null;
      }
      while (recordCount < TARGET_RECORD_COUNT && reader.next(key, value)) {
        int length = ((Text) value).getLength();
        byte[] bytes = ((Text) value).getBytes();
        int[] delimPositions = new int[numCols + 1];
        delimPositions[0] = -1;
        int p = 1;
        for (int i = 0; i < length; i++) {
          if (bytes[i] == delimiter) {
            delimPositions[p++] = i;
          }
        }
        for (int i = 0; i < columnIds.size(); i++) {
          int id = columnIds.get(i);
          boolean success = false; // setValue(primitiveCategories.get(i), vectors.get(i), recordCount, bytes, delimPositions[id] + 1);
          if (!success) {
            redoRecord = value;
            if (partition != null) populatePartitionVectors(recordCount);
            return recordCount;
          }
        }
        recordCount++;
      }
      if (partition != null) populatePartitionVectors(recordCount);
      return recordCount;
    } catch (IOException e) {
      throw new DrillRuntimeException(e);
    }
  }
}
TOP

Related Classes of org.apache.drill.exec.store.hive.HiveTextRecordReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.