Source Code of org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter

/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.parquet.write;

import org.apache.hadoop.hive.ql.io.parquet.writable.BigDecimalWritable;
import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;

import parquet.io.ParquetEncodingException;
import parquet.io.api.RecordConsumer;
import parquet.schema.GroupType;
import parquet.schema.Type;

/**
 * DataWritableWriter is a writer that reads an ArrayWritable record and
 * passes its values to Parquet with the expected schema.
 */
public class DataWritableWriter {

  private final RecordConsumer recordConsumer;
  private final GroupType schema;

  public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType schema) {
    this.recordConsumer = recordConsumer;
    this.schema = schema;
  }

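  /**
   * Writes one record, framed by startMessage/endMessage events; a null
   * record is skipped without emitting anything.
   */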
  public void write(final ArrayWritable arr) {
    if (arr == null) {
      return;
    }
    recordConsumer.startMessage();
    writeData(arr, schema);
    recordConsumer.endMessage();
  }

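  /**
   * Recursively writes the fields of a group. Each non-null value is framed
   * by startField/endField; nested groups are additionally framed by
   * startGroup/endGroup and dispatched to writeArray (repeated groups) or
   * writeData (regular groups).
   */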
  private void writeData(final ArrayWritable arr, final GroupType type) {
    if (arr == null) {
      return;
    }
    final int fieldCount = type.getFieldCount();
    Writable[] values = arr.get();
    for (int field = 0; field < fieldCount; ++field) {
      final Type fieldType = type.getType(field);
      final String fieldName = fieldType.getName();
      final Writable value = values[field];
      if (value == null) {
        continue;
      }
      recordConsumer.startField(fieldName, field);

      if (fieldType.isPrimitive()) {
        writePrimitive(value);
      } else {
        recordConsumer.startGroup();
        if (value instanceof ArrayWritable) {
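          // A repeated group holds array elements; any other group is a
          // nested record written field by field.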
          if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) {
            writeArray((ArrayWritable) value, fieldType.asGroupType());
          } else {
            writeData((ArrayWritable) value, fieldType.asGroupType());
          }
        } else {
          throw new ParquetEncodingException("This should be an ArrayWritable: " + value);
        }

        recordConsumer.endGroup();
      }

      recordConsumer.endField(fieldName, field);
    }
  }

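  /**
   * Writes a repeated group: for each declared sub-field, emits one
   * startField/endField pair containing every non-null element of the array.
   */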
  private void writeArray(final ArrayWritable array, final GroupType type) {
    if (array == null) {
      return;
    }
    final Writable[] subValues = array.get();
    final int fieldCount = type.getFieldCount();
    for (int field = 0; field < fieldCount; ++field) {
      final Type subType = type.getType(field);
      recordConsumer.startField(subType.getName(), field);
      for (int i = 0; i < subValues.length; ++i) {
        final Writable subValue = subValues[i];
        if (subValue != null) {
          if (subType.isPrimitive()) {
            if (subValue instanceof ArrayWritable) {
              // Unwrap the wrapped element at this field's index.
              writePrimitive(((ArrayWritable) subValue).get()[field]);
            } else {
              writePrimitive(subValue);
            }
          } else {
            if (!(subValue instanceof ArrayWritable)) {
              throw new ParquetEncodingException("This should be an ArrayWritable: " + subValue);
            } else {
              recordConsumer.startGroup();
              writeData((ArrayWritable) subValue, subType.asGroupType());
              recordConsumer.endGroup();
            }
          }
        }
      }
      recordConsumer.endField(subType.getName(), field);
    }
  }

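  /**
   * Dispatches a primitive Writable to the matching RecordConsumer call;
   * shorts and bytes are widened to int32, and nulls are ignored.
   */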
  private void writePrimitive(final Writable value) {
    if (value == null) {
      return;
    }
    if (value instanceof DoubleWritable) {
      recordConsumer.addDouble(((DoubleWritable) value).get());
    } else if (value instanceof BooleanWritable) {
      recordConsumer.addBoolean(((BooleanWritable) value).get());
    } else if (value instanceof FloatWritable) {
      recordConsumer.addFloat(((FloatWritable) value).get());
    } else if (value instanceof IntWritable) {
      recordConsumer.addInteger(((IntWritable) value).get());
    } else if (value instanceof LongWritable) {
      recordConsumer.addLong(((LongWritable) value).get());
    } else if (value instanceof ShortWritable) {
      recordConsumer.addInteger(((ShortWritable) value).get());
    } else if (value instanceof ByteWritable) {
      recordConsumer.addInteger(((ByteWritable) value).get());
    } else if (value instanceof BigDecimalWritable) {
      throw new UnsupportedOperationException("BigDecimal writing not implemented");
    } else if (value instanceof BinaryWritable) {
      recordConsumer.addBinary(((BinaryWritable) value).getBinary());
    } else {
      throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
    }
  }
}
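
A minimal usage sketch (not part of the original source): LoggingRecordConsumer below is a hypothetical stand-in for the consumer that Parquet's write path normally supplies, and it prints the event stream that DataWritableWriter produces for a two-column record. The schema string, class names, and values are illustrative assumptions.

import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

import parquet.io.api.Binary;
import parquet.io.api.RecordConsumer;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class DataWritableWriterSketch {

  // Hypothetical RecordConsumer that logs each event instead of encoding it.
  static class LoggingRecordConsumer extends RecordConsumer {
    @Override public void startMessage()              { System.out.println("startMessage"); }
    @Override public void endMessage()                { System.out.println("endMessage"); }
    @Override public void startField(String f, int i) { System.out.println("startField " + f + " " + i); }
    @Override public void endField(String f, int i)   { System.out.println("endField " + f + " " + i); }
    @Override public void startGroup()                { System.out.println("startGroup"); }
    @Override public void endGroup()                  { System.out.println("endGroup"); }
    @Override public void addInteger(int v)           { System.out.println("addInteger " + v); }
    @Override public void addLong(long v)             { System.out.println("addLong " + v); }
    @Override public void addBoolean(boolean v)       { System.out.println("addBoolean " + v); }
    @Override public void addFloat(float v)           { System.out.println("addFloat " + v); }
    @Override public void addDouble(double v)         { System.out.println("addDouble " + v); }
    @Override public void addBinary(Binary v)         { System.out.println("addBinary " + v); }
  }

  public static void main(String[] args) {
    // Column order in the Writable[] must match the field order in the schema.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { optional int32 id; optional double score; }");
    Writable[] columns = { new IntWritable(42), new DoubleWritable(3.14) };
    new DataWritableWriter(new LoggingRecordConsumer(), schema)
        .write(new ArrayWritable(Writable.class, columns));
  }
}

Running this should print, in order: startMessage, startField id 0, addInteger 42, endField id 0, startField score 1, addDouble 3.14, endField score 1, endMessage.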