Package parquet.format

Examples of parquet.format.SchemaElement


    // loop to add up the length of the fixed width columns and build the schema
    for (int i = 0; i < columns.size(); ++i) {
      column = columns.get(i);
      logger.debug("name: " + fileMetaData.getSchema().get(i).name);
      SchemaElement se = schemaElements.get(column.getPath()[0]);
      MajorType mt = ParquetToDrillTypeConverter.toMajorType(column.getType(), se.getType_length(), getDataMode(column), se);
      field = MaterializedField.create(toFieldName(column.getPath()),mt);
      if ( ! fieldSelected(field)){
        continue;
      }
      columnsToScan++;
      // sum the lengths of all of the fixed length fields
      if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
        if (column.getMaxRepetitionLevel() > 0) {
          allFieldsFixedLength = false;
        }
        // There is not support for the fixed binary type yet in parquet, leaving a task here as a reminder
        // TODO - implement this when the feature is added upstream
          if (column.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY){
              bitWidthAllFixedFields += se.getType_length() * 8;
          } else {
            bitWidthAllFixedFields += getTypeLengthInBits(column.getType());
          }
      } else {
        allFieldsFixedLength = false;
      }
    }
    rowGroupOffset = footer.getBlocks().get(rowGroupIndex).getColumns().get(0).getFirstDataPageOffset();

    // none of the columns in the parquet file matched the request columns from the query
    if (columnsToScan == 0){
      throw new ExecutionSetupException("Error reading from parquet file. No columns requested were found in the file.");
    }
    if (allFieldsFixedLength) {
      recordsPerBatch = (int) Math.min(Math.min(batchSize / bitWidthAllFixedFields,
          footer.getBlocks().get(0).getColumns().get(0).getValueCount()), 65535);
    }
    else {
      recordsPerBatch = DEFAULT_RECORDS_TO_READ_IF_NOT_FIXED_WIDTH;
    }

    try {
      ValueVector v;
      ConvertedType convertedType;
      SchemaElement schemaElement;
      ArrayList<VarLengthColumn> varLengthColumns = new ArrayList<>();
      // initialize all of the column read status objects
      boolean fieldFixedLength = false;
      for (int i = 0; i < columns.size(); ++i) {
        column = columns.get(i);
        columnChunkMetaData = footer.getBlocks().get(rowGroupIndex).getColumns().get(i);
        schemaElement = schemaElements.get(column.getPath()[0]);
        convertedType = schemaElement.getConverted_type();
        MajorType type = ParquetToDrillTypeConverter.toMajorType(column.getType(), schemaElement.getType_length(), getDataMode(column), schemaElement);
        field = MaterializedField.create(toFieldName(column.getPath()), type);
        // the field was not requested to be read
        if ( ! fieldSelected(field)) continue;

        fieldFixedLength = column.getType() != PrimitiveType.PrimitiveTypeName.BINARY;
View Full Code Here


    // loop to add up the length of the fixed width columns and build the schema
    for (int i = 0; i < columns.size(); ++i) {
      column = columns.get(i);
      logger.debug("name: " + fileMetaData.getSchema().get(i).name);
      SchemaElement se = schemaElements.get(column.getPath()[0]);
      MajorType mt = ParquetToDrillTypeConverter.toMajorType(column.getType(), se.getType_length(), getDataMode(column), se);
      field = MaterializedField.create(toFieldName(column.getPath()),mt);
      if ( ! fieldSelected(field)) {
        continue;
      }
      columnsToScan++;
      // sum the lengths of all of the fixed length fields
      if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
        if (column.getMaxRepetitionLevel() > 0) {
          allFieldsFixedLength = false;
        }
        if (column.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
            bitWidthAllFixedFields += se.getType_length() * 8;
        } else {
          bitWidthAllFixedFields += getTypeLengthInBits(column.getType());
        }
      } else {
        allFieldsFixedLength = false;
      }
    }
//    rowGroupOffset = footer.getBlocks().get(rowGroupIndex).getColumns().get(0).getFirstDataPageOffset();

    if (columnsToScan != && allFieldsFixedLength) {
      recordsPerBatch = (int) Math.min(Math.min(batchSize / bitWidthAllFixedFields,
          footer.getBlocks().get(0).getColumns().get(0).getValueCount()), 65535);
    }
    else {
      recordsPerBatch = DEFAULT_RECORDS_TO_READ_IF_NOT_FIXED_WIDTH;
    }

    try {
      ValueVector v;
      SchemaElement schemaElement;
      ArrayList<VarLengthColumn> varLengthColumns = new ArrayList<>();
      // initialize all of the column read status objects
      boolean fieldFixedLength;
      for (int i = 0; i < columns.size(); ++i) {
        column = columns.get(i);
        columnChunkMetaData = footer.getBlocks().get(rowGroupIndex).getColumns().get(i);
        schemaElement = schemaElements.get(column.getPath()[0]);
        MajorType type = ParquetToDrillTypeConverter.toMajorType(column.getType(), schemaElement.getType_length(), getDataMode(column), schemaElement);
        field = MaterializedField.create(toFieldName(column.getPath()), type);
        // the field was not requested to be read
        if ( ! fieldSelected(field)) {
          continue;
        }
View Full Code Here

  private void addToList(final List<SchemaElement> result, parquet.schema.Type field) {
    field.accept(new TypeVisitor() {
      @Override
      public void visit(PrimitiveType primitiveType) {
        SchemaElement element = new SchemaElement(primitiveType.getName());
        element.setRepetition_type(toParquetRepetition(primitiveType.getRepetition()));
        element.setType(getType(primitiveType.getPrimitiveTypeName()));
        if (primitiveType.getOriginalType() != null) {
          element.setConverted_type(getConvertedType(primitiveType.getOriginalType()));
        }
        if (primitiveType.getTypeLength() > 0) {
          element.setType_length(primitiveType.getTypeLength());
        }
        result.add(element);
      }

      @Override
      public void visit(MessageType messageType) {
        SchemaElement element = new SchemaElement(messageType.getName());
        visitChildren(result, messageType.asGroupType(), element);
      }

      @Override
      public void visit(GroupType groupType) {
        SchemaElement element = new SchemaElement(groupType.getName());
        element.setRepetition_type(toParquetRepetition(groupType.getRepetition()));
        if (groupType.getOriginalType() != null) {
          element.setConverted_type(getConvertedType(groupType.getOriginalType()));
        }
        visitChildren(result, groupType, element);
      }

      private void visitChildren(final List<SchemaElement> result,
          GroupType groupType, SchemaElement element) {
        element.setNum_children(groupType.getFieldCount());
        result.add(element);
        for (parquet.schema.Type field : groupType.getFields()) {
          addToList(result, field);
        }
      }
View Full Code Here

    return ColumnPath.get(path);
  }

  MessageType fromParquetSchema(List<SchemaElement> schema) {
    Iterator<SchemaElement> iterator = schema.iterator();
    SchemaElement root = iterator.next();
    return new MessageType(root.getName(), convertChildren(iterator, root.getNum_children()));
  }
View Full Code Here

  }

  private parquet.schema.Type[] convertChildren(Iterator<SchemaElement> schema, int childrenCount) {
    parquet.schema.Type[] result = new parquet.schema.Type[childrenCount];
    for (int i = 0; i < result.length; i++) {
      SchemaElement schemaElement = schema.next();
      if ((!schemaElement.isSetType() && !schemaElement.isSetNum_children())
          || (schemaElement.isSetType() && schemaElement.isSetNum_children())) {
        throw new RuntimeException("bad element " + schemaElement);
      }
      Repetition repetition = fromParquetRepetition(schemaElement.getRepetition_type());
      String name = schemaElement.getName();
      OriginalType originalType = null;
      if (schemaElement.isSetConverted_type()) {
        originalType = getOriginalType(schemaElement.converted_type);
      }

      // Create Parquet Type.
      if (schemaElement.type != null) {
        if (schemaElement.isSetType_length()) {
          result[i] = new PrimitiveType(
              repetition,
              getPrimitive(schemaElement.getType()),
              schemaElement.type_length,
              name,
              originalType);
        } else {
          result[i] = new PrimitiveType(
              repetition,
              getPrimitive(schemaElement.getType()),
              name,
              originalType);
        }
      } else {
        result[i] = new GroupType(
            repetition,
            name,
            originalType,
            convertChildren(schema, schemaElement.getNum_children()));
      }
    }
    return result;
  }
View Full Code Here

    // loop to add up the length of the fixed width columns and build the schema
    for (int i = 0; i < columns.size(); ++i) {
      column = columns.get(i);
      logger.debug("name: " + fileMetaData.getSchema().get(i).name);
      SchemaElement se = schemaElements.get(column.getPath()[0]);
      MajorType mt = ParquetToDrillTypeConverter.toMajorType(column.getType(), se.getType_length(), getDataMode(column), se);
      field = MaterializedField.create(toFieldName(column.getPath()),mt);
      if ( ! fieldSelected(field)){
        continue;
      }
      columnsToScan++;
      // sum the lengths of all of the fixed length fields
      if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
        if (column.getMaxRepetitionLevel() > 0) {
          allFieldsFixedLength = false;
        }
        if (column.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY){
            bitWidthAllFixedFields += se.getType_length() * 8;
        } else {
          bitWidthAllFixedFields += getTypeLengthInBits(column.getType());
        }
      } else {
        allFieldsFixedLength = false;
      }
    }
//    rowGroupOffset = footer.getBlocks().get(rowGroupIndex).getColumns().get(0).getFirstDataPageOffset();

    if (columnsToScan != && allFieldsFixedLength) {
      recordsPerBatch = (int) Math.min(Math.min(batchSize / bitWidthAllFixedFields,
          footer.getBlocks().get(0).getColumns().get(0).getValueCount()), 65535);
    }
    else {
      recordsPerBatch = DEFAULT_RECORDS_TO_READ_IF_NOT_FIXED_WIDTH;
    }

    try {
      ValueVector v;
      SchemaElement schemaElement;
      ArrayList<VarLengthColumn> varLengthColumns = new ArrayList<>();
      // initialize all of the column read status objects
      boolean fieldFixedLength;
      for (int i = 0; i < columns.size(); ++i) {
        column = columns.get(i);
        columnChunkMetaData = footer.getBlocks().get(rowGroupIndex).getColumns().get(i);
        schemaElement = schemaElements.get(column.getPath()[0]);
        MajorType type = ParquetToDrillTypeConverter.toMajorType(column.getType(), schemaElement.getType_length(), getDataMode(column), schemaElement);
        field = MaterializedField.create(toFieldName(column.getPath()), type);
        // the field was not requested to be read
        if ( ! fieldSelected(field)) continue;

        fieldFixedLength = column.getType() != PrimitiveType.PrimitiveTypeName.BINARY;
View Full Code Here

TOP

Related Classes of parquet.format.SchemaElement

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.