Package parquet.hadoop.metadata

Examples of parquet.hadoop.metadata.ColumnChunkMetaData
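
ColumnChunkMetaData describes a single column chunk within a Parquet row group: its column path, primitive type, compression codec, encodings, page offsets, value count, and compressed and uncompressed sizes. The excerpts below, largely from Apache Drill's Parquet reader code, show common patterns: reading footers, sizing row groups, building column lookup maps, and converting or serializing the metadata.

A minimal sketch of reading a footer and inspecting each column chunk; the file path is hypothetical, and the surrounding class and exception handling are omitted:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import parquet.hadoop.ParquetFileReader;
    import parquet.hadoop.metadata.BlockMetaData;
    import parquet.hadoop.metadata.ColumnChunkMetaData;
    import parquet.hadoop.metadata.ParquetMetadata;

    Configuration conf = new Configuration();
    Path file = new Path("/tmp/example.parquet"); // hypothetical input file
    // readFooter(...) throws IOException; it parses only the footer, not the data pages
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, file);
    for (BlockMetaData rowGroup : footer.getBlocks()) {
      for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
        System.out.printf("%s: %d values, %d bytes compressed, %d uncompressed, codec=%s%n",
            chunk.getPath(), chunk.getValueCount(), chunk.getTotalSize(),
            chunk.getTotalUncompressedSize(), chunk.getCodec());
      }
    }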


    totalRecords = footer.getBlocks().get(rowGroupIndex).getRowCount();

    List<ColumnDescriptor> columns = footer.getFileMetaData().getSchema().getColumns();
    allFieldsFixedLength = true;
    ColumnDescriptor column;
    ColumnChunkMetaData columnChunkMetaData;

    // loop to add up the length of the fixed width columns and build the schema
    for (int i = 0; i < columns.size(); ++i) {
      column = columns.get(i);
      // sum the lengths of all of the fixed length fields
      // ...
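
The excerpt above is from a record reader's setup path: it pulls the row count for the target row group out of the footer, then walks the schema's ColumnDescriptors to decide whether every field is fixed-width, summing the fixed field lengths as it goes.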


  private void readFooter() throws IOException {
    watch.reset();
    watch.start();
    rowGroupInfos = new ArrayList<>();
    long start = 0, length = 0;
    ColumnChunkMetaData columnChunkMetaData;
    for (ReadEntryWithPath readEntryWithPath : entries){
      Path path = new Path(readEntryWithPath.getPath());
      List<Footer> footers = ParquetFileReader.readFooters(this.storageEngine.getHadoopConfig(), path);

      for (Footer footer : footers) {
        int index = 0;
        ParquetMetadata metadata = footer.getParquetMetadata();
        for (BlockMetaData rowGroup : metadata.getBlocks()){
          // need to grab block information from HDFS
          columnChunkMetaData = rowGroup.getColumns().iterator().next();
          start = columnChunkMetaData.getFirstDataPageOffset();
          // getTotalByteSize() is not populated correctly here, but each column
          // chunk knows its own size, so sum the chunk sizes instead
          // end = start + rowGroup.getTotalByteSize();
          length = 0;
          for (ColumnChunkMetaData col : rowGroup.getColumns()){
            length += col.getTotalSize();
            // ...
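
The loop above derives a row group's extent from its column chunks: the first chunk's first data page offset marks the start, and the chunk sizes are summed to get the length. A hedged helper capturing just that calculation (the method name is mine):

    static long[] rowGroupExtent(BlockMetaData rowGroup) {
      // the row group starts at the first column chunk's first data page
      long start = rowGroup.getColumns().iterator().next().getFirstDataPageOffset();
      long length = 0;
      for (ColumnChunkMetaData col : rowGroup.getColumns()) {
        length += col.getTotalSize(); // total compressed size of each chunk
      }
      return new long[] { start, length };
    }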

              codecFactoryExposer.getCodecFactory(), fs, filePath);

      for (String[] path : schema.getPaths()) {
        Type type = schema.getType(path);
        if (type.isPrimitive()) {
          ColumnChunkMetaData md = paths.get(ColumnPath.get(path));
          pageReadStore.addColumn(schema.getColumnDescription(path), md);
        }
      }

      writer = new VectorContainerWriter(output);
      // ...
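
The paths map consulted above associates each ColumnPath with its ColumnChunkMetaData for the current row group, so each primitive column can be registered with the page read store. A hedged sketch of how such a map is typically built, assuming a BlockMetaData rowGroup in scope:

    Map<ColumnPath, ColumnChunkMetaData> paths = new HashMap<>();
    for (ColumnChunkMetaData md : rowGroup.getColumns()) {
      paths.put(md.getPath(), md);
    }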

    columnStatuses = new ArrayList<>();
    totalRecords = footer.getBlocks().get(rowGroupIndex).getRowCount();
    List<ColumnDescriptor> columns = footer.getFileMetaData().getSchema().getColumns();
    allFieldsFixedLength = true;
    ColumnDescriptor column;
    ColumnChunkMetaData columnChunkMetaData;
    int columnsToScan = 0;

    MaterializedField field;
    ParquetMetadataConverter metaConverter = new ParquetMetadataConverter();
    FileMetaData fileMetaData;
    // ...
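
This variant of the reader setup also counts how many columns will actually be scanned (columnsToScan) and prepares a ParquetMetadataConverter for translating between the Thrift footer structures and the parquet.hadoop.metadata classes.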

    rowGroupInfos = Lists.newArrayList();
    long start = 0, length = 0;
    rowCount = 0;
    columnValueCounts = new HashMap<>();

    ColumnChunkMetaData columnChunkMetaData;
    for (FileStatus status : statuses) {
      List<Footer> footers = ParquetFileReader.readFooters(formatPlugin.getHadoopConfig(), status);
      if (footers.isEmpty()) {
        throw new IOException(String.format("Unable to find footer for file %s", status.getPath().getName()));
      }

      for (Footer footer : footers) {
        int index = 0;
        ParquetMetadata metadata = footer.getParquetMetadata();
        for (BlockMetaData rowGroup : metadata.getBlocks()) {
          long valueCountInGrp = 0;
          // need to grab block information from HDFS
          columnChunkMetaData = rowGroup.getColumns().iterator().next();
          start = columnChunkMetaData.getFirstDataPageOffset();
          // getTotalByteSize() is not populated correctly here, but each column
          // chunk knows its own size, so sum the chunk sizes instead
          // end = start + rowGroup.getTotalByteSize();
          length = 0;
          for (ColumnChunkMetaData col : rowGroup.getColumns()) {
            // ...
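
Inside this loop, per-column value counts are also accumulated across row groups. A hedged sketch of that bookkeeping using each chunk's getValueCount() and the variables from the excerpt (the exact accounting in the full source may differ):

    for (ColumnChunkMetaData col : rowGroup.getColumns()) {
      length += col.getTotalSize();
      SchemaPath schemaPath = SchemaPath.getCompoundPath(col.getPath().toArray());
      Long previous = columnValueCounts.get(schemaPath);
      columnValueCounts.put(schemaPath,
          previous == null ? col.getValueCount() : previous + col.getValueCount());
      valueCountInGrp = Math.max(valueCountInGrp, col.getValueCount());
    }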

    // Deserialize a ColumnChunkMetaData that was written field-by-field:
    // first the encodings (as ordinals), then five longs in the order that
    // ColumnChunkMetaData.get(...) expects
    int encodingsSize = in.readInt();
    Set<Encoding> encodings = new HashSet<>(encodingsSize);
    for (int i = 0; i < encodingsSize; i++) {
      encodings.add(Encoding.values()[in.readInt()]);
    }
    ColumnChunkMetaData column = ColumnChunkMetaData.get(
        ColumnPath.get(columnPath), type, codec, encodings,
        in.readLong(), in.readLong(), in.readLong(), in.readLong(), in.readLong());
    return column;
  }
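
For reference, a hedged sketch of the matching write side for the fields visible above (the column path, type, and codec are read before this excerpt begins, so writing them is elided; assumes java.io.DataOutput):

    static void writeColumnChunk(DataOutput out, ColumnChunkMetaData column) throws IOException {
      // encodings are written as ordinals, mirroring Encoding.values()[...] on the read side
      Set<Encoding> encodings = column.getEncodings();
      out.writeInt(encodings.size());
      for (Encoding encoding : encodings) {
        out.writeInt(encoding.ordinal());
      }
      // five longs, in the order ColumnChunkMetaData.get(...) expects them
      out.writeLong(column.getFirstDataPageOffset());
      out.writeLong(column.getDictionaryPageOffset());
      out.writeLong(column.getValueCount());
      out.writeLong(column.getTotalSize());
      out.writeLong(column.getTotalUncompressedSize());
    }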

              codecFactoryExposer.getCodecFactory(), operatorContext.getAllocator(), fs, filePath);

      for (String[] path : schema.getPaths()) {
        Type type = schema.getType(path);
        if (type.isPrimitive()) {
          ColumnChunkMetaData md = paths.get(ColumnPath.get(path));
          pageReadStore.addColumn(schema.getColumnDescription(path), md);
        }
      }

      writer = new VectorContainerWriter(output);
      // ...
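
Same pattern as the earlier page-read-store excerpt; the only visible difference is that the codec factory call here also receives a buffer allocator from the operator context.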

    columnStatuses = new ArrayList<>();
//    totalRecords = footer.getBlocks().get(rowGroupIndex).getRowCount();
    List<ColumnDescriptor> columns = footer.getFileMetaData().getSchema().getColumns();
    allFieldsFixedLength = true;
    ColumnDescriptor column;
    ColumnChunkMetaData columnChunkMetaData;
    int columnsToScan = 0;
    mockRecordsRead = 0;

    MaterializedField field;
//    ParquetMetadataConverter metaConverter = new ParquetMetadataConverter();
    // ...
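
A mock variant of the reader setup: the record-count and metadata-converter lines are commented out, and mockRecordsRead tracks progress instead.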

            || (filePath != null && !filePath.equals(columnChunk.getFile_path()))) {
          throw new ParquetDecodingException("all column chunks of the same row group must be in the same file for now");
        }
        parquet.format.ColumnMetaData metaData = columnChunk.meta_data;
        ColumnPath path = getPath(metaData);
        ColumnChunkMetaData column = ColumnChunkMetaData.get(
            path,
            messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName(),
            CompressionCodecName.fromParquet(metaData.codec),
            fromFormatEncodings(metaData.encodings),
            metaData.data_page_offset,
            // ...
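
The converter excerpt ends mid-call; the remaining arguments come from the same Thrift ColumnMetaData, in the order ColumnChunkMetaData.get(...) expects. A hedged reconstruction of the complete call:

    ColumnChunkMetaData column = ColumnChunkMetaData.get(
        path,
        messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName(),
        CompressionCodecName.fromParquet(metaData.codec),
        fromFormatEncodings(metaData.encodings),
        metaData.data_page_offset,
        metaData.dictionary_page_offset,
        metaData.num_values,
        metaData.total_compressed_size,
        metaData.total_uncompressed_size);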
