Package parquet.hadoop.metadata

Examples of parquet.hadoop.metadata.BlockMetaData
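
BlockMetaData describes one row group of a Parquet file: its row count, its total byte size, the metadata of each column chunk it contains, and optionally the path of the file holding it. A minimal sketch of the accessor API follows, using only methods that appear in the snippets below; the values and the file name are made up for illustration:

      // Build a BlockMetaData by hand, the way readBlock() and
      // fromParquetMetadata() below do, then read the same fields back.
      BlockMetaData block = new BlockMetaData();
      block.setRowCount(1000L);             // rows in this row group
      block.setTotalByteSize(65536L);       // total byte size of the row group
      block.setPath("part-00000.parquet");  // hypothetical file path

      long rows = block.getRowCount();        // 1000
      long bytes = block.getTotalByteSize();  // 65536
      // getColumns() returns the ColumnChunkMetaData added via addColumn()
      int columnChunks = block.getColumns().size();  // 0 here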


      // Build a codec factory for decompression and resolve the target file.
      CodecFactoryExposer codecFactoryExposer = new CodecFactoryExposer(conf);
      FileSystem fs = FileSystem.get(conf);
      Path filePath = new Path(entry.getPath());

      // Each element of footer.getBlocks() describes one row group.
      BlockMetaData blockMetaData = footer.getBlocks().get(entry.getRowGroupIndex());

      recordCount = (int) blockMetaData.getRowCount();

      ColumnChunkIncReadStore pageReadStore = new ColumnChunkIncReadStore(recordCount,
              codecFactoryExposer.getCodecFactory(), fs, filePath);

      for (String[] path : schema.getPaths()) {
        // ... (snippet truncated)
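
For context, the footer used in the snippet above is a parsed Parquet file footer. Below is a minimal sketch of obtaining it via the pre-Apache parquet-mr API (ParquetFileReader.readFooter) and walking its row groups; the file path is hypothetical:

      Configuration conf = new Configuration();
      Path filePath = new Path("/tmp/example.parquet"); // hypothetical path
      // readFooter parses the file's footer into a ParquetMetadata object
      ParquetMetadata footer = ParquetFileReader.readFooter(conf, filePath);
      for (BlockMetaData block : footer.getBlocks()) {
        System.out.println("row group: rows=" + block.getRowCount()
            + ", totalByteSize=" + block.getTotalByteSize());
      }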


    writeKeyValues(out, extraMetadata);
    writeKeyValues(out, readSupportMetadata);
  }

  private BlockMetaData readBlock(DataInput in) throws IOException {
    final BlockMetaData block = new BlockMetaData();
    // Column chunk count, then one serialized column chunk each.
    int size = in.readInt();
    for (int i = 0; i < size; i++) {
      block.addColumn(readColumn(in));
    }
    block.setRowCount(in.readLong());
    block.setTotalByteSize(in.readLong());
    // A false flag means a non-null file path follows.
    if (!in.readBoolean()) {
      block.setPath(in.readUTF().intern());
    }
    return block;
  }
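
readBlock above deserializes a BlockMetaData from a DataInput; the write-side counterpart mirrors its wire format. The following is a sketch inferred from readBlock, not the verbatim source, and writeColumn is assumed as the counterpart of readColumn:

  private void writeBlock(DataOutput out, BlockMetaData block) throws IOException {
    // column chunk count, then each serialized column chunk
    out.writeInt(block.getColumns().size());
    for (ColumnChunkMetaData column : block.getColumns()) {
      writeColumn(out, column); // assumed counterpart of readColumn above
    }
    out.writeLong(block.getRowCount());
    out.writeLong(block.getTotalByteSize());
    // flag is true when there is no path; otherwise a UTF path follows
    out.writeBoolean(block.getPath() == null);
    if (block.getPath() != null) {
      out.writeUTF(block.getPath());
    }
  }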

  /**
   * Reads the column chunks of the next row group.
   *
   * @return a PageReadStore for the next row group, or null when all row
   *         groups have been read
   */
  public PageReadStore readNextRowGroup() throws IOException {
    if (currentBlock == blocks.size()) {
      return null;
    }
    BlockMetaData block = blocks.get(currentBlock);
    if (block.getRowCount() == 0) {
      throw new RuntimeException("Illegal row group of 0 rows");
    }
    ColumnChunkPageReadStore columnChunkPageReadStore = new ColumnChunkPageReadStore(block.getRowCount());
    // prepare the list of consecutive chunks to read them in one scan
    List<ConsecutiveChunkList> allChunks = new ArrayList<ConsecutiveChunkList>();
    ConsecutiveChunkList currentChunks = null;
    for (ColumnChunkMetaData mc : block.getColumns()) {
      ColumnPath pathKey = mc.getPath();
      BenchmarkCounter.incrementTotalBytes(mc.getTotalSize());
      // Only read column chunks that were requested (present in paths).
      ColumnDescriptor columnDescriptor = paths.get(pathKey);
      if (columnDescriptor != null) {
        long startingPos = getStartingPos(mc);
        // ... (snippet truncated)
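
A typical consumption loop for readNextRowGroup, sketched against the same pre-Apache parquet-mr API; the reader construction is an assumption based on common usage:

      ParquetMetadata footer = ParquetFileReader.readFooter(conf, filePath);
      MessageType schema = footer.getFileMetaData().getSchema();
      ParquetFileReader reader = new ParquetFileReader(
          conf, filePath, footer.getBlocks(), schema.getColumns());
      PageReadStore pages;
      // readNextRowGroup() returns null once every row group has been read
      while ((pages = reader.readNextRowGroup()) != null) {
        System.out.println("read row group with " + pages.getRowCount() + " rows");
      }
      reader.close();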

  /**
   * Starts a new row group (block).
   *
   * @param recordCount the number of records in this block
   */
  public void startBlock(long recordCount) throws IOException {
    state = state.startBlock();
    if (DEBUG) LOG.debug(out.getPos() + ": start block");
//    out.write(MAGIC); // TODO: add a magic delimiter
    currentBlock = new BlockMetaData();
    currentRecordCount = recordCount;
  }
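
startBlock is one step of the ParquetFileWriter state machine: each row group is bracketed by startBlock/endBlock inside a start/end file session, and endBlock seals the current BlockMetaData into the footer. A rough sketch of the call order, with the column-writing steps elided:

      ParquetFileWriter writer = new ParquetFileWriter(conf, schema, outputPath);
      writer.start();                  // writes the leading magic bytes
      writer.startBlock(recordCount);  // begins one row group
      // ... startColumn()/endColumn() once per column chunk ...
      writer.endBlock();               // finalizes the row group's BlockMetaData
      writer.end(extraMetaData);       // writes the footer and trailing magic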

      CodecFactoryExposer codecFactoryExposer = new CodecFactoryExposer(conf);
      FileSystem fs = FileSystem.get(conf);
      Path filePath = new Path(entry.getPath());

      BlockMetaData blockMetaData = footer.getBlocks().get(entry.getRowGroupIndex());

      recordCount = (int) blockMetaData.getRowCount();

      // Same pattern as the first example, but this ColumnChunkIncReadStore
      // overload also takes the operator's buffer allocator.
      pageReadStore = new ColumnChunkIncReadStore(recordCount,
              codecFactoryExposer.getCodecFactory(), operatorContext.getAllocator(), fs, filePath);

      for (String[] path : schema.getPaths()) {
        // ... (snippet truncated)
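
Note that the (int) cast of getRowCount() in these snippets wraps around silently for row groups with more than Integer.MAX_VALUE rows. On Java 8+, a fail-fast alternative is:

      // throws ArithmeticException instead of silently overflowing
      int recordCount = Math.toIntExact(blockMetaData.getRowCount());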

  public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws IOException {
    MessageType messageType = fromParquetSchema(parquetMetadata.getSchema());
    List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
    List<RowGroup> row_groups = parquetMetadata.getRow_groups();
    // Convert each Thrift RowGroup into a BlockMetaData.
    for (RowGroup rowGroup : row_groups) {
      BlockMetaData blockMetaData = new BlockMetaData();
      blockMetaData.setRowCount(rowGroup.getNum_rows());
      blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
      List<ColumnChunk> columns = rowGroup.getColumns();
      String filePath = columns.get(0).getFile_path();
      for (ColumnChunk columnChunk : columns) {
        // All column chunks of a row group must share one file path.
        if ((filePath == null && columnChunk.getFile_path() != null)
            || (filePath != null && !filePath.equals(columnChunk.getFile_path()))) {
          throw new ParquetDecodingException("all column chunks of the same row group must be in the same file for now");
        }
        parquet.format.ColumnMetaData metaData = columnChunk.meta_data;
        ColumnPath path = getPath(metaData);
        ColumnChunkMetaData column = ColumnChunkMetaData.get(
            path,
            messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName(),
            CompressionCodecName.fromParquet(metaData.codec),
            fromFormatEncodings(metaData.encodings),
            metaData.data_page_offset,
            metaData.dictionary_page_offset,
            metaData.num_values,
            metaData.total_compressed_size,
            metaData.total_uncompressed_size);
        // TODO
        // index_page_offset
        // key_value_metadata
        blockMetaData.addColumn(column);
      }
      blockMetaData.setPath(filePath);
      blocks.add(blockMetaData);
    }
    // Copy the file-level key/value metadata.
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    List<KeyValue> key_value_metadata = parquetMetadata.getKey_value_metadata();
    if (key_value_metadata != null) {
      // ... (snippet truncated)
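
The key/value loop is cut off above; a plausible completion (a sketch, not the verbatim source) simply copies each Thrift KeyValue into the Java map:

      for (KeyValue keyValue : key_value_metadata) {
        keyValueMetaData.put(keyValue.key, keyValue.value);
      }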
