Package parquet.format

Examples of parquet.format.PageHeader
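
parquet.format.PageHeader is the Thrift-generated struct that precedes every page in a Parquet column chunk, and parquet.format.Util handles its (de)serialization. As a minimal sketch of that round trip, using only the parquet-format classes already exercised in the snippets below (the class name PageHeaderRoundTrip and the concrete sizes, value count, and encodings are illustrative, not taken from any snippet):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    import parquet.format.DataPageHeader;
    import parquet.format.Encoding;
    import parquet.format.PageHeader;
    import parquet.format.PageType;
    import parquet.format.Util;

    public class PageHeaderRoundTrip {
      public static void main(String[] args) throws Exception {
        // page type, uncompressed page size, compressed page size
        PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, 1024, 512);
        // value count, values encoding, definition-level encoding, repetition-level encoding
        pageHeader.data_page_header = new DataPageHeader(100, Encoding.PLAIN, Encoding.RLE, Encoding.RLE);

        // serialize with Thrift, then parse the bytes back into an equivalent struct
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Util.writePageHeader(pageHeader, out);
        PageHeader readBack = Util.readPageHeader(new ByteArrayInputStream(out.toByteArray()));
        System.out.println(readBack.getData_page_header().getNum_values()); // 100
      }
    }

The snippets below show the same struct in production readers and writers: each reader calls Util.readPageHeader on the column chunk stream, branches on pageHeader.type, and uses compressed_page_size and uncompressed_page_size to slice and decompress the page body that follows.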


    // take a small look-ahead slice (up to 200 bytes) so the page header can be decoded
    // without knowing its serialized length in advance
    ByteBufInputStream f = new ByteBufInputStream(parentColumnReader.parentReader.getBufferWithAllData().slice(
        (int) parentColumnReader.readPositionInBuffer,
        Math.min(200, parentColumnReader.parentReader.getBufferWithAllData().capacity() - (int) parentColumnReader.readPositionInBuffer)));
    int before = f.available();
    PageHeader pageHeader = readPageHeader(f);
    // the header's serialized length is however many bytes readPageHeader consumed from the slice
    int length = before - f.available();
    // re-slice the buffer so the stream now covers exactly the compressed page body that follows the header
    f = new ByteBufInputStream(parentColumnReader.parentReader.getBufferWithAllData().slice(
        (int) parentColumnReader.readPositionInBuffer + length, pageHeader.getCompressed_page_size()));

    BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
        .decompress(BytesInput.from(f, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size(),
            parentColumnReader.columnChunkMetaData.getCodec());
    currentPage = new Page(
        bytesIn,
        pageHeader.data_page_header.num_values,
        pageHeader.uncompressed_page_size,
        ParquetStorageEngine.parquetMetadataConverter.getEncoding(pageHeader.data_page_header.repetition_level_encoding),
        ParquetStorageEngine.parquetMetadataConverter.getEncoding(pageHeader.data_page_header.definition_level_encoding),
        ParquetStorageEngine.parquetMetadataConverter.getEncoding(pageHeader.data_page_header.encoding)
    );

    parentColumnReader.readPositionInBuffer += pageHeader.compressed_page_size + length;
    byteLength = pageHeader.uncompressed_page_size;

    if (currentPage == null) {
      return false;
    }

    // if the buffer holding each page's data is not large enough to hold the current page, re-allocate, with a little extra space
    if (pageHeader.getUncompressed_page_size() > pageDataByteArray.length) {
      pageDataByteArray = new byte[pageHeader.getUncompressed_page_size() + 100];
    }
    // TODO - would like to get this into the mainline, hopefully before alpha
    // NOTE: toByteArray() materializes a fresh array, so the resize above is immediately replaced by this assignment
    pageDataByteArray = currentPage.getBytes().toByteArray();

    if (parentColumnReader.columnDescriptor.getMaxDefinitionLevel() != 0){
View Full Code Here


    @Override
    public DictionaryPage readDictionaryPage() {
      if (dictionaryPage == null) {
        try {
          long pos = in.getPos();
          PageHeader pageHeader = Util.readPageHeader(in);
          if (pageHeader.getDictionary_page_header() == null) {
            in.seek(pos);
            return null;
          }
          dictionaryPage =
                  new DictionaryPage(
                          decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()),
                          pageHeader.getDictionary_page_header().getNum_values(),
                          parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding)
                  );
          System.out.println(dictionaryPage);
        } catch (IOException e) {
          throw new RuntimeException(e);
View Full Code Here

    @Override
    public Page readPage() {
      try {
        while(valueReadSoFar < metaData.getValueCount()) {
          PageHeader pageHeader = Util.readPageHeader(in);
          switch (pageHeader.type) {
            case DICTIONARY_PAGE:
              if (dictionaryPage == null) {
                dictionaryPage =
                        new DictionaryPage(
                                decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()),
                                pageHeader.uncompressed_page_size,
                                parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding)
                        );
              } else {
                in.skip(pageHeader.compressed_page_size);
              }
              break;
            case DATA_PAGE:
              valueReadSoFar += pageHeader.data_page_header.getNum_values();
              return new Page(
                      decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()),
                      pageHeader.data_page_header.num_values,
                      pageHeader.uncompressed_page_size,
                      parquetMetadataConverter.getEncoding(pageHeader.data_page_header.repetition_level_encoding),
                      parquetMetadataConverter.getEncoding(pageHeader.data_page_header.definition_level_encoding),
                      parquetMetadataConverter.getEncoding(pageHeader.data_page_header.encoding)
View Full Code Here

    try {
      FSDataInputStream f = fs.open(path);
      this.dataReader = new ColumnDataReader(f, start, totalByteLength);
      if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
        f.seek(columnChunkMetaData.getDictionaryPageOffset());
        PageHeader pageHeader = Util.readPageHeader(f);
        assert pageHeader.type == PageType.DICTIONARY_PAGE;
        BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
            .decompress( //
                dataReader.getPageAsBytesInput(pageHeader.compressed_page_size), //
                pageHeader.getUncompressed_page_size(), //
                parentColumnReader.columnChunkMetaData.getCodec());
        DictionaryPage page = new DictionaryPage(
            bytesIn,
            pageHeader.uncompressed_page_size,
            pageHeader.dictionary_page_header.num_values,
View Full Code Here

    public ColumnChunkPageReader readAllPages() throws IOException {
      List<Page> pagesInChunk = new ArrayList<Page>();
      DictionaryPage dictionaryPage = null;
      long valuesCountReadSoFar = 0;
      while (valuesCountReadSoFar < descriptor.metadata.getValueCount()) {
        PageHeader pageHeader = readPageHeader(this);
        switch (pageHeader.type) {
          case DICTIONARY_PAGE:
            // there is only one dictionary page per column chunk
            if (dictionaryPage != null) {
              throw new ParquetDecodingException("more than one dictionary page in column " + descriptor.col);
View Full Code Here

    @Override
    public DictionaryPage readDictionaryPage() {
      if (dictionaryPage == null) {
        try {
          long pos = in.getPos();
          PageHeader pageHeader = Util.readPageHeader(in);
          if (pageHeader.getDictionary_page_header() == null) {
            in.seek(pos);
            return null;
          }
          dictionaryPage =
                  new DictionaryPage(
                          decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()),
                          pageHeader.getDictionary_page_header().getNum_values(),
                          parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding)
                  );
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
View Full Code Here

        if (lastPage != null) {
          lastPage.release();
          lastPage = null;
        }
        while(valueReadSoFar < metaData.getValueCount()) {
          PageHeader pageHeader = Util.readPageHeader(in);
          switch (pageHeader.type) {
            case DICTIONARY_PAGE:
              if (dictionaryPage == null) {
                dictionaryPage =
                        new DictionaryPage(
                                decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()),
                                pageHeader.uncompressed_page_size,
                                parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding)
                        );
              } else {
                in.skip(pageHeader.compressed_page_size);
              }
              break;
            case DATA_PAGE:
              valueReadSoFar += pageHeader.data_page_header.getNum_values();
              ByteBuf buf = allocator.buffer(pageHeader.compressed_page_size);
              lastPage = buf;
              ByteBuffer buffer = buf.nioBuffer(0, pageHeader.compressed_page_size);
              CompatibilityUtil.getBuf(in, buffer, pageHeader.compressed_page_size);
              return new Page(
                      decompressor.decompress(BytesInput.from(buffer, 0, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()),
                      pageHeader.data_page_header.num_values,
                      pageHeader.uncompressed_page_size,
                      parquetMetadataConverter.fromParquetStatistics(pageHeader.data_page_header.statistics, columnDescriptor.getType()),
                      parquetMetadataConverter.getEncoding(pageHeader.data_page_header.repetition_level_encoding),
                      parquetMetadataConverter.getEncoding(pageHeader.data_page_header.definition_level_encoding),
View Full Code Here

    try {
      FSDataInputStream f = fs.open(path);
      this.dataReader = new ColumnDataReader(f, start, columnChunkMetaData.getTotalSize());
      if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
        f.seek(columnChunkMetaData.getDictionaryPageOffset());
        PageHeader pageHeader = Util.readPageHeader(f);
        assert pageHeader.type == PageType.DICTIONARY_PAGE;

        BytesInput bytesIn;
        ByteBuf uncompressedData = allocateBuffer(pageHeader.getUncompressed_page_size());
        allocatedDictionaryBuffers.add(uncompressedData);
        if (parentColumnReader.columnChunkMetaData.getCodec() == CompressionCodecName.UNCOMPRESSED) {
          dataReader.getPageAsBytesBuf(uncompressedData, pageHeader.compressed_page_size);
          bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer().getBytesInput(uncompressedData,
            pageHeader.getUncompressed_page_size());
        } else {
          ByteBuf compressedData = allocateBuffer(pageHeader.compressed_page_size);
          dataReader.getPageAsBytesBuf(compressedData, pageHeader.compressed_page_size);
          bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
            .decompress(parentColumnReader.columnChunkMetaData.getCodec(),
              compressedData,
              uncompressedData,
              pageHeader.compressed_page_size,
              pageHeader.getUncompressed_page_size());
          compressedData.release();
        }
        DictionaryPage page = new DictionaryPage(
            bytesIn,
            pageHeader.uncompressed_page_size,
View Full Code Here

      int uncompressedSize, int compressedSize,
      int valueCount,
      parquet.column.Encoding rlEncoding,
      parquet.column.Encoding dlEncoding,
      parquet.column.Encoding valuesEncoding) {
    PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
    // TODO: pageHeader.crc = ...;
    pageHeader.data_page_header = new DataPageHeader(
        valueCount,
        getEncoding(valuesEncoding),
        getEncoding(dlEncoding),
View Full Code Here

  }

  public void writeDictionaryPageHeader(
      int uncompressedSize, int compressedSize, int valueCount,
      parquet.column.Encoding valuesEncoding, OutputStream to) throws IOException {
    PageHeader pageHeader = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
    pageHeader.dictionary_page_header = new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding));
    writePageHeader(pageHeader, to);
  }
View Full Code Here
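
To close the loop on the writer snippet above, a hedged usage sketch: it assumes the enclosing class is parquet-mr's parquet.format.converter.ParquetMetadataConverter (the snippet itself does not name the class) and uses illustrative sizes and a value count of 10. The written bytes parse back with Util.readPageHeader exactly as in the reader snippets earlier on this page.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    import parquet.format.PageHeader;
    import parquet.format.Util;
    // assumption: the writeDictionaryPageHeader shown above is declared on this converter class
    import parquet.format.converter.ParquetMetadataConverter;

    public class DictionaryPageHeaderExample {
      public static void main(String[] args) throws Exception {
        ParquetMetadataConverter converter = new ParquetMetadataConverter();

        // uncompressed size, compressed size, value count, values encoding, destination stream
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        converter.writeDictionaryPageHeader(64, 64, 10, parquet.column.Encoding.PLAIN_DICTIONARY, out);

        // the written bytes parse back into a PageHeader whose dictionary_page_header field is set
        PageHeader header = Util.readPageHeader(new ByteArrayInputStream(out.toByteArray()));
        System.out.println(header.type);                                        // DICTIONARY_PAGE
        System.out.println(header.getDictionary_page_header().getNum_values()); // 10
      }
    }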
