Package parquet.bytes

Examples of parquet.bytes.BytesInput
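
BytesInput (package parquet.bytes) is parquet-mr's lazy view over a sequence of bytes: it records where the bytes will come from (an array, a stream, an int, a concatenation of other inputs) and materializes them only when the caller drains it, so it is meant to be consumed once and right away. The examples below appear to be drawn from parquet-mr itself and from Apache Drill's Parquet reader. A minimal sketch of the core operations they rely on (from, fromInt, concat, size, writeAllTo):

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import parquet.bytes.BytesInput;

    public class BytesInputSketch {
      public static void main(String[] args) throws IOException {
        byte[] payload = {1, 2, 3, 4};

        // Wrap an existing byte array without copying it.
        BytesInput body = BytesInput.from(payload);

        // Prepend a 4-byte length header, as the value writers below do.
        BytesInput framed = BytesInput.concat(BytesInput.fromInt((int) body.size()), body);

        // Nothing is materialized until writeAllTo drains the chain.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        framed.writeAllTo(out);
        System.out.println(out.size()); // 8: 4-byte length + 4-byte payload
      }
    }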


    // Reading a data page (Drill-style reader): 'before' is f.available() captured
    // before the header was read, so 'length' is the size of the page header itself.
    PageHeader pageHeader = readPageHeader(f);
    int length = before - f.available();
    f = new ByteBufInputStream(parentColumnReader.parentReader.getBufferWithAllData().slice(
        (int) parentColumnReader.readPositionInBuffer + length, pageHeader.getCompressed_page_size()));

    BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
        .decompress(BytesInput.from(f, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size(),
            parentColumnReader.columnChunkMetaData.getCodec());
    currentPage = new Page(
        bytesIn,
        pageHeader.data_page_header.num_values,


      // Reading a dictionary page: if the column chunk records a dictionary page
      // offset, seek to it, read its header, and decompress the page bytes.
      this.dataReader = new ColumnDataReader(f, start, totalByteLength);
      if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
        f.seek(columnChunkMetaData.getDictionaryPageOffset());
        PageHeader pageHeader = Util.readPageHeader(f);
        assert pageHeader.type == PageType.DICTIONARY_PAGE;
        BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
            .decompress( //
                dataReader.getPageAsBytesInput(pageHeader.compressed_page_size), //
                pageHeader.getUncompressed_page_size(), //
                parentColumnReader.columnChunkMetaData.getCodec());
        DictionaryPage page = new DictionaryPage(

    // TODO - figure out whether multiple dictionary pages are possible; the format appears to limit it to one.
    // If there can be more than one, this loop clobbers the earlier dictionary entries on each pass.
    do {
      pageHeader = dataReader.readPageHeader();
      if (pageHeader.getType() == PageType.DICTIONARY_PAGE) {
        BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
            .decompress( //
                dataReader.getPageAsBytesInput(pageHeader.compressed_page_size), //
                pageHeader.getUncompressed_page_size(), //
                parentColumnReader.columnChunkMetaData.getCodec());
        DictionaryPage page = new DictionaryPage(
            bytesIn,
            pageHeader.uncompressed_page_size,
            pageHeader.dictionary_page_header.num_values,
            parquet.column.Encoding.valueOf(pageHeader.dictionary_page_header.encoding.name())
        );
        this.dictionary = page.getEncoding().initDictionary(parentColumnReader.columnDescriptor, page);
      }
    } while (pageHeader.getType() == PageType.DICTIONARY_PAGE);

    // The loop exits on the first non-dictionary header, which belongs to a data page.
    BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
        .decompress( //
            dataReader.getPageAsBytesInput(pageHeader.compressed_page_size), //
            pageHeader.getUncompressed_page_size(), //
            parentColumnReader.columnChunkMetaData.getCodec());
    currentPage = new Page(

  // This assumes that the full state must be serialized, since there is no close method
  @Override
  public BytesInput getBytes() {
    serializeCurrentValue();
    BytesInput buf = bitWriter.finish();
    if (Log.DEBUG) LOG.debug("writing a buffer of size " + buf.size() + " + 4 bytes");
    // We serialize the length so that on deserialization we can
    // deserialize as we go, instead of having to load everything
    // into memory
    return concat(BytesInput.fromInt((int)buf.size()), buf);
  }
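
The length prefix makes that possible: a reader pulls the 4-byte length, then consumes exactly that many bytes, without loading the rest of the stream. A sketch of both sides, assuming the little-endian layout that BytesInput.fromInt produces (the payload bytes are made up):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;

    import parquet.bytes.BytesInput;
    import parquet.bytes.BytesUtils;

    public class LengthPrefixSketch {
      public static void main(String[] args) throws IOException {
        // Writer side, mirroring getBytes() above.
        BytesInput buf = BytesInput.from(new byte[] {10, 20, 30});
        BytesInput framed = BytesInput.concat(BytesInput.fromInt((int) buf.size()), buf);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        framed.writeAllTo(out);

        // Reader side: take the length first, then only that many bytes.
        InputStream in = new ByteArrayInputStream(out.toByteArray());
        int len = BytesUtils.readIntLittleEndian(in); // 3
        byte[] block = new byte[len];
        int read = in.read(block); // deserialize as we go
        System.out.println(len + " bytes expected, " + read + " read");
      }
    }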

  @Override
  public BytesInput getBytes() {
    try {
      // prepend the length of the column
      BytesInput rle = encoder.toBytes();
      BytesUtils.writeIntLittleEndian(length, Ints.checkedCast(rle.size()));
      return BytesInput.concat(BytesInput.from(length.toByteArray()), rle);
    } catch (IOException e) {
      throw new ParquetEncodingException(e);
    }
  }
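
This variant is the same framing idea, but it writes the prefix into an existing output ('length' appears to be a ByteArrayOutputStream) with BytesUtils.writeIntLittleEndian, and Ints.checkedCast guards the long-to-int narrowing so an oversized RLE run fails fast instead of silently truncating.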

        // Dictionary-encoded page: write each value's dictionary id through the RLE encoder.
        while (iterator.hasNext()) {
          encoder.writeInt(iterator.next());
        }
        // encodes the bit width
        byte[] bytesHeader = new byte[] { (byte) bitWidth };
        BytesInput rleEncodedBytes = encoder.toBytes();
        if (DEBUG) LOG.debug("rle encoded bytes " + rleEncodedBytes.size());
        BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
        if (firstPage && ((bytes.size() + dictionaryByteSize) > rawDataByteSize)) {
          fallBackToPlainEncoding();
        } else {
          // remember size of dictionary when we last wrote a page
          lastUsedDictionarySize = getDictionarySize();
          lastUsedDictionaryByteSize = dictionaryByteSize;
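
The single header byte carries the bit width the decoder needs before it can interpret the RLE/bit-packed payload, and the firstPage check falls back to plain encoding when the dictionary is not paying for itself. A sketch of just the byte layout, with a made-up payload standing in for encoder.toBytes():

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import parquet.bytes.BytesInput;

    public class BitWidthHeaderSketch {
      public static void main(String[] args) throws IOException {
        int bitWidth = 7;
        byte[] rlePayload = {0x03, 0x0E}; // stand-in for encoder.toBytes()

        // One header byte, then the encoded values, as in the writer above.
        BytesInput bytes = BytesInput.concat(
            BytesInput.from(new byte[] { (byte) bitWidth }),
            BytesInput.from(rlePayload));

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        bytes.writeAllTo(out);

        // A reader recovers the bit width first, then hands the rest
        // to the RLE/bit-packed hybrid decoder.
        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
        int decodedWidth = in.read(); // 7
        System.out.println("bit width = " + decodedWidth + ", payload bytes = " + in.available());
      }
    }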

    // Writing a data page: compress first, then emit a header that records both
    // sizes, update the running column-chunk totals, and append the body.
    @Override
    public void writePage(BytesInput bytes, int valueCount, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) throws IOException {
      long uncompressedSize = bytes.size();
      BytesInput compressedBytes = compressor.compress(bytes);
      long compressedSize = compressedBytes.size();
      parquetMetadataConverter.writeDataPageHeader(
          (int)uncompressedSize,
          (int)compressedSize,
          valueCount,
          rlEncoding,
          dlEncoding,
          valuesEncoding,
          buf);
      this.uncompressedLength += uncompressedSize;
      this.compressedLength += compressedSize;
      this.totalValueCount += valueCount;
      this.pageCount += 1;
      compressedBytes.writeAllTo(buf);
      encodings.add(rlEncoding);
      encodings.add(dlEncoding);
      encodings.add(valuesEncoding);
    }
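
Because the header lands in the buffer before the body and records both sizes, a reader can skip a page without decompressing it. A simplified sketch of that ordering, with a hypothetical two-int header standing in for the real Thrift page header:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import parquet.bytes.BytesInput;
    import parquet.bytes.BytesUtils;

    public class PageWriteSketch {
      // Hypothetical header: just the two sizes, not the real Thrift struct.
      static void writePage(BytesInput bytes, ByteArrayOutputStream buf) throws IOException {
        long uncompressedSize = bytes.size();
        BytesInput compressedBytes = bytes; // identity "compression", as when codec == null
        long compressedSize = compressedBytes.size();

        // Header first, so a reader can skip the page body.
        BytesUtils.writeIntLittleEndian(buf, (int) uncompressedSize);
        BytesUtils.writeIntLittleEndian(buf, (int) compressedSize);

        // Body second.
        compressedBytes.writeAllTo(buf);
      }

      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        writePage(BytesInput.from(new byte[] {1, 2, 3}), buf);
        System.out.println(buf.size()); // 8-byte header + 3-byte body = 11
      }
    }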

    // Writing the dictionary page: compressed immediately, but held in memory
    // until the column chunk is flushed.
    @Override
    public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
      if (this.dictionaryPage != null) {
        throw new ParquetEncodingException("Only one dictionary page is allowed");
      }
      BytesInput dictionaryBytes = dictionaryPage.getBytes();
      int uncompressedSize = (int)dictionaryBytes.size();
      BytesInput compressedBytes = compressor.compress(dictionaryBytes);
      this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize, dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
    }
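
The BytesInput.copy call matters here: a BytesInput is a one-shot, lazily evaluated view, and compress may hand back a view over a reusable buffer, so the dictionary page has to be materialized into its own byte array before being held until the chunk is flushed.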

    // Decompressing page bytes: wrap the compressed bytes in a codec input stream
    // and expose exactly uncompressedSize bytes as a lazy BytesInput.
    public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
      final BytesInput decompressed;
      if (codec != null) {
        decompressor.reset();
        InputStream is = codec.createInputStream(new ByteArrayInputStream(bytes.toByteArray()), decompressor);
        decompressed = BytesInput.from(is, uncompressedSize);
      } else {
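
Note that BytesInput.from(is, uncompressedSize) does not read the stream eagerly; the bytes are pulled when the result is consumed, so the decompressor must stay usable until the caller drains it. The truncated else branch presumably returns the input unchanged, mirroring the codec == null case in compress below.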

    // Compressing page bytes: with no codec configured the input passes through
    // untouched; otherwise it is run through the codec into compressedOutBuffer.
    public BytesInput compress(BytesInput bytes) throws IOException {
      final BytesInput compressedBytes;
      if (codec == null) {
        compressedBytes = bytes;
      } else {
        compressedOutBuffer.reset();
        if (compressor != null) {
