Package parquet.bytes

Examples of parquet.bytes.BytesInput
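
BytesInput (package parquet.bytes) is parquet-mr's lazy view over a sequence of bytes: it records where the bytes will come from (an array, a stream, an int, a concatenation of other inputs) and materializes them only when the caller drains it, so it is meant to be consumed once and right away. The examples below appear to be drawn from parquet-mr itself and from Apache Drill's Parquet reader. A minimal sketch of the core operations they rely on (from, fromInt, concat, size, writeAllTo):

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import parquet.bytes.BytesInput;

    public class BytesInputSketch {
      public static void main(String[] args) throws IOException {
        byte[] payload = {1, 2, 3, 4};

        // Wrap an existing byte array without copying it.
        BytesInput body = BytesInput.from(payload);

        // Prepend a 4-byte length header, as the value writers below do.
        BytesInput framed = BytesInput.concat(BytesInput.fromInt((int) body.size()), body);

        // Nothing is materialized until writeAllTo drains the chain.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        framed.writeAllTo(out);
        System.out.println(out.size()); // 8: 4-byte length + 4-byte payload
      }
    }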


    // Reading a data page (Drill-style reader): 'before' is f.available() captured
    // before the header was read, so 'length' is the size of the page header itself.
    PageHeader pageHeader = readPageHeader(f);
    int length = before - f.available();
    f = new ByteBufInputStream(parentColumnReader.parentReader.getBufferWithAllData().slice(
        (int) parentColumnReader.readPositionInBuffer + length, pageHeader.getCompressed_page_size()));

    BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
        .decompress(BytesInput.from(f, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size(),
            parentColumnReader.columnChunkMetaData.getCodec());
    currentPage = new Page(
        bytesIn,
        pageHeader.data_page_header.num_values,


      // Reading a dictionary page: if the column chunk records a dictionary page
      // offset, seek to it, read its header, and decompress the page bytes.
      this.dataReader = new ColumnDataReader(f, start, totalByteLength);
      if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
        f.seek(columnChunkMetaData.getDictionaryPageOffset());
        PageHeader pageHeader = Util.readPageHeader(f);
        assert pageHeader.type == PageType.DICTIONARY_PAGE;
        BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
            .decompress( //
                dataReader.getPageAsBytesInput(pageHeader.compressed_page_size), //
                pageHeader.getUncompressed_page_size(), //
                parentColumnReader.columnChunkMetaData.getCodec());
        DictionaryPage page = new DictionaryPage(

    // TODO - figure out whether multiple dictionary pages are possible; the format appears to limit it to one.
    // If there can be more than one, this loop clobbers the earlier dictionary entries on each pass.
    do {
      pageHeader = dataReader.readPageHeader();
      if (pageHeader.getType() == PageType.DICTIONARY_PAGE) {
        BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
            .decompress( //
                dataReader.getPageAsBytesInput(pageHeader.compressed_page_size), //
                pageHeader.getUncompressed_page_size(), //
                parentColumnReader.columnChunkMetaData.getCodec());
        DictionaryPage page = new DictionaryPage(
            bytesIn,
            pageHeader.uncompressed_page_size,
            pageHeader.dictionary_page_header.num_values,
            parquet.column.Encoding.valueOf(pageHeader.dictionary_page_header.encoding.name())
        );
        this.dictionary = page.getEncoding().initDictionary(parentColumnReader.columnDescriptor, page);
      }
    } while (pageHeader.getType() == PageType.DICTIONARY_PAGE);

    // The loop exits on the first non-dictionary header, which belongs to a data page.
    BytesInput bytesIn = parentColumnReader.parentReader.getCodecFactoryExposer()
        .decompress( //
            dataReader.getPageAsBytesInput(pageHeader.compressed_page_size), //
            pageHeader.getUncompressed_page_size(), //
            parentColumnReader.columnChunkMetaData.getCodec());
    currentPage = new Page(

  // This assumes that the full state must be serialized, since there is no close method
  @Override
  public BytesInput getBytes() {
    serializeCurrentValue();
    BytesInput buf = bitWriter.finish();
    if (Log.DEBUG) LOG.debug("writing a buffer of size " + buf.size() + " + 4 bytes");
    // We serialize the length so that on deserialization we can
    // deserialize as we go, instead of having to load everything
    // into memory
    return concat(BytesInput.fromInt((int)buf.size()), buf);
  }
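
The length prefix makes that possible: a reader pulls the 4-byte length, then consumes exactly that many bytes, without loading the rest of the stream. A sketch of both sides, assuming the little-endian layout that BytesInput.fromInt produces (the payload bytes are made up):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;

    import parquet.bytes.BytesInput;
    import parquet.bytes.BytesUtils;

    public class LengthPrefixSketch {
      public static void main(String[] args) throws IOException {
        // Writer side, mirroring getBytes() above.
        BytesInput buf = BytesInput.from(new byte[] {10, 20, 30});
        BytesInput framed = BytesInput.concat(BytesInput.fromInt((int) buf.size()), buf);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        framed.writeAllTo(out);

        // Reader side: take the length first, then only that many bytes.
        InputStream in = new ByteArrayInputStream(out.toByteArray());
        int len = BytesUtils.readIntLittleEndian(in); // 3
        byte[] block = new byte[len];
        int read = in.read(block); // deserialize as we go
        System.out.println(len + " bytes expected, " + read + " read");
      }
    }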

  @Override
  public BytesInput getBytes() {
    try {
      // prepend the length of the column
      BytesInput rle = encoder.toBytes();
      BytesUtils.writeIntLittleEndian(length, Ints.checkedCast(rle.size()));
      return BytesInput.concat(BytesInput.from(length.toByteArray()), rle);
    } catch (IOException e) {
      throw new ParquetEncodingException(e);
    }
  }
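
This variant is the same framing idea, but it writes the prefix into an existing output ('length' appears to be a ByteArrayOutputStream) with BytesUtils.writeIntLittleEndian, and Ints.checkedCast guards the long-to-int narrowing so an oversized RLE run fails fast instead of silently truncating.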

        // Dictionary-encoded page: write each value's dictionary id through the RLE encoder.
        while (iterator.hasNext()) {
          encoder.writeInt(iterator.next());
        }
        // encodes the bit width
        byte[] bytesHeader = new byte[] { (byte) bitWidth };
        BytesInput rleEncodedBytes = encoder.toBytes();
        if (DEBUG) LOG.debug("rle encoded bytes " + rleEncodedBytes.size());
        BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
        if (firstPage && ((bytes.size() + dictionaryByteSize) > rawDataByteSize)) {
          fallBackToPlainEncoding();
        } else {
          // remember size of dictionary when we last wrote a page
          lastUsedDictionarySize = getDictionarySize();
          lastUsedDictionaryByteSize = dictionaryByteSize;
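
The single header byte carries the bit width the decoder needs before it can interpret the RLE/bit-packed payload, and the firstPage check falls back to plain encoding when the dictionary is not paying for itself. A sketch of just the byte layout, with a made-up payload standing in for encoder.toBytes():

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import parquet.bytes.BytesInput;

    public class BitWidthHeaderSketch {
      public static void main(String[] args) throws IOException {
        int bitWidth = 7;
        byte[] rlePayload = {0x03, 0x0E}; // stand-in for encoder.toBytes()

        // One header byte, then the encoded values, as in the writer above.
        BytesInput bytes = BytesInput.concat(
            BytesInput.from(new byte[] { (byte) bitWidth }),
            BytesInput.from(rlePayload));

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        bytes.writeAllTo(out);

        // A reader recovers the bit width first, then hands the rest
        // to the RLE/bit-packed hybrid decoder.
        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
        int decodedWidth = in.read(); // 7
        System.out.println("bit width = " + decodedWidth + ", payload bytes = " + in.available());
      }
    }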

    // Writing a data page: compress first, then emit a header that records both
    // sizes, update the running column-chunk totals, and append the body.
    @Override
    public void writePage(BytesInput bytes, int valueCount, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) throws IOException {
      long uncompressedSize = bytes.size();
      BytesInput compressedBytes = compressor.compress(bytes);
      long compressedSize = compressedBytes.size();
      parquetMetadataConverter.writeDataPageHeader(
          (int)uncompressedSize,
          (int)compressedSize,
          valueCount,
          rlEncoding,
          dlEncoding,
          valuesEncoding,
          buf);
      this.uncompressedLength += uncompressedSize;
      this.compressedLength += compressedSize;
      this.totalValueCount += valueCount;
      this.pageCount += 1;
      compressedBytes.writeAllTo(buf);
      encodings.add(rlEncoding);
      encodings.add(dlEncoding);
      encodings.add(valuesEncoding);
    }
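
Because the header lands in the buffer before the body and records both sizes, a reader can skip a page without decompressing it. A simplified sketch of that ordering, with a hypothetical two-int header standing in for the real Thrift page header:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import parquet.bytes.BytesInput;
    import parquet.bytes.BytesUtils;

    public class PageWriteSketch {
      // Hypothetical header: just the two sizes, not the real Thrift struct.
      static void writePage(BytesInput bytes, ByteArrayOutputStream buf) throws IOException {
        long uncompressedSize = bytes.size();
        BytesInput compressedBytes = bytes; // identity "compression", as when codec == null
        long compressedSize = compressedBytes.size();

        // Header first, so a reader can skip the page body.
        BytesUtils.writeIntLittleEndian(buf, (int) uncompressedSize);
        BytesUtils.writeIntLittleEndian(buf, (int) compressedSize);

        // Body second.
        compressedBytes.writeAllTo(buf);
      }

      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        writePage(BytesInput.from(new byte[] {1, 2, 3}), buf);
        System.out.println(buf.size()); // 8-byte header + 3-byte body = 11
      }
    }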

    // Writing the dictionary page: compressed immediately, but held in memory
    // until the column chunk is flushed.
    @Override
    public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
      if (this.dictionaryPage != null) {
        throw new ParquetEncodingException("Only one dictionary page is allowed");
      }
      BytesInput dictionaryBytes = dictionaryPage.getBytes();
      int uncompressedSize = (int)dictionaryBytes.size();
      BytesInput compressedBytes = compressor.compress(dictionaryBytes);
      this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize, dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
    }
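
The BytesInput.copy call matters here: a BytesInput is a one-shot, lazily evaluated view, and compress may hand back a view over a reusable buffer, so the dictionary page has to be materialized into its own byte array before being held until the chunk is flushed.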

    // Decompressing page bytes: wrap the compressed bytes in a codec input stream
    // and expose exactly uncompressedSize bytes as a lazy BytesInput.
    public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
      final BytesInput decompressed;
      if (codec != null) {
        decompressor.reset();
        InputStream is = codec.createInputStream(new ByteArrayInputStream(bytes.toByteArray()), decompressor);
        decompressed = BytesInput.from(is, uncompressedSize);
      } else {
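
Note that BytesInput.from(is, uncompressedSize) does not read the stream eagerly; the bytes are pulled when the result is consumed, so the decompressor must stay usable until the caller drains it. The truncated else branch presumably returns the input unchanged, mirroring the codec == null case in compress below.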

    // Compressing page bytes: with no codec configured the input passes through
    // untouched; otherwise it is run through the codec into compressedOutBuffer.
    public BytesInput compress(BytesInput bytes) throws IOException {
      final BytesInput compressedBytes;
      if (codec == null) {
        compressedBytes = bytes;
      } else {
        compressedOutBuffer.reset();
        if (compressor != null) {
