Examples of PagedBytes


Examples of org.apache.lucene.util.PagedBytes

      Terms terms = reader.terms(key.field);

      final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

      final PagedBytes bytes = new PagedBytes(15);

      int startTermsBPV;

      final int termCountHardLimit;
      if (maxDoc == Integer.MAX_VALUE) {
        termCountHardLimit = Integer.MAX_VALUE;
      } else {
        termCountHardLimit = maxDoc+1;
      }

      // TODO: use Uninvert?
      if (terms != null) {
        // Try for coarse estimate for number of bits; this
        // should be an underestimate most of the time, which
        // is fine -- GrowableWriter will reallocate as needed
        long numUniqueTerms = terms.size();
        if (numUniqueTerms != -1L) {
          if (numUniqueTerms > termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            numUniqueTerms = termCountHardLimit;
          }

          startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
        } else {
          startTermsBPV = 1;
        }
      } else {
        startTermsBPV = 1;
      }

      MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
      final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

      int termOrd = 0;

      // TODO: use Uninvert?

      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;

        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (termOrd >= termCountHardLimit) {
            break;
          }

          termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            // Store 1+ ord into packed bits
            docToTermOrd.set(docID, 1+termOrd);
          }
          termOrd++;
        }
      }

      // maybe an int-only impl?
      return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset, docToTermOrd.getMutable(), termOrd);
    }
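
The SortedDocValuesImpl built here resolves an ord back to its term bytes through the frozen reader: copyUsingLengthPrefix returns the start offset of each copied term, and PagedBytes.Reader.fill decodes the length prefix at that offset. A minimal read-side sketch with illustrative names (docID is a stand-in, not taken from the snippet):

      PagedBytes.Reader termBytes = bytes.freeze(true); // the reader handed to SortedDocValuesImpl
      int docID = 0;                                    // hypothetical document of interest
      int ord = (int) docToTermOrd.get(docID) - 1;      // ords were stored as 1+ord; 0 means "no term"
      BytesRef scratch = new BytesRef();
      if (ord >= 0) {
        // fill() reads the length prefix written by copyUsingLengthPrefix,
        // then points scratch at the term bytes inside the page
        termBytes.fill(scratch, termOrdToBytesOffset.get(ord));
      }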

Examples of org.apache.lucene.util.PagedBytes

      final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

      final int termCountHardLimit = maxDoc;

      // Holds the actual term data, expanded.
      final PagedBytes bytes = new PagedBytes(15);

      int startBPV;

      if (terms != null) {
        // Try for coarse estimate for number of bits; this
        // should be an underestimate most of the time, which
        // is fine -- GrowableWriter will reallocate as needed
        long numUniqueTerms = terms.size();
        if (numUniqueTerms != -1L) {
          if (numUniqueTerms > termCountHardLimit) {
            numUniqueTerms = termCountHardLimit;
          }
          startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
        } else {
          startBPV = 1;
        }
      } else {
        startBPV = 1;
      }

      final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio);
     
      // pointer==0 means not set
      bytes.copyUsingLengthPrefix(new BytesRef());

      if (terms != null) {
        int termCount = 0;
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;
        while(true) {
          if (termCount++ == termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            break;
          }

          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          final long pointer = bytes.copyUsingLengthPrefix(term);
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            docToOffset.set(docID, pointer);
          }
        }
      }

      // maybe an int-only impl?
      return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable());
    }
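
Reading this structure back is a single pointer chase: because an empty BytesRef was copied before any real term, pointer 0 holds a zero-length prefix and unset documents naturally come back empty. A sketch under the same assumptions (docID is illustrative):

      PagedBytes.Reader reader = bytes.freeze(true); // the reader handed to BinaryDocValuesImpl
      int docID = 0;                                 // hypothetical document of interest
      BytesRef result = new BytesRef();
      reader.fill(result, docToOffset.get(docID));   // pointer 0 yields the empty sentinel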

Examples of org.apache.lucene.util.PagedBytes

  }
 
  private BinaryDocValues loadBinary(FieldInfo field) throws IOException {
    BinaryEntry entry = binaries.get(field.number);
    data.seek(entry.offset);
    PagedBytes bytes = new PagedBytes(16);
    bytes.copy(data, entry.numBytes);
    final PagedBytes.Reader bytesReader = bytes.freeze(true);
    if (entry.minLength == entry.maxLength) {
      final int fixedLength = entry.minLength;
      return new BinaryDocValues() {
        @Override
        public void get(int docID, BytesRef result) {
          // fixed-length values are contiguous, so the slice for a document
          // starts at docID * fixedLength
          bytesReader.fillSlice(result, fixedLength * (long) docID, fixedLength);
        }
      };

Examples of org.apache.lucene.util.PagedBytes

      Terms terms = reader.terms(key.field);

      final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

      final PagedBytes bytes = new PagedBytes(15);

      int startBytesBPV;
      int startTermsBPV;
      int startNumUniqueTerms;

      final int termCountHardLimit;
      if (maxDoc == Integer.MAX_VALUE) {
        termCountHardLimit = Integer.MAX_VALUE;
      } else {
        termCountHardLimit = maxDoc+1;
      }

      // TODO: use Uninvert?
      if (terms != null) {
        // Try for coarse estimate for number of bits; this
        // should be an underestimate most of the time, which
        // is fine -- GrowableWriter will reallocate as needed
        long numUniqueTerms = terms.size();
        if (numUniqueTerms != -1L) {
          if (numUniqueTerms > termCountHardLimit) {
            // app is misusing the API (there is more than
            // one term per doc); in this case we make best
            // effort to load what we can (see LUCENE-2142)
            numUniqueTerms = termCountHardLimit;
          }

          startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
          startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);

          startNumUniqueTerms = (int) numUniqueTerms;
        } else {
          startBytesBPV = 1;
          startTermsBPV = 1;
          startNumUniqueTerms = 1;
        }
      } else {
        startBytesBPV = 1;
        startTermsBPV = 1;
        startNumUniqueTerms = 1;
      }

      GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
      final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

      int termOrd = 0;

      // TODO: use Uninvert?

      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null;

        while(true) {
          final BytesRef term = termsEnum.next();
          if (term == null) {
            break;
          }
          if (termOrd >= termCountHardLimit) {
            break;
          }

          if (termOrd == termOrdToBytesOffset.size()) {
            // NOTE: this code only runs if the incoming
            // reader impl doesn't implement
            // size (which should be uncommon)
            termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
          }
          termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
          docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
          while (true) {
            final int docID = docs.nextDoc();
            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
            // Store 1+ ord into packed bits
            docToTermOrd.set(docID, 1+termOrd);
          }
          termOrd++;
        }

        if (termOrdToBytesOffset.size() > termOrd) {
          termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
        }
      }

      // maybe an int-only impl?
      return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
    }
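
PackedInts.bitsRequired drives both starting estimates above: docToTermOrd holds ords (at most numUniqueTerms distinct values), while termOrdToBytesOffset holds byte offsets into the PagedBytes, sized on the rough guess of about four bytes of prefix-coded data per term; GrowableWriter widens later if either guess is low. For example, with one million unique terms:

      long numUniqueTerms = 1000000L;
      int startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);     // 20 bits covers any ord
      int startBytesBPV = PackedInts.bitsRequired(numUniqueTerms * 4); // 22 bits, assuming ~4 bytes/term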

Examples of org.apache.lucene.util.PagedBytes

    }

    // If we need our "term index wrapper", these will be
    // init'd below:
    List<BytesRef> indexedTerms = null;
    PagedBytes indexedTermsBytes = null;

    boolean testedOrd = false;

    // we need a minimum of 9 bytes, but round up to 12 since the space would
    // be wasted with most allocators anyway.
    byte[] tempArr = new byte[12];

    //
    // enumerate all terms, and build an intermediate form of the un-inverted field.
    //
    // During this intermediate form, every document has a (potential) byte[]
    // and the int[maxDoc()] array either contains the termNumber list directly
    // or the *end* offset of the termNumber list in its byte array (for faster
    // appending and faster creation of the final form).
    //
    // idea... if things are too large while building, we could do a range of docs
    // at a time (but it would be a fair amount slower to build)
    // could also do ranges in parallel to take advantage of multiple CPUs

    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
    // values.  This requires going over the field first to find the most
    // frequent terms ahead of time.

    int termNum = 0;
    docsEnum = null;

    // Loop begins with te positioned to first term (we call
    // seek above):
    for (;;) {
      final BytesRef t = te.term();
      if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
        break;
      }
      //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

      if (!testedOrd) {
        try {
          ordBase = (int) te.ord();
          //System.out.println("got ordBase=" + ordBase);
        } catch (UnsupportedOperationException uoe) {
          // Reader cannot provide ord support, so we wrap
          // our own support by creating our own terms index:
          indexedTerms = new ArrayList<BytesRef>();
          indexedTermsBytes = new PagedBytes(15);
          //System.out.println("NO ORDS");
        }
        testedOrd = true;
      }

      visitTerm(te, termNum);

      if (indexedTerms != null && (termNum & indexIntervalMask) == 0) {
        // Index this term
        sizeOfIndexedStrings += t.length;
        BytesRef indexedTerm = new BytesRef();
        indexedTermsBytes.copy(t, indexedTerm);
        // TODO: really should 1) strip off useless suffix,
        // and 2) use FST not array/PagedBytes
        indexedTerms.add(indexedTerm);
      }

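The term-index branch above leans on PagedBytes.copy(BytesRef, BytesRef), which appends the incoming bytes to the current block and points the output BytesRef into that shared storage, so indexedTerms keeps no per-term byte[] copies. A small sketch of the idiom (the term value is made up; note this copy variant requires the source to fit within one block and is not meant to be combined with freeze(true)):

      PagedBytes pool = new PagedBytes(15);              // 32 KB blocks (1 << 15)
      List<BytesRef> index = new ArrayList<BytesRef>();
      BytesRef term = new BytesRef("example-term");      // hypothetical term
      BytesRef pooled = new BytesRef();
      pool.copy(term, pooled);                           // pooled now points into the pool's pages
      index.add(pooled);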

Examples of org.apache.lucene.util.PagedBytes

    try {
      CodecUtil.checkHeader(input, Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_CODEC_NAME,
                                   Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_START,
                                   Lucene40DocValuesFormat.BYTES_FIXED_STRAIGHT_VERSION_CURRENT);
      final int fixedLength = input.readInt();
      PagedBytes bytes = new PagedBytes(16);
      bytes.copy(input, fixedLength * (long)state.segmentInfo.getDocCount());
      final PagedBytes.Reader bytesReader = bytes.freeze(true);
      if (input.getFilePointer() != input.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
      }
      success = true;
      return new BinaryDocValues() {
        @Override
        public void get(int docID, BytesRef result) {
          // fixed-length values are contiguous, so slice at docID * fixedLength
          bytesReader.fillSlice(result, fixedLength * (long) docID, fixedLength);
        }
      };
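
This loader and the ones that follow all share one pattern: stream the file's value bytes into a PagedBytes with copy(IndexInput, long), freeze it into an immutable Reader, then serve per-document slices. A self-contained sketch with a hypothetical file name and toy data (two documents, fixedLength = 4):

      Directory dir = new RAMDirectory();
      IndexOutput out = dir.createOutput("vals.bin", IOContext.DEFAULT);
      out.writeBytes(new byte[] {1, 2, 3, 4, 5, 6, 7, 8}, 8);
      out.close();

      IndexInput data = dir.openInput("vals.bin", IOContext.DEFAULT);
      PagedBytes bytes = new PagedBytes(16);         // 64 KB blocks (1 << 16)
      bytes.copy(data, 8);                           // pull all value bytes on-heap
      data.close();

      PagedBytes.Reader reader = bytes.freeze(true);
      BytesRef scratch = new BytesRef();
      reader.fillSlice(scratch, 4L * 1, 4);          // the four bytes belonging to docID 1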

Examples of org.apache.lucene.util.PagedBytes

      index = dir.openInput(indexName, state.context);
      CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX,
                                   Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START,
                                   Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
      long totalBytes = index.readVLong();
      PagedBytes bytes = new PagedBytes(16);
      bytes.copy(data, totalBytes);
      final PagedBytes.Reader bytesReader = bytes.freeze(true);
      final PackedInts.Reader reader = PackedInts.getReader(index);
      if (data.getFilePointer() != data.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
      }
      if (index.getFilePointer() != index.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
      }

Examples of org.apache.lucene.util.PagedBytes

                                   Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_START,
                                   Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);
     
      final int fixedLength = data.readInt();
      final int valueCount = index.readInt();
      PagedBytes bytes = new PagedBytes(16);
      bytes.copy(data, fixedLength * (long) valueCount);
      final PagedBytes.Reader bytesReader = bytes.freeze(true);
      final PackedInts.Reader reader = PackedInts.getReader(index);
      if (data.getFilePointer() != data.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
      }
      if (index.getFilePointer() != index.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
      }

Examples of org.apache.lucene.util.PagedBytes

      CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX,
                                   Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_START,
                                   Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
     
      final long totalBytes = index.readLong();
      final PagedBytes bytes = new PagedBytes(16);
      bytes.copy(data, totalBytes);
      final PagedBytes.Reader bytesReader = bytes.freeze(true);
      final PackedInts.Reader reader = PackedInts.getReader(index);
      if (data.getFilePointer() != data.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
      }
      if (index.getFilePointer() != index.length()) {
        throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
      }