Package org.apache.lucene.store

Examples of org.apache.lucene.store.ByteArrayDataInput


      IndexInput in1 = dir.openInput("out.bin", IOContext.DEFAULT);
      byte[] buf = new byte[(int) fp];
      in1.readBytes(buf, 0, (int) fp);
      in1.seek(0L);
      ByteArrayDataInput in2 = new ByteArrayDataInput(buf);
      final DataInput in = random().nextBoolean() ? in1 : in2;
      final BlockPackedReaderIterator it = new BlockPackedReaderIterator(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
      for (int i = 0; i < valueCount; ) {
        if (random().nextBoolean()) {
          assertEquals("" + i, values[i], it.next());
View Full Code Here


      IndexInput in1 = dir.openInput("out.bin", IOContext.DEFAULT);
      byte[] buf = new byte[(int) fp];
      in1.readBytes(buf, 0, (int) fp);
      in1.seek(0L);
      ByteArrayDataInput in2 = new ByteArrayDataInput(buf);
      final DataInput in = random().nextBoolean() ? in1 : in2;
      final BlockPackedReaderIterator it = new BlockPackedReaderIterator(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
      for (int i = 0; i < valueCount; ) {
        if (random().nextBoolean()) {
          assertEquals("" + i, values[i], it.next());
View Full Code Here

      // Build FST:
      BytesRef previousAnalyzed = null;
      BytesRef analyzed = new BytesRef();
      BytesRef surface = new BytesRef();
      IntsRef scratchInts = new IntsRef();
      ByteArrayDataInput input = new ByteArrayDataInput();

      // Used to remove duplicate surface forms (but we
      // still index the highest-weight one).  We clear
      // this when we see a new analyzed form, so it cannot
      // grow unbounded (at most 256 entries):
      Set<BytesRef> seenSurfaceForms = new HashSet<BytesRef>();

      int dedup = 0;
      while (reader.read(scratch)) {
        input.reset(scratch.bytes, scratch.offset, scratch.length);
        short analyzedLength = input.readShort();
        analyzed.grow(analyzedLength+2);
        input.readBytes(analyzed.bytes, 0, analyzedLength);
        analyzed.length = analyzedLength;

        long cost = input.readInt();

        surface.bytes = scratch.bytes;
        if (hasPayloads) {
          surface.length = input.readShort();
          surface.offset = input.getPosition();
        } else {
          surface.offset = input.getPosition();
          surface.length = scratch.length - surface.offset;
        }
       
        if (previousAnalyzed == null) {
          previousAnalyzed = new BytesRef();
          previousAnalyzed.copyBytes(analyzed);
          seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
        } else if (analyzed.equals(previousAnalyzed)) {
          dedup++;
          if (dedup >= maxSurfaceFormsPerAnalyzedForm) {
            // More than maxSurfaceFormsPerAnalyzedForm
            // dups: skip the rest:
            continue;
          }
          if (seenSurfaceForms.contains(surface)) {
            continue;
          }
          seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
        } else {
          dedup = 0;
          previousAnalyzed.copyBytes(analyzed);
          seenSurfaceForms.clear();
          seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
        }

        // TODO: I think we can avoid the extra 2 bytes when
        // there is no dup (dedup==0), but we'd have to fix
        // the exactFirst logic ... which would be sort of
        // hairy because we'd need to special case the two
        // (dup/not dup)...

        // NOTE: must be byte 0 so we sort before whatever
        // is next
        analyzed.bytes[analyzed.offset+analyzed.length] = 0;
        analyzed.bytes[analyzed.offset+analyzed.length+1] = (byte) dedup;
        analyzed.length += 2;

        Util.toIntsRef(analyzed, scratchInts);
        //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
        if (!hasPayloads) {
          builder.add(scratchInts, outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
        } else {
          int payloadOffset = input.getPosition() + surface.length;
          int payloadLength = scratch.length - payloadOffset;
          BytesRef br = new BytesRef(surface.length + 1 + payloadLength);
          System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
          br.bytes[surface.length] = PAYLOAD_SEP;
          System.arraycopy(scratch.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength);
View Full Code Here

      } else {
        termsEnum = new TVTermsEnum();
      }
      termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths,
          payloadIndex, payloadBytes,
          new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
      return termsEnum;
    }
View Full Code Here

    final Arc<Long> firstArc = new Arc<Long>();
    final Arc<Long> scratchArc = new Arc<Long>();
    final IntsRef scratchInts = new IntsRef();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
    final BytesRef ref = new BytesRef();
    final ByteArrayDataInput input = new ByteArrayDataInput();
    return new SortedSetDocValues() {
      long currentOrd;

      @Override
      public long nextOrd() {
        if (input.eof()) {
          return NO_MORE_ORDS;
        } else {
          currentOrd += input.readVLong();
          return currentOrd;
        }
      }
     
      @Override
      public void setDocument(int docID) {
        docToOrds.get(docID, ref);
        input.reset(ref.bytes, ref.offset, ref.length);
        currentOrd = 0;
      }

      @Override
      public void lookupOrd(long ord, BytesRef result) {
View Full Code Here

    final BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
    decompressor.decompress(fieldsStream, totalLength, offset, length, bytes);
    assert bytes.length == length;

    final ByteArrayDataInput documentInput = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length);
    for (int fieldIDX = 0; fieldIDX < numStoredFields; fieldIDX++) {
      final long infoAndBits = documentInput.readVLong();
      final int fieldNumber = (int) (infoAndBits >>> TYPE_BITS);
      final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);

      final int bits = (int) (infoAndBits & TYPE_MASK);
      assert bits <= NUMERIC_DOUBLE: "bits=" + Integer.toHexString(bits);

      switch(visitor.needsField(fieldInfo)) {
        case YES:
          readField(documentInput, visitor, fieldInfo, bits);
          assert documentInput.getPosition() <= bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + bytes.length;
          break;
        case NO:
          skipField(documentInput, bits);
          assert documentInput.getPosition() <= bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + bytes.length;
          break;
        case STOP:
          return;
      }
    }
    assert documentInput.getPosition() == bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + " " + bytes.length;
  }
View Full Code Here

      final int inputLines = info.lines;
      reader = new Sort.ByteSequencesReader(tempSorted);
      long line = 0;
      int previousBucket = 0;
      int previousScore = 0;
      ByteArrayDataInput input = new ByteArrayDataInput();
      BytesRef tmp1 = new BytesRef();
      BytesRef tmp2 = new BytesRef();
      while (reader.read(tmp1)) {
        input.reset(tmp1.bytes);
        int currentScore = input.readInt();

        int bucket;
        if (line > 0 && currentScore == previousScore) {
          bucket = previousBucket;
        } else {
          bucket = (int) (line * buckets / inputLines);
        }
        previousScore = currentScore;
        previousBucket = bucket;

        // Only append the input, discard the weight.
        tmp2.bytes = tmp1.bytes;
        tmp2.offset = input.getPosition();
        tmp2.length = tmp1.length - input.getPosition();
        builder.add(tmp2, bucket);

        line++;
      }

View Full Code Here

      // Build FST:
      BytesRef previousAnalyzed = null;
      BytesRef analyzed = new BytesRef();
      BytesRef surface = new BytesRef();
      IntsRef scratchInts = new IntsRef();
      ByteArrayDataInput input = new ByteArrayDataInput();

      // Used to remove duplicate surface forms (but we
      // still index the highest-weight one).  We clear
      // this when we see a new analyzed form, so it cannot
      // grow unbounded (at most 256 entries):
      Set<BytesRef> seenSurfaceForms = new HashSet<BytesRef>();

      int dedup = 0;
      while (reader.read(scratch)) {
        input.reset(scratch.bytes, scratch.offset, scratch.length);
        short analyzedLength = input.readShort();
        analyzed.grow(analyzedLength+2);
        input.readBytes(analyzed.bytes, 0, analyzedLength);
        analyzed.length = analyzedLength;

        long cost = input.readInt();

        surface.bytes = scratch.bytes;
        if (hasPayloads) {
          surface.length = input.readShort();
          surface.offset = input.getPosition();
        } else {
          surface.offset = input.getPosition();
          surface.length = scratch.length - surface.offset;
        }
       
        if (previousAnalyzed == null) {
          previousAnalyzed = new BytesRef();
          previousAnalyzed.copyBytes(analyzed);
          seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
        } else if (analyzed.equals(previousAnalyzed)) {
          dedup++;
          if (dedup >= maxSurfaceFormsPerAnalyzedForm) {
            // More than maxSurfaceFormsPerAnalyzedForm
            // dups: skip the rest:
            continue;
          }
          if (seenSurfaceForms.contains(surface)) {
            continue;
          }
          seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
        } else {
          dedup = 0;
          previousAnalyzed.copyBytes(analyzed);
          seenSurfaceForms.clear();
          seenSurfaceForms.add(BytesRef.deepCopyOf(surface));
        }

        // TODO: I think we can avoid the extra 2 bytes when
        // there is no dup (dedup==0), but we'd have to fix
        // the exactFirst logic ... which would be sort of
        // hairy because we'd need to special case the two
        // (dup/not dup)...

        // NOTE: must be byte 0 so we sort before whatever
        // is next
        analyzed.bytes[analyzed.offset+analyzed.length] = 0;
        analyzed.bytes[analyzed.offset+analyzed.length+1] = (byte) dedup;
        analyzed.length += 2;

        Util.toIntsRef(analyzed, scratchInts);
        //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
        if (!hasPayloads) {
          builder.add(scratchInts, outputs.newPair(cost, BytesRef.deepCopyOf(surface)));
        } else {
          int payloadOffset = input.getPosition() + surface.length;
          int payloadLength = scratch.length - payloadOffset;
          BytesRef br = new BytesRef(surface.length + 1 + payloadLength);
          System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length);
          br.bytes[surface.length] = PAYLOAD_SEP;
          System.arraycopy(scratch.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength);
View Full Code Here

    //System.out.println("SEPR: readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
    final int len = termsIn.readVInt();
    //System.out.println("  numBytes=" + len);
    if (termState.bytes == null) {
      termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
      termState.bytesReader = new ByteArrayDataInput(termState.bytes);
    } else if (termState.bytes.length < len) {
      termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
    }
    termState.bytesReader.reset(termState.bytes, 0, len);
    termsIn.readBytes(termState.bytes, 0, len);
View Full Code Here

  public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
    //System.out.println("PR.readTermsBlock state=" + _termState);
    final PulsingTermState termState = (PulsingTermState) _termState;
    if (termState.inlinedBytes == null) {
      termState.inlinedBytes = new byte[128];
      termState.inlinedBytesReader = new ByteArrayDataInput();
    }
    int len = termsIn.readVInt();
    //System.out.println("  len=" + len + " fp=" + termsIn.getFilePointer());
    if (termState.inlinedBytes.length < len) {
      termState.inlinedBytes = new byte[ArrayUtil.oversize(len, 1)];
View Full Code Here

TOP

Related Classes of org.apache.lucene.store.ByteArrayDataInput

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.