Examples of GrowableWriter


Examples of org.apache.lucene.util.packed.GrowableWriter
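
GrowableWriter wraps a PackedInts.Mutable and transparently switches to a larger bits-per-value representation whenever a stored value no longer fits. The excerpts below show how it is used in Lucene's FST packing, the legacy term-info index, and Elasticsearch's ordinals and version-upgrade code.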

    // pad: ensure no node gets address 0 which is reserved to mean
    // the stop state w/ no arcs
    bytes.writeByte((byte) 0);
    NO_OUTPUT = outputs.getNoOutput();
    if (willPackFST) {
      nodeAddress = new GrowableWriter(15, 8, acceptableOverheadRatio);
      inCounts = new GrowableWriter(1, 8, acceptableOverheadRatio);
    } else {
      nodeAddress = null;
      inCounts = null;
    }
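
This first excerpt (from Lucene's FST Builder) allocates two writers only when the FST will later be packed: nodeAddress records where each node was written and inCounts tracks incoming-arc counts. For context, a minimal standalone sketch of the growth behavior itself; sizes and the demo class name are hypothetical, and the Lucene 4.x packed-ints API is assumed:

    import org.apache.lucene.util.packed.GrowableWriter;
    import org.apache.lucene.util.packed.PackedInts;

    public class GrowableWriterDemo { // hypothetical demo harness
      public static void main(String[] args) {
        // Start at 4 bits per value for 100 slots; the acceptable overhead
        // ratio trades memory for faster, aligned implementations.
        GrowableWriter w = new GrowableWriter(4, 100, PackedInts.DEFAULT);
        w.set(0, 7);          // fits in the initial 4 bits
        w.set(1, 1L << 20);   // transparently regrows to >= 21 bits per value
        System.out.println(w.get(1));               // 1048576
        PackedInts.Mutable packed = w.getMutable(); // unwrap the backing array
      }
    }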

Examples of org.apache.lucene.util.packed.GrowableWriter

      topNodeMap.put(n.node, downTo);
      //System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
    }

    // +1 because node ords start at 1 (0 is reserved as stop node):
    final GrowableWriter newNodeAddress = new GrowableWriter(
                       PackedInts.bitsRequired(this.bytes.getPosition()), (int) (1 + nodeCount), acceptableOverheadRatio);

    // Fill initial coarse guess:
    for(int node=1;node<=nodeCount;node++) {
      newNodeAddress.set(node, 1 + this.bytes.getPosition() - nodeAddress.get(node));
    }

    int absCount;
    int deltaCount;
    int topCount;
    int nextCount;

    FST<T> fst;

    // Iterate until we converge:
    while(true) {

      //System.out.println("\nITER");
      boolean changed = false;

      // for assert:
      boolean negDelta = false;

      fst = new FST<>(inputType, outputs, bytes.getBlockBits());
     
      final BytesStore writer = fst.bytes;

      // Skip 0 byte since 0 is reserved target:
      writer.writeByte((byte) 0);

      fst.arcWithOutputCount = 0;
      fst.nodeCount = 0;
      fst.arcCount = 0;

      absCount = deltaCount = topCount = nextCount = 0;

      int changedCount = 0;

      long addressError = 0;

      //int totWasted = 0;

      // Since we re-reverse the bytes, we now write the
      // nodes backwards, so that BIT_TARGET_NEXT is
      // unchanged:
      for(int node=(int)nodeCount;node>=1;node--) {
        fst.nodeCount++;
        final long address = writer.getPosition();

        //System.out.println("  node: " + node + " address=" + address);
        if (address != newNodeAddress.get(node)) {
          addressError = address - newNodeAddress.get(node);
          //System.out.println("    change: " + (address - newNodeAddress[node]));
          changed = true;
          newNodeAddress.set(node, address);
          changedCount++;
        }

        int nodeArcCount = 0;
        int bytesPerArc = 0;

        boolean retry = false;

        // for assert:
        boolean anyNegDelta = false;

        // Retry loop: possibly iterate more than once, if
        // this is an array'd node and bytesPerArc changes:
        writeNode:
        while(true) { // retry writing this node

          //System.out.println("  cycle: retry");
          readFirstRealTargetArc(node, arc, r);

          final boolean useArcArray = arc.bytesPerArc != 0;
          if (useArcArray) {
            // Write the header for this node's fixed-size arc array:
            if (bytesPerArc == 0) {
              bytesPerArc = arc.bytesPerArc;
            }
            writer.writeByte(ARCS_AS_FIXED_ARRAY);
            writer.writeVInt(arc.numArcs);
            writer.writeVInt(bytesPerArc);
            //System.out.println("node " + node + ": " + arc.numArcs + " arcs");
          }

          int maxBytesPerArc = 0;
          //int wasted = 0;
          while(true) {  // iterate over all arcs for this node
            //System.out.println("    cycle next arc");

            final long arcStartPos = writer.getPosition();
            nodeArcCount++;

            byte flags = 0;

            if (arc.isLast()) {
              flags += BIT_LAST_ARC;
            }
            /*
            if (!useArcArray && nodeUpto < nodes.length-1 && arc.target == nodes[nodeUpto+1]) {
              flags += BIT_TARGET_NEXT;
            }
            */
            if (!useArcArray && node != 1 && arc.target == node-1) {
              flags += BIT_TARGET_NEXT;
              if (!retry) {
                nextCount++;
              }
            }
            if (arc.isFinal()) {
              flags += BIT_FINAL_ARC;
              if (arc.nextFinalOutput != NO_OUTPUT) {
                flags += BIT_ARC_HAS_FINAL_OUTPUT;
              }
            } else {
              assert arc.nextFinalOutput == NO_OUTPUT;
            }
            if (!targetHasArcs(arc)) {
              flags += BIT_STOP_NODE;
            }

            if (arc.output != NO_OUTPUT) {
              flags += BIT_ARC_HAS_OUTPUT;
            }

            final long absPtr;
            final boolean doWriteTarget = targetHasArcs(arc) && (flags & BIT_TARGET_NEXT) == 0;
            if (doWriteTarget) {

              final Integer ptr = topNodeMap.get(arc.target);
              if (ptr != null) {
                absPtr = ptr;
              } else {
                absPtr = topNodeMap.size() + newNodeAddress.get((int) arc.target) + addressError;
              }

              long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition() - 2;
              if (delta < 0) {
                //System.out.println("neg: " + delta);
                anyNegDelta = true;
                delta = 0;
              }

              if (delta < absPtr) {
                flags |= BIT_TARGET_DELTA;
              }
            } else {
              absPtr = 0;
            }

            assert flags != ARCS_AS_FIXED_ARRAY;
            writer.writeByte(flags);

            fst.writeLabel(writer, arc.label);

            if (arc.output != NO_OUTPUT) {
              outputs.write(arc.output, writer);
              if (!retry) {
                fst.arcWithOutputCount++;
              }
            }
            if (arc.nextFinalOutput != NO_OUTPUT) {
              outputs.writeFinalOutput(arc.nextFinalOutput, writer);
            }

            if (doWriteTarget) {

              long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition();
              if (delta < 0) {
                anyNegDelta = true;
                //System.out.println("neg: " + delta);
                delta = 0;
              }

              if (flag(flags, BIT_TARGET_DELTA)) {
                //System.out.println("        delta");
                writer.writeVLong(delta);
                if (!retry) {
                  deltaCount++;
                }
              } else {
                /*
                if (ptr != null) {
                  System.out.println("        deref");
                } else {
                  System.out.println("        abs");
                }
                */
                writer.writeVLong(absPtr);
                if (!retry) {
                  if (absPtr >= topNodeMap.size()) {
                    absCount++;
                  } else {
                    topCount++;
                  }
                }
              }
            }

            if (useArcArray) {
              final int arcBytes = (int) (writer.getPosition() - arcStartPos);
              //System.out.println("  " + arcBytes + " bytes");
              maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
              // NOTE: this may in fact go "backwards", if
              // somehow (rarely, possibly never) we use
              // more bytesPerArc in this rewrite than the
              // incoming FST did... but in this case we
              // will retry (below) so it's OK to overwrite
              // bytes:
              //wasted += bytesPerArc - arcBytes;
              writer.skipBytes((int) (arcStartPos + bytesPerArc - writer.getPosition()));
            }

            if (arc.isLast()) {
              break;
            }

            readNextRealArc(arc, r);
          }

          if (useArcArray) {
            if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
              // converged
              //System.out.println("  bba=" + bytesPerArc + " wasted=" + wasted);
              //totWasted += wasted;
              break;
            }
          } else {
            break;
          }

          //System.out.println("  retry this node maxBytesPerArc=" + maxBytesPerArc + " vs " + bytesPerArc);

          // Retry:
          bytesPerArc = maxBytesPerArc;
          writer.truncate(address);
          nodeArcCount = 0;
          retry = true;
          anyNegDelta = false;
        }

        negDelta |= anyNegDelta;

        fst.arcCount += nodeArcCount;
      }

      if (!changed) {
        // We don't renumber the nodes (just reverse their
        // order) so nodes should only point forward to
        // other nodes because we only produce acyclic FSTs
        // w/ nodes only pointing "forwards":
        assert !negDelta;
        //System.out.println("TOT wasted=" + totWasted);
        // Converged!
        break;
      }
      //System.out.println("  " + changedCount + " of " + fst.nodeCount + " changed; retry");
    }

    long maxAddress = 0;
    for (long key : topNodeMap.keySet()) {
      maxAddress = Math.max(maxAddress, newNodeAddress.get((int) key));
    }

    PackedInts.Mutable nodeRefToAddressIn = PackedInts.getMutable(topNodeMap.size(),
        PackedInts.bitsRequired(maxAddress), acceptableOverheadRatio);
    for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
      nodeRefToAddressIn.set(ent.getValue(), newNodeAddress.get(ent.getKey()));
    }
    fst.nodeRefToAddress = nodeRefToAddressIn;
   
    fst.startNode = newNodeAddress.get((int) startNode);
    //System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);

    if (emptyOutput != null) {
      fst.setEmptyOutput(emptyOutput);
    }
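
After the loop above converges, the addresses of the most-referenced nodes are copied from the GrowableWriter into a fixed-size PackedInts.Mutable sized by the largest address via PackedInts.bitsRequired. A reduced sketch of that freeze idiom; sizes, values, and the class name are hypothetical:

    import org.apache.lucene.util.packed.GrowableWriter;
    import org.apache.lucene.util.packed.PackedInts;

    public class FreezeDemo { // hypothetical demo harness
      public static void main(String[] args) {
        // Grow while writing, then copy into a right-sized packed array
        // once the maximum stored value is known.
        GrowableWriter addresses = new GrowableWriter(8, 16, PackedInts.COMPACT);
        addresses.set(3, 12345L);

        long max = 0;
        for (int i = 0; i < addresses.size(); i++) {
          max = Math.max(max, addresses.get(i));
        }
        PackedInts.Mutable frozen = PackedInts.getMutable(
            addresses.size(), PackedInts.bitsRequired(max), PackedInts.COMPACT);
        for (int i = 0; i < addresses.size(); i++) {
          frozen.set(i, addresses.get(i));
        }
        System.out.println(frozen.get(3)); // 12345
      }
    }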

Examples of org.apache.lucene.util.packed.GrowableWriter

    long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor;
    PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
    PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();

    final int bitEstimate = 1+MathUtil.log(tiiFileLength, 2);
    GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInts.DEFAULT);

    String currentField = null;
    List<String> fieldStrs = new ArrayList<>();
    int fieldCounter = -1;
    for (int i = 0; indexEnum.next(); i++) {
      Term term = indexEnum.term();
      if (currentField == null || !currentField.equals(term.field())) {
        currentField = term.field();
        fieldStrs.add(currentField);
        fieldCounter++;
      }
      TermInfo termInfo = indexEnum.termInfo();
      indexToTerms.set(i, dataOutput.getPosition());
      dataOutput.writeVInt(fieldCounter);
      dataOutput.writeString(term.text());
      dataOutput.writeVInt(termInfo.docFreq);
      if (termInfo.docFreq >= skipInterval) {
        dataOutput.writeVInt(termInfo.skipOffset);
      }
      dataOutput.writeVLong(termInfo.freqPointer);
      dataOutput.writeVLong(termInfo.proxPointer);
      dataOutput.writeVLong(indexEnum.indexPointer);
      for (int j = 1; j < indexDivisor; j++) {
        if (!indexEnum.next()) {
          break;
        }
      }
    }

    fields = new Term[fieldStrs.size()];
    for (int i = 0; i < fields.length; i++) {
      fields[i] = new Term(fieldStrs.get(i));
    }
   
    dataPagedBytes.freeze(true);
    dataInput = dataPagedBytes.getDataInput();
    indexToDataOffset = indexToTerms.getMutable();

    ramBytesUsed = fields.length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.shallowSizeOfInstance(Term.class))
        + dataPagedBytes.ramBytesUsed() + indexToDataOffset.ramBytesUsed();
  }
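
Here (Lucene's TermInfosReaderIndex) the starting bits-per-value is estimated from the .tii file length, so the byte offsets stored per indexed term should rarely, if ever, force a regrow. The same heuristic in isolation, with an assumed file size and a hypothetical demo class:

    import org.apache.lucene.util.MathUtil;
    import org.apache.lucene.util.packed.GrowableWriter;
    import org.apache.lucene.util.packed.PackedInts;

    public class BitEstimateDemo { // hypothetical demo harness
      public static void main(String[] args) {
        long fileLength = 1L << 20; // assumed input size
        // Any byte offset into the file fits in bitEstimate bits:
        int bitEstimate = 1 + MathUtil.log(fileLength, 2); // 21 here
        GrowableWriter offsets =
            new GrowableWriter(bitEstimate, 1024, PackedInts.DEFAULT);
        offsets.set(0, fileLength - 1); // no regrow needed
        System.out.println(offsets.get(0));
      }
    }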

Examples of org.apache.lucene.util.packed.GrowableWriter

    // this is only an initial size; it will be GCed once the build is complete
    long initialSize = (long) (tiiFileLength * 1.5) / indexDivisor;
    PagedBytes dataPagedBytes = new PagedBytes(estimatePageBits(initialSize));
    PagedBytesDataOutput dataOutput = dataPagedBytes.getDataOutput();

    GrowableWriter indexToTerms = new GrowableWriter(4, indexSize, false);
    String currentField = null;
    List<String> fieldStrs = new ArrayList<String>();
    int fieldCounter = -1;
    for (int i = 0; indexEnum.next(); i++) {
      Term term = indexEnum.term();
      if (currentField != term.field) {
        currentField = term.field;
        fieldStrs.add(currentField);
        fieldCounter++;
      }
      TermInfo termInfo = indexEnum.termInfo();
      indexToTerms.set(i, dataOutput.getPosition());
      dataOutput.writeVInt(fieldCounter);
      dataOutput.writeString(term.text());
      dataOutput.writeVInt(termInfo.docFreq);
      if (termInfo.docFreq >= skipInterval) {
        dataOutput.writeVInt(termInfo.skipOffset);
      }
      dataOutput.writeVLong(termInfo.freqPointer);
      dataOutput.writeVLong(termInfo.proxPointer);
      dataOutput.writeVLong(indexEnum.indexPointer);
      for (int j = 1; j < indexDivisor; j++) {
        if (!indexEnum.next()) {
          break;
        }
      }
    }

    fields = new Term[fieldStrs.size()];
    for (int i = 0; i < fields.length; i++) {
      fields[i] = new Term(fieldStrs.get(i));
    }
   
    dataPagedBytes.freeze(true);
    dataInput = dataPagedBytes.getDataInput();
    indexToDataOffset = indexToTerms.getMutable();
  }
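
This is the same index-loading routine from an earlier Lucene line. Two differences stand out: the older constructor, new GrowableWriter(4, indexSize, false), whose trailing boolean (apparently the pre-4.0 roundFixedBitsUp flag) played the role later taken by the float acceptableOverheadRatio; and the reference comparison currentField != term.field, which works because field names are interned in that version.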

Examples of org.apache.lucene.util.packed.GrowableWriter

        OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) {
            this.startBitsPerValue = startBitsPerValue;
            this.acceptableOverheadRatio = acceptableOverheadRatio;
            positions = new PagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
            firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio);
            // over-allocate so we never have to worry about the array sizes; 24 entries is enough to store several million ordinals per doc...
            ordinals = new PagedGrowableWriter[24];
            nextLevelSlices = new PagedGrowableWriter[24];
            sizes = new int[24];
            Arrays.fill(sizes, 1); // reserve the 1st slice on every level
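
In this Elasticsearch OrdinalsStore, a GrowableWriter keeps one first-ordinal slot per document while PagedGrowableWriter instances back the overflow slices; the paged variant is indexed by long and grows each page independently. A sketch contrasting the two, with hypothetical sizes and class name (Lucene 4.x API assumed):

    import org.apache.lucene.util.packed.GrowableWriter;
    import org.apache.lucene.util.packed.PackedInts;
    import org.apache.lucene.util.packed.PagedGrowableWriter;

    public class OrdinalsDemo { // hypothetical demo harness
      public static void main(String[] args) {
        int maxDoc = 1_000_000;
        // One slot per doc, starting at a single bit per value:
        GrowableWriter firstOrdinals = new GrowableWriter(1, maxDoc, PackedInts.FAST);
        // Long-indexed and paged; each page grows its bits independently:
        PagedGrowableWriter positions =
            new PagedGrowableWriter(maxDoc, 1 << 16, 1, PackedInts.FAST);
        firstOrdinals.set(42, 7L);
        positions.set(42L, 123L);
        System.out.println(firstOrdinals.get(42) + " " + positions.get(42L));
      }
    }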

Examples of org.apache.lucene.util.packed.GrowableWriter

            // don't try to do anything clever. If any other segment has versions,
            // all versions of this segment will be initialized to 0
            return reader;
        }
        final TermsEnum uids = terms.iterator(null);
        final GrowableWriter versions = new GrowableWriter(2, reader.maxDoc(), PackedInts.DEFAULT);
        DocsAndPositionsEnum dpe = null;
        for (BytesRef uid = uids.next(); uid != null; uid = uids.next()) {
            dpe = uids.docsAndPositions(reader.getLiveDocs(), dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
            assert dpe != null : "field has payloads";
            for (int doc = dpe.nextDoc(); doc != DocsEnum.NO_MORE_DOCS; doc = dpe.nextDoc()) {
                dpe.nextPosition();
                final BytesRef payload = dpe.getPayload();
                if (payload != null && payload.length == 8) {
                    final long version = Numbers.bytesToLong(payload);
                    versions.set(doc, version);
                    break;
                }
            }
        }
        // Build new field infos, doc values, and return a filter reader
        final FieldInfo newVersionInfo;
        if (versionInfo == null) {
            // Find a free field number
            int fieldNumber = 0;
            for (FieldInfo fi : fieldInfos) {
                fieldNumber = Math.max(fieldNumber, fi.number + 1);
            }
            // TODO: lots of things can go wrong here...
            newVersionInfo = new FieldInfo(VersionFieldMapper.NAME,               // field name
                                           fieldNumber,                           // field number
                                           false,                                 // store term vectors
                                           false,                                 // omit norms
                                           false,                                 // store payloads
                                           IndexOptions.NONE,                     // index options
                                           DocValuesType.NUMERIC,                 // docvalues
                                           -1,                                    // docvalues generation
                                           Collections.<String, String>emptyMap() // attributes
                                           );
        } else {
            newVersionInfo = versionInfo;
        }
        newVersionInfo.checkConsistency(); // fail merge immediately if above code is wrong
        final ArrayList<FieldInfo> fieldInfoList = new ArrayList<>();
        for (FieldInfo info : fieldInfos) {
            if (info != versionInfo) {
                fieldInfoList.add(info);
            }
        }
        fieldInfoList.add(newVersionInfo);
        final FieldInfos newFieldInfos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()]));
        final NumericDocValues versionValues = new NumericDocValues() {
            @Override
            public long get(int index) {
                return versions.get(index);
            }
        };
        return new FilterLeafReader(reader) {
            @Override
            public FieldInfos getFieldInfos() {
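
The last excerpt (Elasticsearch's version-field upgrader) is cut off inside the FilterLeafReader override, but the core pattern is visible above: collect per-document versions into a GrowableWriter, then expose it through a NumericDocValues view. A minimal sketch of that wrapper, assuming the pre-7.0 NumericDocValues with a random-access get(int) and a hypothetical demo class:

    import org.apache.lucene.index.NumericDocValues;
    import org.apache.lucene.util.packed.GrowableWriter;
    import org.apache.lucene.util.packed.PackedInts;

    public class VersionsViewDemo { // hypothetical demo harness
      public static void main(String[] args) {
        final GrowableWriter versions = new GrowableWriter(2, 128, PackedInts.DEFAULT);
        versions.set(5, 42L);

        // Read-only doc-values view over the packed array:
        NumericDocValues versionValues = new NumericDocValues() {
          @Override
          public long get(int docID) {
            return versions.get(docID);
          }
        };
        System.out.println(versionValues.get(5)); // 42
      }
    }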