Package org.apache.lucene.codecs

Examples of org.apache.lucene.codecs.Codec
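
For orientation before the harvested snippets below, here is a minimal sketch, not taken from any of them, of how a Codec is normally chosen for an index. It assumes the Lucene 4.6-era API (Lucene46Codec, Version.LUCENE_46): a Codec bundles the per-segment formats (postings, doc values, stored fields, live docs, and so on), and IndexWriterConfig.setCodec tells the writer which bundle to use.

// Minimal sketch (assumes Lucene 4.6): select a Codec explicitly for an IndexWriter.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class CodecSelectionSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46,
        new StandardAnalyzer(Version.LUCENE_46));
    // Codecs are registered by SPI name; "Lucene46" is the 4.6 default,
    // so this is equivalent to leaving Codec.getDefault() in place.
    iwc.setCodec(Codec.forName("Lucene46"));
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new StringField("id", "1", Store.YES));
    writer.addDocument(doc);
    writer.close();
    dir.close();
  }
}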


  private FieldInfos currentFieldInfos;

  // maxAllowed = the "highest" IndexOptions we can index with, but we will
  // still randomly index some fields at a lower IndexOptions
  private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
    Codec codec = getCodec();
    SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", maxDoc, false, codec, null);

    int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
    if (VERBOSE) {
      System.out.println("\nTEST: now build index");
    }

    int maxIndexOptionNoOffsets = Arrays.asList(IndexOptions.values()).indexOf(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    // TODO use allowPayloads

    FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()];
    for(int fieldUpto=0;fieldUpto<fields.size();fieldUpto++) {
      FieldInfo oldFieldInfo = fieldInfos.fieldInfo(fieldUpto);

      String pf = _TestUtil.getPostingsFormat(codec, oldFieldInfo.name);
      int fieldMaxIndexOption;
      if (doesntSupportOffsets.contains(pf)) {
        fieldMaxIndexOption = Math.min(maxIndexOptionNoOffsets, maxIndexOption);
      } else {
        fieldMaxIndexOption = maxIndexOption;
      }
   
      // Randomly pick the IndexOptions to index this
      // field with:
      IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? fieldMaxIndexOption : random().nextInt(1+fieldMaxIndexOption)];
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;

      newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
                                                   true,
                                                   fieldUpto,
                                                   false,
                                                   false,
                                                   doPayloads,
                                                   indexOptions,
                                                   null,
                                                   DocValuesType.NUMERIC,
                                                   null);
    }

    FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);

    // Estimate that flushed segment size will be 25% of
    // what we use in RAM:
    long bytes =  totalPostings * 8 + totalPayloadBytes;

    SegmentWriteState writeState = new SegmentWriteState(null, dir,
                                                         segmentInfo, newFieldInfos,
                                                         32, null, new IOContext(new FlushInfo(maxDoc, bytes)));
    FieldsConsumer fieldsConsumer = codec.postingsFormat().fieldsConsumer(writeState);

    for(Map.Entry<String,Map<BytesRef,Long>> fieldEnt : fields.entrySet()) {
      String field = fieldEnt.getKey();
      Map<BytesRef,Long> terms = fieldEnt.getValue();

      FieldInfo fieldInfo = newFieldInfos.fieldInfo(field);

      IndexOptions indexOptions = fieldInfo.getIndexOptions();

      if (VERBOSE) {
        System.out.println("field=" + field + " indexOtions=" + indexOptions);
      }

      boolean doFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      boolean doPos = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
      boolean doOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
     
      TermsConsumer termsConsumer = fieldsConsumer.addField(fieldInfo);
      long sumTotalTF = 0;
      long sumDF = 0;
      FixedBitSet seenDocs = new FixedBitSet(maxDoc);
      for(Map.Entry<BytesRef,Long> termEnt : terms.entrySet()) {
        BytesRef term = termEnt.getKey();
        SeedPostings postings = getSeedPostings(term.utf8ToString(), termEnt.getValue(), false, maxAllowed);
        if (VERBOSE) {
          System.out.println("  term=" + field + ":" + term.utf8ToString() + " docFreq=" + postings.docFreq + " seed=" + termEnt.getValue());
        }
       
        PostingsConsumer postingsConsumer = termsConsumer.startTerm(term);
        long totalTF = 0;
        int docID = 0;
        while((docID = postings.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          final int freq = postings.freq();
          if (VERBOSE) {
            System.out.println("    " + postings.upto + ": docID=" + docID + " freq=" + postings.freq);
          }
          postingsConsumer.startDoc(docID, doFreq ? postings.freq : -1);
          seenDocs.set(docID);
          if (doPos) {
            totalTF += postings.freq;
            for(int posUpto=0;posUpto<freq;posUpto++) {
              int pos = postings.nextPosition();
              BytesRef payload = postings.getPayload();

              if (VERBOSE) {
                if (doPayloads) {
                  System.out.println("      pos=" + pos + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
                } else {
                  System.out.println("      pos=" + pos);
                }
              }
              postingsConsumer.addPosition(pos, doPayloads ? payload : null,
                                           doOffsets ? postings.startOffset() : -1,
                                           doOffsets ? postings.endOffset() : -1);
            }
          } else if (doFreq) {
            totalTF += freq;
          } else {
            totalTF++;
          }
          postingsConsumer.finishDoc();
        }
        termsConsumer.finishTerm(term, new TermStats(postings.docFreq, doFreq ? totalTF : -1));
        sumTotalTF += totalTF;
        sumDF += postings.docFreq;
      }

      termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
    }

    fieldsConsumer.close();

    if (VERBOSE) {
      System.out.println("TEST: after indexing: files=");
      for(String file : dir.listAll()) {
        System.out.println("  " + file + ": " + dir.fileLength(file) + " bytes");
      }
    }

    currentFieldInfos = newFieldInfos;

    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1);

    return codec.postingsFormat().fieldsProducer(readState);
  }
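
The FieldsProducer returned by buildIndex is the read side of the codec's postings format. A hedged sketch of how such a producer is typically walked (the helper below is assumed, not part of the test; Lucene 4.x API):

// Sketch: dump every term and its postings from a FieldsProducer.
import java.io.IOException;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public final class PostingsDumper {
  public static void dump(FieldsProducer fieldsProducer) throws IOException {
    // FieldsProducer extends Fields, which iterates over field names
    for (String field : fieldsProducer) {
      Terms terms = fieldsProducer.terms(field);
      if (terms == null) {
        continue;
      }
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef term;
      while ((term = termsEnum.next()) != null) {
        DocsEnum docs = termsEnum.docs(null, null); // no deleted-docs filter, no reuse
        int docID;
        while ((docID = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          System.out.println(field + ":" + term.utf8ToString()
              + " doc=" + docID + " freq=" + docs.freq());
        }
      }
    }
  }
}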


    return result;
  }
 
  public void testWriteReadMerge() throws IOException {
    // get another codec, other than the default: so we are merging segments across different codecs
    final Codec otherCodec;
    if ("SimpleText".equals(Codec.getDefault().getName())) {
      otherCodec = new Lucene46Codec();
    } else {
      otherCodec = new SimpleTextCodec();
    }
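
The same cross-codec idea, sketched through the public API (assumes Lucene 4.6 and the SimpleText codec from the lucene-codecs module): each segment records the codec it was written with, so an IndexWriter configured with a different codec can still read the old segments and will rewrite everything with its own codec when it merges.

// Sketch: write a segment with the default codec, then force-merge under SimpleTextCodec.
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class CrossCodecMergeSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    // first segment: whatever Codec.getDefault() is
    IndexWriter w1 = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_46, new WhitespaceAnalyzer(Version.LUCENE_46)));
    Document doc = new Document();
    doc.add(new TextField("body", "hello codec", Store.NO));
    w1.addDocument(doc);
    w1.close();

    // second writer uses a different codec; the merged segment is written with it
    IndexWriterConfig iwc2 = new IndexWriterConfig(Version.LUCENE_46,
        new WhitespaceAnalyzer(Version.LUCENE_46));
    iwc2.setCodec(new SimpleTextCodec());
    IndexWriter w2 = new IndexWriter(dir, iwc2);
    w2.addDocument(doc);
    w2.forceMerge(1);
    w2.close();
    dir.close();
  }
}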

   throws Exception {
      IOContext context = newIOContext(random());
      SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);
      SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);

      final Codec codec = Codec.getDefault();
      TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
      final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null);

      SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2),
          si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,

    final FieldData field = new FieldData("field", builder, terms, true, false);
    final FieldData[] fields = new FieldData[] {field};
    final FieldInfos fieldInfos = builder.finish();
    final Directory dir = newDirectory();
    this.write(fieldInfos, dir, fields, true);
    Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

    final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));

    final Iterator<String> fieldsEnum = reader.iterator();
    String fieldName = fieldsEnum.next();
    assertNotNull(fieldName);
    final Terms terms2 = reader.terms(fieldName);

    if (VERBOSE) {
      System.out.println("TEST: now write postings");
    }

    this.write(fieldInfos, dir, fields, false);
    Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

    if (VERBOSE) {
      System.out.println("TEST: now read postings");
    }
    final FieldsProducer terms = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));

    final Verify[] threads = new Verify[NUM_TEST_THREADS-1];
    for(int i=0;i<NUM_TEST_THREADS-1;i++) {
      threads[i] = new Verify(si, fields, terms);
      threads[i].setDaemon(true);

  }

  private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable {

    final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27);
    final Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);
    final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, termIndexInterval, null, newIOContext(random()));

    final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state);
    Arrays.sort(fields);
    for (final FieldData field : fields) {
      if (!allowPreFlex && codec instanceof Lucene3xCodec) {
        // code below expects unicode sort order
        continue;

    add(diagsTable, r1);
    Object c1 = create("cell");
    setBoolean(c1, "enabled", false);
    add(r1, c1);
    // codec info
    Codec codec = si.info.getCodec();
    map = new LinkedHashMap<String,String>();
    map.put("codecName", codec.getName());
    map.put("codecClassName", codec.getClass().getName());
    map.put("docValuesFormat", codec.docValuesFormat().getClass().getName());
    map.put("fieldInfosFormat", codec.fieldInfosFormat().getClass().getName());
    map.put("liveDocsFormat", codec.liveDocsFormat().getClass().getName());
    map.put("normsFormat", codec.normsFormat().getClass().getName());
    map.put("postingsFormat", codec.postingsFormat().toString() + " " + codec.postingsFormat().getClass().getName());
    map.put("segmentInfoFormat", codec.segmentInfoFormat().getClass().getName());
    map.put("storedFieldsFormat", codec.storedFieldsFormat().getClass().getName());
    map.put("termVectorsFormat", codec.termVectorsFormat().getClass().getName());
    try {
      List<String> files = new ArrayList<String>(si.files());
      Collections.sort(files);
      map.put("---files---", files.toString());
      if (si.info.getUseCompoundFile()) {

        if (status == null) {
          errorMsg("You need to run 'Check Index' first.");
          return;
        }
        // use codec from the first valid segment
        Codec c = null;
        for (SegmentInfoStatus ssi : status.segmentInfos) {
          if (ssi.codec != null) {
            c = ssi.codec;
            break;
          }
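
The status object consulted here comes from CheckIndex. A minimal sketch of producing it and reading the per-segment codec, assuming a Lucene 4.x index at a path given on the command line:

// Sketch: run CheckIndex and report which codec wrote each segment.
import java.io.File;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.CheckIndex.Status;
import org.apache.lucene.index.CheckIndex.Status.SegmentInfoStatus;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckIndexSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File(args[0])); // path to an existing index
    CheckIndex checker = new CheckIndex(dir);
    Status status = checker.checkIndex();
    for (SegmentInfoStatus ssi : status.segmentInfos) {
      // codec may be null if the segment could not be opened
      System.out.println(ssi.name + " codec=" + (ssi.codec == null ? "?" : ssi.codec.getName()));
    }
    dir.close();
  }
}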

    // We can write directly to the actual name (vs to a
    // .tmp & renaming it) because the file is not live
    // until segments file is written:
    boolean success = false;
    try {
      Codec codec = info.info.getCodec();
      codec.liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, trackingDir, info, pendingDeleteCount, IOContext.DEFAULT);
      success = true;
    } finally {
      if (!success) {
        // Advance only the nextWriteDelGen so that a 2nd
        // attempt to write will write to a new file
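
That fragment is IndexWriter-internal plumbing; from the public API, the codec's LiveDocsFormat is exercised whenever deletes are flushed. A small hedged sketch (field and term names are made up for illustration):

// Sketch (assumes Lucene 4.6): deleting by term and committing makes the codec's
// LiveDocsFormat write a new live-docs generation (a *.del file with the 4.x default format).
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class LiveDocsSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_46, new KeywordAnalyzer()));
    for (int i = 0; i < 3; i++) {
      Document doc = new Document();
      doc.add(new StringField("id", Integer.toString(i), Store.NO));
      writer.addDocument(doc);
    }
    writer.commit();                             // one segment, no deletes yet
    writer.deleteDocuments(new Term("id", "1"));
    writer.commit();                             // flush deletes -> live docs are (re)written
    for (String file : dir.listAll()) {
      System.out.println(file);                  // the live-docs file shows up next to the segment files
    }
    writer.close();
    dir.close();
  }
}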

    TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
   
    FieldInfos fieldInfos = null;
    boolean success = false;
    try {
      final Codec codec = info.info.getCodec();

      // reader could be null e.g. for a just merged segment (from
      // IndexWriter.commitMergedDeletes).
      final SegmentReader reader = this.reader == null ? new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), IOContext.READONCE) : this.reader;
      try {
        // clone FieldInfos so that we can update their dvGen separately from
        // the reader's infos and write them to a new fieldInfos_gen file
        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
        // cannot use builder.add(reader.getFieldInfos()) because it does not
        // clone FI.attributes as well as FI.dvGen
        for (FieldInfo fi : reader.getFieldInfos()) {
          FieldInfo clone = builder.add(fi);
          // copy the stuff FieldInfos.Builder doesn't copy
          if (fi.attributes() != null) {
            for (Entry<String,String> e : fi.attributes().entrySet()) {
              clone.putAttribute(e.getKey(), e.getValue());
            }
          }
          clone.setDocValuesGen(fi.getDocValuesGen());
        }
        // create new fields or update existing ones to have NumericDV type
        for (String f : numericFieldUpdates.keySet()) {
          builder.addOrUpdate(f, NumericDocValuesField.TYPE);
        }
       
        fieldInfos = builder.finish();
        final long nextFieldInfosGen = info.getNextFieldInfosGen();
        final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
        final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, writer.getConfig().getTermIndexInterval(), null, IOContext.DEFAULT, segmentSuffix);
        final DocValuesFormat docValuesFormat = codec.docValuesFormat();
        final DocValuesConsumer fieldsConsumer = docValuesFormat.fieldsConsumer(state);
        boolean fieldsConsumerSuccess = false;
        try {
//          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: applying updates; seg=" + info + " updates=" + numericUpdates);
          for (Entry<String,NumericFieldUpdates> e : numericFieldUpdates.entrySet()) {
            final String field = e.getKey();
            final NumericFieldUpdates fieldUpdates = e.getValue();
            final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
            assert fieldInfo != null;

            fieldInfo.setDocValuesGen(nextFieldInfosGen);
            // write the numeric updates to a new gen'd docvalues file
            fieldsConsumer.addNumericField(fieldInfo, new Iterable<Number>() {
              final NumericDocValues currentValues = reader.getNumericDocValues(field);
              final Bits docsWithField = reader.getDocsWithField(field);
              final int maxDoc = reader.maxDoc();
              final UpdatesIterator updatesIter = fieldUpdates.getUpdates();
              @Override
              public Iterator<Number> iterator() {
                updatesIter.reset();
                return new Iterator<Number>() {

                  int curDoc = -1;
                  int updateDoc = updatesIter.nextDoc();
                 
                  @Override
                  public boolean hasNext() {
                    return curDoc < maxDoc - 1;
                  }

                  @Override
                  public Number next() {
                    if (++curDoc >= maxDoc) {
                      throw new NoSuchElementException("no more documents to return values for");
                    }
                    if (curDoc == updateDoc) { // this document has an updated value
                      Long value = updatesIter.value(); // either null (unset value) or updated value
                      updateDoc = updatesIter.nextDoc(); // prepare for next round
                      return value;
                    } else {
                      // no update for this document
                      assert curDoc < updateDoc;
                      if (currentValues != null && docsWithField.get(curDoc)) {
                        // only read the current value if the document had a value before
                        return currentValues.get(curDoc);
                      } else {
                        return null;
                      }
                    }
                  }

                  @Override
                  public void remove() {
                    throw new UnsupportedOperationException("this iterator does not support removing elements");
                  }
                };
              }
            });
          }
         
          codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
          fieldsConsumerSuccess = true;
        } finally {
          if (fieldsConsumerSuccess) {
            fieldsConsumer.close();
          } else {
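
Everything above is the machinery behind IndexWriter.updateNumericDocValue, which appeared around Lucene 4.6. A caller-side sketch (field names are illustrative only):

// Sketch: update one document's numeric DocValues in place; the codec's
// DocValuesFormat writes the new value into a fresh doc-values generation.
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class NumericDVUpdateSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_46, new KeywordAnalyzer()));
    Document doc = new Document();
    doc.add(new StringField("id", "1", Store.NO));
    doc.add(new NumericDocValuesField("price", 100L));
    writer.addDocument(doc);
    writer.commit();

    // rewrite only the doc values for the matching document, not the whole document
    writer.updateNumericDocValue(new Term("id", "1"), "price", 150L);
    writer.commit();
    writer.close();
    dir.close();
  }
}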
