Package org.apache.lucene.codecs

Examples of org.apache.lucene.codecs.Codec
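The fragments below come from Lucene 4.x-era sources. As orientation, here is a minimal sketch, not taken from those sources, of how a Codec is obtained and what it bundles; Codec.forName, Codec.getDefault and the per-format accessors are real 4.x API, while the "Lucene49" name is just the example used later on this page.

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.PostingsFormat;

// Resolve a codec by its SPI name, or take the JVM-wide default:
Codec byName = Codec.forName("Lucene49");
Codec codec = Codec.getDefault();

// A Codec bundles one format per index file type; the fragments
// below use several of these accessors:
PostingsFormat postings = codec.postingsFormat();        // terms + postings
FieldInfosFormat fieldInfos = codec.fieldInfosFormat();  // per-field metadata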


Reading a segment's FieldInfos through its codec's FieldInfosFormat (fragment; dir, info and closeDir come from the enclosing method):

      closeDir = false;
    }

    try {
      final String segmentSuffix = info.getFieldInfosGen() == -1 ? "" : Long.toString(info.getFieldInfosGen(), Character.MAX_RADIX);
      Codec codec = info.info.getCodec();
      FieldInfosFormat fisFormat = codec.fieldInfosFormat();
      return fisFormat.getFieldInfosReader().read(dir, info.info.name, segmentSuffix, IOContext.READONCE);
    } finally {
      if (closeDir) {
        dir.close();
      }
    }


Rewriting a segment's SI file with the codec's SegmentInfoFormat (3.x codecs cannot write one):

    // We must rewrite the SI file because it references
    // segment name (its own name, if it's 3.x, and doc
    // store segment name):
    TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
    final Codec currentCodec = newInfo.getCodec();
    try {
      currentCodec.segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
    } catch (UnsupportedOperationException uoe) {
      if (currentCodec instanceof Lucene3xCodec) {
        // OK: the 3.x codec cannot write a new SI file;
        // SegmentInfos will write this on commit
      } else {
        throw uoe;
      }
    }

Building a test index directly through a Codec's PostingsFormat, then opening a FieldsProducer over it:

  private FieldInfos currentFieldInfos;

  // maxAllowed = the "highest" IndexOptions we can index with, but we will
  // still randomly index individual fields at lower IndexOptions
  private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
    Codec codec = getCodec();
    SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", maxDoc, false, codec, null, null);

    int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
    if (VERBOSE) {
      System.out.println("\nTEST: now build index");
    }

    int maxIndexOptionNoOffsets = Arrays.asList(IndexOptions.values()).indexOf(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    // TODO use allowPayloads

    FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()];
    for(int fieldUpto=0;fieldUpto<fields.size();fieldUpto++) {
      FieldInfo oldFieldInfo = fieldInfos.fieldInfo(fieldUpto);

      String pf = _TestUtil.getPostingsFormat(codec, oldFieldInfo.name);
      int fieldMaxIndexOption;
      if (doesntSupportOffsets.contains(pf)) {
        fieldMaxIndexOption = Math.min(maxIndexOptionNoOffsets, maxIndexOption);
      } else {
        fieldMaxIndexOption = maxIndexOption;
      }
   
      // Randomly pick the IndexOptions to index this
      // field with:
      IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? fieldMaxIndexOption : random().nextInt(1+fieldMaxIndexOption)];
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;

      newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
                                                   true,
                                                   fieldUpto,
                                                   false,
                                                   false,
                                                   doPayloads,
                                                   indexOptions,
                                                   null,
                                                   DocValuesType.NUMERIC,
                                                   null);
    }

    FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);

    // Estimate that flushed segment size will be 25% of
    // what we use in RAM:
    long bytes = totalPostings * 8 + totalPayloadBytes;

    SegmentWriteState writeState = new SegmentWriteState(null, dir,
                                                         segmentInfo, newFieldInfos,
                                                         32, null, new IOContext(new FlushInfo(maxDoc, bytes)));
    FieldsConsumer fieldsConsumer = codec.postingsFormat().fieldsConsumer(writeState);

    for(Map.Entry<String,Map<BytesRef,Long>> fieldEnt : fields.entrySet()) {
      String field = fieldEnt.getKey();
      Map<BytesRef,Long> terms = fieldEnt.getValue();

      FieldInfo fieldInfo = newFieldInfos.fieldInfo(field);

      IndexOptions indexOptions = fieldInfo.getIndexOptions();

      if (VERBOSE) {
        System.out.println("field=" + field + " indexOtions=" + indexOptions);
      }

      boolean doFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      boolean doPos = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
      boolean doOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
     
      TermsConsumer termsConsumer = fieldsConsumer.addField(fieldInfo);
      long sumTotalTF = 0;
      long sumDF = 0;
      FixedBitSet seenDocs = new FixedBitSet(maxDoc);
      for(Map.Entry<BytesRef,Long> termEnt : terms.entrySet()) {
        BytesRef term = termEnt.getKey();
        SeedPostings postings = getSeedPostings(term.utf8ToString(), termEnt.getValue(), false, maxAllowed);
        if (VERBOSE) {
          System.out.println("  term=" + field + ":" + term.utf8ToString() + " docFreq=" + postings.docFreq + " seed=" + termEnt.getValue());
        }
       
        PostingsConsumer postingsConsumer = termsConsumer.startTerm(term);
        long totalTF = 0;
        int docID = 0;
        while((docID = postings.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          final int freq = postings.freq();
          if (VERBOSE) {
            System.out.println("    " + postings.upto + ": docID=" + docID + " freq=" + freq);
          }
          postingsConsumer.startDoc(docID, doFreq ? freq : -1);
          seenDocs.set(docID);
          if (doPos) {
            totalTF += freq;
            for(int posUpto=0;posUpto<freq;posUpto++) {
              int pos = postings.nextPosition();
              BytesRef payload = postings.getPayload();

              if (VERBOSE) {
                if (doPayloads) {
                  System.out.println("      pos=" + pos + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
                } else {
                  System.out.println("      pos=" + pos);
                }
              }
              postingsConsumer.addPosition(pos, doPayloads ? payload : null,
                                           doOffsets ? postings.startOffset() : -1,
                                           doOffsets ? postings.endOffset() : -1);
            }
          } else if (doFreq) {
            totalTF += freq;
          } else {
            totalTF++;
          }
          postingsConsumer.finishDoc();
        }
        termsConsumer.finishTerm(term, new TermStats(postings.docFreq, doFreq ? totalTF : -1));
        sumTotalTF += totalTF;
        sumDF += postings.docFreq;
      }

      termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
    }

    fieldsConsumer.close();

    if (VERBOSE) {
      System.out.println("TEST: after indexing: files=");
      for(String file : dir.listAll()) {
        System.out.println("  " + file + ": " + dir.fileLength(file) + " bytes");
      }
    }

    currentFieldInfos = newFieldInfos;

    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.DEFAULT, 1);

    return codec.postingsFormat().fieldsProducer(readState);
  }

Merging two segments through SegmentMerger (fragment; the enclosing helper's signature is truncated in the source):

   throws Exception {
      IOContext context = newIOContext(random());
      SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);
      SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);

      final Codec codec = Codec.getDefault();
      TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
      final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null, null);

      SegmentMerger merger = new SegmentMerger(si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
                                               MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context);
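For context, a merge configured like the one above is typically driven as sketched here; the add() call matches the testMerge fragment further down, and merge() returning a MergeState is the 4.x API (treat the variable names as placeholders):

merger.add(r1);
merger.add(r2);
MergeState mergeState = merger.merge(); // writes the merged segment's files via the codec's formats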

Writing live docs (deletions) for a freshly flushed segment via the codec's LiveDocsFormat:

        // TODO: hand the delete vector over to the
        // shortly-to-be-opened SegmentReader and let it
        // carry the changes; there's no reason to use the
        // filesystem as an intermediary here.

        SegmentInfoPerCommit info = flushedSegment.segmentInfo;
        Codec codec = info.info.getCodec();
        codec.liveDocsFormat().writeLiveDocs(flushedSegment.liveDocs, directory, info, delCount, context);
        newSegment.setDelCount(delCount);
        newSegment.advanceDelGen();
      }

      success = true;

Merging two SegmentReaders with the default codec (end of test setup, then testMerge):

    assertNotNull(reader1);
    assertNotNull(reader2);
  }

  public void testMerge() throws IOException {
    final Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, -1, false, codec, null, null);

    SegmentMerger merger = new SegmentMerger(si, InfoStream.getDefault(), mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
                                             MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()));
    merger.add(reader1);

Reporting a segment's codec in CheckIndex-style per-segment diagnostics:

      int toLoseDocCount = info.info.getDocCount();

      AtomicReader reader = null;

      try {
        final Codec codec = info.info.getCodec();
        msg(infoStream, "    codec=" + codec);
        segInfoStat.codec = codec;
        msg(infoStream, "    compound=" + info.info.getUseCompoundFile());
        segInfoStat.compound = info.info.getUseCompoundFile();
        msg(infoStream, "    numFiles=" + info.files().size());

Swapping in a codec whose postings format implements ord() before opening a writer:

    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));

    // Sometimes swap in a codec that implements ord():
    if (random().nextInt(10) == 7) {
      // Make sure the terms index has ords:
      Codec codec = TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds"));
      conf.setCodec(codec);
    }
   
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
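TestUtil.alwaysPostingsFormat above wraps the default codec so that every field uses the given postings format. Outside the test framework, a similar effect can be sketched with FilterCodec; OrdsCodec here is hypothetical, and note that a codec used for writing must be resolvable by name via SPI when the index is later read:

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;

// Hypothetical codec: delegate everything to the default codec,
// but force an ord-capable postings format for all fields.
public final class OrdsCodec extends FilterCodec {
  public OrdsCodec() {
    super("OrdsCodec", Codec.getDefault());
  }

  @Override
  public PostingsFormat postingsFormat() {
    return PostingsFormat.forName("Lucene41WithOrds");
  }
}

// Usage: conf.setCodec(new OrdsCodec());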


Looking up a Codec by name through Lucene's named SPI loader:

// TODO: maybe we should test this with mocks, but it's easy
// enough to test the basics via Codec
public class TestNamedSPILoader extends LuceneTestCase {
 
  public void testLookup() {
    Codec codec = Codec.forName("Lucene49");
    assertEquals("Lucene49", codec.getName());
  }
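Beyond forName, the same SPI loader can enumerate every registered codec; a small sketch using Codec.availableCodecs() and Codec.setDefault() from the same 4.x API:

import org.apache.lucene.codecs.Codec;

// Print every codec name registered via META-INF/services, then
// pin one as the default used when no codec is set explicitly:
for (String name : Codec.availableCodecs()) {
  System.out.println("codec: " + name);
}
Codec.setDefault(Codec.forName("Lucene49"));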
