Package org.apache.lucene.codecs

Examples of org.apache.lucene.codecs.Codec

The snippets below come from Lucene 4.x sources and tests; they show how Codec instances are looked up by name, set on an IndexWriter, extended per field, and exercised directly through their postings format.

      int toLoseDocCount = info.info.getDocCount();

      AtomicReader reader = null;

      try {
        final Codec codec = info.info.getCodec();
        msg(infoStream, "    codec=" + codec);
        segInfoStat.codec = codec;
        msg(infoStream, "    compound=" + info.info.getUseCompoundFile());
        segInfoStat.compound = info.info.getUseCompoundFile();
        msg(infoStream, "    numFiles=" + info.files().size());
        // ...
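
The fragment above appears to come from CheckIndex, which prints per-segment diagnostics including the codec. A minimal standalone sketch of reading the same information, assuming a Lucene 4.5-era index and the SegmentInfos API of that release (the index path is a placeholder):

import java.io.File;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ListSegmentCodecs {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index")); // placeholder path
    SegmentInfos infos = new SegmentInfos();
    infos.read(dir); // loads the most recent segments_N file
    for (SegmentCommitInfo info : infos) {
      System.out.println(info.info.name
          + " codec=" + info.info.getCodec().getName()
          + " compound=" + info.info.getUseCompoundFile()
          + " numFiles=" + info.files().size());
    }
    dir.close();
  }
}

The codec is recorded per segment, so one index can legitimately contain segments written by different codecs, which is why CheckIndex reports it segment by segment.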


    writer.commit();
    writer.close();

    assertQuery(new Term("content", "ccc"), dir, 10);
    assertQuery(new Term("content", "aaa"), dir, 10);
    Codec codec = iwconf.getCodec();

    iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
        .setOpenMode(OpenMode.APPEND).setCodec(codec);
    //((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0);
    //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
    // ...
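
The test reopens the index for appending with the codec it was created with. A minimal sketch of the same pattern without the test-framework helpers (the analyzer, directory, and documents are placeholders):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class ReopenWithSameCodec {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    Codec codec = Codec.forName("Lucene45");

    // Create the index with an explicit codec.
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_45, new StandardAnalyzer(Version.LUCENE_45))
            .setOpenMode(OpenMode.CREATE).setCodec(codec));
    // ... add documents ...
    writer.commit();
    writer.close();

    // Reopen for append with the same codec, as the test does.
    writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_45, new StandardAnalyzer(Version.LUCENE_45))
            .setOpenMode(OpenMode.APPEND).setCodec(codec));
    writer.close();
    dir.close();
  }
}

Matching the codec on reopen is not required for reading the existing segments, since each segment records the codec that wrote it, but it keeps newly flushed segments in the same format as the old ones.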

    }
    dir.close();
  }
 
  public void testSameCodecDifferentInstance() throws Exception {
    Codec codec = new Lucene45Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        if ("id".equals(field)) {
          return new Pulsing41PostingsFormat(1);
        } else if ("date".equals(field)) {
          // ...

    };
    doTestMixedPostings(codec);
  }
 
  public void testSameCodecDifferentParams() throws Exception {
    Codec codec = new Lucene45Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        if ("id".equals(field)) {
          return new Pulsing41PostingsFormat(1);
        } else if ("date".equals(field)) {
          // ...
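
Both truncated tests follow the same pattern: subclass Lucene45Codec and override getPostingsFormatForField to route individual fields to different postings formats (or to differently-parameterized instances of the same format). A complete minimal sketch of that pattern; the field name and the choice of the "Memory" format are illustrative, and "Memory" requires the lucene-codecs module on the classpath:

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene45.Lucene45Codec;

public class PerFieldCodecExample {
  static final Codec PER_FIELD_CODEC = new Lucene45Codec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if ("id".equals(field)) {
        // Illustrative choice: a format suited to unique, rarely-repeated terms.
        return PostingsFormat.forName("Memory");
      }
      // All other fields keep the Lucene45 default.
      return super.getPostingsFormatForField(field);
    }
  };
}

The per-field format names are stored in the segment, so the index stays readable even when the Codec instance used for reading is not the same object used for writing, which is presumably what testSameCodecDifferentInstance exercises.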

  private FieldInfos currentFieldInfos;

  // maxAllowed = the "highest" we can index, but we will still
  // randomly index at lower IndexOption
  private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
    Codec codec = getCodec();
    SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", maxDoc, false, codec, null, null);

    int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
    if (VERBOSE) {
      System.out.println("\nTEST: now build index");
    }

    int maxIndexOptionNoOffsets = Arrays.asList(IndexOptions.values()).indexOf(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    // TODO use allowPayloads

    FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()];
    for(int fieldUpto=0;fieldUpto<fields.size();fieldUpto++) {
      FieldInfo oldFieldInfo = fieldInfos.fieldInfo(fieldUpto);

      String pf = _TestUtil.getPostingsFormat(codec, oldFieldInfo.name);
      int fieldMaxIndexOption;
      if (doesntSupportOffsets.contains(pf)) {
        fieldMaxIndexOption = Math.min(maxIndexOptionNoOffsets, maxIndexOption);
      } else {
        fieldMaxIndexOption = maxIndexOption;
      }
   
      // Randomly pick the IndexOptions to index this
      // field with:
      IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? fieldMaxIndexOption : random().nextInt(1+fieldMaxIndexOption)];
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;

      newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
                                                   true,
                                                   fieldUpto,
                                                   false,
                                                   false,
                                                   doPayloads,
                                                   indexOptions,
                                                   null,
                                                   DocValuesType.NUMERIC,
                                                   null);
    }

    FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);

    // Estimate that flushed segment size will be 25% of
    // what we use in RAM:
    long bytes = totalPostings * 8 + totalPayloadBytes;

    SegmentWriteState writeState = new SegmentWriteState(null, dir,
                                                         segmentInfo, newFieldInfos,
                                                         32, null, new IOContext(new FlushInfo(maxDoc, bytes)));
    FieldsConsumer fieldsConsumer = codec.postingsFormat().fieldsConsumer(writeState);

    for(Map.Entry<String,Map<BytesRef,Long>> fieldEnt : fields.entrySet()) {
      String field = fieldEnt.getKey();
      Map<BytesRef,Long> terms = fieldEnt.getValue();

      FieldInfo fieldInfo = newFieldInfos.fieldInfo(field);

      IndexOptions indexOptions = fieldInfo.getIndexOptions();

      if (VERBOSE) {
        System.out.println("field=" + field + " indexOtions=" + indexOptions);
      }

      boolean doFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      boolean doPos = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
      boolean doOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
     
      TermsConsumer termsConsumer = fieldsConsumer.addField(fieldInfo);
      long sumTotalTF = 0;
      long sumDF = 0;
      FixedBitSet seenDocs = new FixedBitSet(maxDoc);
      for(Map.Entry<BytesRef,Long> termEnt : terms.entrySet()) {
        BytesRef term = termEnt.getKey();
        SeedPostings postings = getSeedPostings(term.utf8ToString(), termEnt.getValue(), false, maxAllowed);
        if (VERBOSE) {
          System.out.println("  term=" + field + ":" + term.utf8ToString() + " docFreq=" + postings.docFreq + " seed=" + termEnt.getValue());
        }
       
        PostingsConsumer postingsConsumer = termsConsumer.startTerm(term);
        long totalTF = 0;
        int docID = 0;
        while((docID = postings.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          final int freq = postings.freq();
          if (VERBOSE) {
            System.out.println("    " + postings.upto + ": docID=" + docID + " freq=" + postings.freq);
          }
          postingsConsumer.startDoc(docID, doFreq ? postings.freq : -1);
          seenDocs.set(docID);
          if (doPos) {
            totalTF += postings.freq;
            for(int posUpto=0;posUpto<freq;posUpto++) {
              int pos = postings.nextPosition();
              BytesRef payload = postings.getPayload();

              if (VERBOSE) {
                if (doPayloads) {
                  System.out.println("      pos=" + pos + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
                } else {
                  System.out.println("      pos=" + pos);
                }
              }
              postingsConsumer.addPosition(pos, doPayloads ? payload : null,
                                           doOffsets ? postings.startOffset() : -1,
                                           doOffsets ? postings.endOffset() : -1);
            }
          } else if (doFreq) {
            totalTF += freq;
          } else {
            totalTF++;
          }
          postingsConsumer.finishDoc();
        }
        termsConsumer.finishTerm(term, new TermStats(postings.docFreq, doFreq ? totalTF : -1));
        sumTotalTF += totalTF;
        sumDF += postings.docFreq;
      }

      termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
    }

    fieldsConsumer.close();

    if (VERBOSE) {
      System.out.println("TEST: after indexing: files=");
      for(String file : dir.listAll()) {
        System.out.println("  " + file + ": " + dir.fileLength(file) + " bytes");
      }
    }

    currentFieldInfos = newFieldInfos;

    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1);

    return codec.postingsFormat().fieldsProducer(readState);
  }
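
buildIndex returns the codec's FieldsProducer, the read side of the postings API that mirrors the FieldsConsumer used for writing above. A sketch of walking its terms and postings with the 4.x enum APIs ("body" is a hypothetical field name; liveDocs is null because this test index has no deletions):

import java.io.IOException;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

  static void dumpPostings(FieldsProducer fieldsProducer) throws IOException {
    Terms terms = fieldsProducer.terms("body"); // hypothetical field name
    if (terms == null) {
      return;
    }
    TermsEnum termsEnum = terms.iterator(null); // null: no previous enum to reuse
    DocsEnum docsEnum = null;
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      docsEnum = termsEnum.docs(null, docsEnum); // null liveDocs: no deletions
      int docID;
      while ((docID = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.println(term.utf8ToString() + " -> doc " + docID);
      }
    }
  }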

// TODO: maybe we should test this with mocks, but it's easy
// enough to test the basics via Codec
public class TestNamedSPILoader extends LuceneTestCase {
  public void testLookup() {
    Codec codec = Codec.forName("Lucene45");
    assertEquals("Lucene45", codec.getName());
  }
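
Codec.forName resolves names through the SPI loader this test targets, so any codec registered in META-INF/services on the classpath can be looked up. A small sketch of the related lookups, all of which are existing Codec methods in 4.x:

import org.apache.lucene.codecs.Codec;

public class CodecLookups {
  public static void main(String[] args) {
    // Every codec registered via SPI on the classpath.
    for (String name : Codec.availableCodecs()) {
      System.out.println("available: " + name);
    }
    // The codec used when IndexWriterConfig.setCodec is never called.
    System.out.println("default: " + Codec.getDefault().getName());
    // Lookup by name: returns the cached SPI instance.
    Codec codec = Codec.forName("Lucene45");
    System.out.println("looked up: " + codec.getName());
  }
}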

  }
 
  // we want an exception if it's not found.
  public void testBogusLookup() {
    try {
      Codec codec = Codec.forName("dskfdskfsdfksdfdsf");
      fail();
    } catch (IllegalArgumentException expected) {}
  }
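
As the test asserts, an unknown name raises IllegalArgumentException rather than returning null. Callers that take codec names from configuration may therefore want a guarded lookup; a small sketch ("MyCustomCodec" is a hypothetical name):

import org.apache.lucene.codecs.Codec;

    Codec codec;
    try {
      codec = Codec.forName("MyCustomCodec"); // hypothetical; may not be registered
    } catch (IllegalArgumentException e) {
      codec = Codec.getDefault(); // fall back to the default codec
    }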

// TODO: really this should be in BaseTestPF or somewhere else? useful test!
public class TestReuseDocsEnum extends LuceneTestCase {

  public void testReuseDocsEnumNoReuse() throws IOException {
    Directory dir = newDirectory();
    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random());
    writer.commit();
    // ...

  }
 
  // test that reuse only happens if the liveDocs bits are the same: either both null or the same instance
  public void testReuseDocsEnumSameBitsOrNull() throws IOException {
    Directory dir = newDirectory();
    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random());
    writer.commit();
    // ...

  }
 
  // make sure we never reuse a DocsEnum from another reader, even for the same field & codec etc.
  public void testReuseDocsEnumDifferentReader() throws IOException {
    Directory dir = newDirectory();
    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random());
    writer.commit();
    // ...
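
All three tests exercise the reuse contract of TermsEnum.docs(Bits, DocsEnum): a previously returned DocsEnum may be passed back so the codec can recycle it, but as the comments above note, the codec recycles it only when it is compatible, meaning the same reader and matching liveDocs. A sketch of the intended call pattern (obtaining the AtomicReader is elided; "content" is a hypothetical field name):

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

  static void iterateWithReuse(AtomicReader reader) throws IOException {
    Terms terms = reader.terms("content"); // hypothetical field name
    if (terms == null) {
      return;
    }
    TermsEnum termsEnum = terms.iterator(null);
    DocsEnum reuse = null; // starts null; later calls may recycle the previous enum
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      reuse = termsEnum.docs(reader.getLiveDocs(), reuse);
      // ... consume reuse.nextDoc() ...
    }
  }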
