Package org.apache.lucene.codecs

Examples of org.apache.lucene.codecs.Codec
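
The fragments below come from the Lucene 4.x core and test sources; each one obtains a Codec and calls through to one of its per-segment formats. As orientation, here is a minimal sketch of the usual ways to select a codec (the analyzer variable is assumed to be whatever the application already uses):

    // Look up a codec by its SPI name; throws IllegalArgumentException if it isn't registered.
    Codec byName = Codec.forName("Lucene46");

    // The JVM-wide default, used when nothing overrides it.
    Codec dflt = Codec.getDefault();

    // Per-writer override: this codec is used for all newly written segments.
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    iwc.setCodec(byName);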


    fieldInfos = readFieldInfos(si);
    core = new SegmentCoreReaders(this, si.info.dir, si, context, termInfosIndexDivisor);
    segDocValues = new SegmentDocValues();
   
    boolean success = false;
    final Codec codec = si.info.getCodec();
    try {
      if (si.hasDeletions()) {
        // NOTE: the bitvector is stored using the regular directory, not cfs
        liveDocs = codec.liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE);
      } else {
        assert si.getDelCount() == 0;
        liveDocs = null;
      }
      numDocs = si.info.getDocCount() - si.getDelCount();
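
This first fragment is from SegmentReader's initialization: deletions surface as a Bits instance built by the codec's LiveDocsFormat, and, as the NOTE says, the bit vector lives in the segment's main directory even when the rest of the segment sits inside a compound file. A standalone sketch of the same read, assuming an existing SegmentCommitInfo named si:

    Codec codec = si.info.getCodec();
    Bits liveDocs = null;
    if (si.hasDeletions()) {
      // READONCE hints that the file is read fully once; the bits are then held in memory.
      liveDocs = codec.liveDocsFormat().readLiveDocs(si.info.dir, si, IOContext.READONCE);
    }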


//    System.out.println("[" + Thread.currentThread().getName() + "] SR.init: sharing reader: " + sr + " for gens=" + sr.genDVProducers.keySet());
   
    // increment refCount of DocValuesProducers that are used by this reader
    boolean success = false;
    try {
      final Codec codec = si.info.getCodec();
      if (si.getFieldInfosGen() == -1) {
        fieldInfos = sr.fieldInfos;
      } else {
        fieldInfos = readFieldInfos(si);
      }

      closeDir = false;
    }
   
    try {
      final String segmentSuffix = info.getFieldInfosGen() == -1 ? "" : Long.toString(info.getFieldInfosGen(), Character.MAX_RADIX);
      Codec codec = info.info.getCodec();
      FieldInfosFormat fisFormat = codec.fieldInfosFormat();
      return fisFormat.getFieldInfosReader().read(dir, info.info.name, segmentSuffix, IOContext.READONCE);
    } finally {
      if (closeDir) {
        dir.close();
      }
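
The fragment above resolves generation'd field infos: the suffix appended to the file name is just the generation rendered in radix 36, and generation -1 means the original, unsuffixed file. For example:

    Long.toString(1, Character.MAX_RADIX);  // "1"
    Long.toString(35, Character.MAX_RADIX); // "z"
    Long.toString(36, Character.MAX_RADIX); // "10"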

        if (numSegments < 0) {
          throw new CorruptIndexException("invalid segment count: " + numSegments + " (resource: " + input + ")");
        }
        for (int seg = 0; seg < numSegments; seg++) {
          String segName = input.readString();
          Codec codec = Codec.forName(input.readString());
          //System.out.println("SIS.read seg=" + seg + " codec=" + codec);
          SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(directory, segName, IOContext.READ);
          info.setCodec(codec);
          long delGen = input.readLong();
          int delCount = input.readInt();
          if (delCount < 0 || delCount > info.getDocCount()) {
            throw new CorruptIndexException("invalid deletion count: " + delCount + " vs docCount=" + info.getDocCount() + " (resource: " + input + ")");
          }
          long fieldInfosGen = -1;
          if (actualFormat >= VERSION_46) {
            fieldInfosGen = input.readLong();
          }
          final long dvGen;
          if (actualFormat >= VERSION_49) {
            dvGen = input.readLong();
          } else {
            dvGen = fieldInfosGen;
          }
          SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
          if (actualFormat >= VERSION_46) {
            if (actualFormat < VERSION_49) {
              // Recorded per-generation files, which were buggy (see
              // LUCENE-5636). We need to read and keep them so we continue to
              // reference those files. Unfortunately it means that the files will
              // be referenced even if the fields are updated again, until the
              // segment is merged.
              final int numGensUpdatesFiles = input.readInt();
              final Map<Long,Set<String>> genUpdatesFiles;
              if (numGensUpdatesFiles == 0) {
                genUpdatesFiles = Collections.emptyMap();
              } else {
                genUpdatesFiles = new HashMap<>(numGensUpdatesFiles);
                for (int i = 0; i < numGensUpdatesFiles; i++) {
                  genUpdatesFiles.put(input.readLong(), input.readStringSet());
                }
              }
              siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
            } else {
              siPerCommit.setFieldInfosFiles(input.readStringSet());
              final Map<Integer,Set<String>> dvUpdateFiles;
              final int numDVFields = input.readInt();
              if (numDVFields == 0) {
                dvUpdateFiles = Collections.emptyMap();
              } else {
                dvUpdateFiles = new HashMap<>(numDVFields);
                for (int i = 0; i < numDVFields; i++) {
                  dvUpdateFiles.put(input.readInt(), input.readStringSet());
                }
              }
              siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
            }
          }
          add(siPerCommit);
        }
        userData = input.readStringStringMap();
      } else {
        actualFormat = -1;
        Lucene3xSegmentInfoReader.readLegacyInfos(this, directory, input, format);
        Codec codec = Codec.forName("Lucene3x");
        for (SegmentCommitInfo info : this) {
          info.info.setCodec(codec);
        }
      }
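
Codec.forName, used above to revive each segment with the codec that wrote it, is an SPI lookup: the name recorded in the segments file must match a codec registered through META-INF/services. A small sketch for inspecting what is registered:

    // Names of all codecs visible via SPI on the current classpath.
    for (String name : Codec.availableCodecs()) {
      System.out.println(name); // e.g. "Lucene46", plus "SimpleText" if the codecs module is present
    }
    Codec codec = Codec.forName("Lucene46"); // IllegalArgumentException if the name is unknown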

    // We can write directly to the actual name (vs to a
    // .tmp & renaming it) because the file is not live
    // until segments file is written:
    boolean success = false;
    try {
      Codec codec = info.info.getCodec();
      codec.liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, trackingDir, info, pendingDeleteCount, IOContext.DEFAULT);
      success = true;
    } finally {
      if (!success) {
        // Advance only the nextWriteDelGen so that a 2nd
        // attempt to write will write to a new file
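
Both the read and the write path for deletions go through Codec.liveDocsFormat(), so swapping the live-docs handling only requires overriding that one accessor. FilterCodec makes this cheap by delegating every format to a wrapped codec; the class below is a hypothetical sketch:

    public final class MyCodec extends FilterCodec {
      public MyCodec() {
        super("MyCodec", Codec.forName("Lucene46"));
      }
      @Override
      public LiveDocsFormat liveDocsFormat() {
        // substitute a custom LiveDocsFormat here; every other format stays delegated
        return delegate.liveDocsFormat();
      }
    }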

    final Map<Integer,Set<String>> newDVFiles = new HashMap<>();
    Set<String> fieldInfosFiles = null;
    FieldInfos fieldInfos = null;
    boolean success = false;
    try {
      final Codec codec = info.info.getCodec();

      // reader could be null e.g. for a just merged segment (from
      // IndexWriter.commitMergedDeletes).
      final SegmentReader reader = this.reader == null ? new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), IOContext.READONCE) : this.reader;
      try {
        // clone FieldInfos so that we can update their dvGen separately from
        // the reader's infos and write them to a new fieldInfos_gen file
        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
        // cannot use builder.add(reader.getFieldInfos()) because it does not
        // clone FI.attributes as well as FI.dvGen
        for (FieldInfo fi : reader.getFieldInfos()) {
          FieldInfo clone = builder.add(fi);
          // copy the stuff FieldInfos.Builder doesn't copy
          if (fi.attributes() != null) {
            for (Entry<String,String> e : fi.attributes().entrySet()) {
              clone.putAttribute(e.getKey(), e.getValue());
            }
          }
          clone.setDocValuesGen(fi.getDocValuesGen());
        }
        // create new fields or update existing ones to have NumericDV type
        for (String f : dvUpdates.numericDVUpdates.keySet()) {
          builder.addOrUpdate(f, NumericDocValuesField.TYPE);
        }
        // create new fields or update existing ones to have BinaryDV type
        for (String f : dvUpdates.binaryDVUpdates.keySet()) {
          builder.addOrUpdate(f, BinaryDocValuesField.TYPE);
        }
       
        fieldInfos = builder.finish();
        final DocValuesFormat docValuesFormat = codec.docValuesFormat();
       
//          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
        handleNumericDVUpdates(fieldInfos, dvUpdates.numericDVUpdates, trackingDir, docValuesFormat, reader, newDVFiles);
       
//        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
        handleBinaryDVUpdates(fieldInfos, dvUpdates.binaryDVUpdates, trackingDir, docValuesFormat, reader, newDVFiles);

//        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: write fieldInfos; seg=" + info);
        fieldInfosFiles = writeFieldInfosGen(fieldInfos, trackingDir, docValuesFormat, codec.fieldInfosFormat());
      } finally {
        if (reader != this.reader) {
//          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
          reader.close();
        }
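
The doc-values update files above are written with whatever DocValuesFormat the segment's codec reports. With the default Lucene46Codec that choice can be made per field by overriding getDocValuesFormatForField; the field name and the "Memory" format (codecs module) below are illustrative:

    public class PerFieldDVCodec extends Lucene46Codec {
      @Override
      public DocValuesFormat getDocValuesFormatForField(String field) {
        if ("popularity".equals(field)) {
          return DocValuesFormat.forName("Memory"); // keeps this field's doc values in RAM
        }
        return super.getDocValuesFormatForField(field);
      }
    }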

   throws Exception {
      IOContext context = newIOContext(random());
      SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);
      SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);

      final Codec codec = Codec.getDefault();
      TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
      final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null);

      SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2),
          si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,

    // main directory
    Directory dir = newDirectory();
    // two auxiliary directories
    Directory aux = newDirectory();
    Directory aux2 = newDirectory();
    Codec codec = new CustomPerFieldCodec();
    IndexWriter writer = newWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE).setCodec(codec));
    // add 100 documents
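
CustomPerFieldCodec in this test routes different fields to different postings formats. The stock way to get that effect is to extend the default codec and override getPostingsFormatForField; the field name and format choice here are illustrative:

    public class MyPerFieldCodec extends Lucene46Codec {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        if ("id".equals(field)) {
          return PostingsFormat.forName("Memory"); // terms held in RAM; suits primary keys
        }
        return super.getPostingsFormatForField(field);
      }
    }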

    assertNotNull(reader1);
    assertNotNull(reader2);
  }

  public void testMerge() throws IOException {
    final Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, -1, false, codec, null);

    SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(reader1, reader2),
        si, InfoStream.getDefault(), mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
        MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()), true);

    if (termsIndexDivisor == 0) {
      throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }
   
    final Codec codec = si.info.getCodec();
    final Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

    boolean success = false;
   
    try {
      if (si.info.getUseCompoundFile()) {
        cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(si.info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
      } else {
        cfsReader = null;
        cfsDir = dir;
      }
      fieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, si.info.name, IOContext.READONCE);

      this.termsIndexDivisor = termsIndexDivisor;
      final PostingsFormat format = codec.postingsFormat();
      final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context, termsIndexDivisor);
      // Ask codec for its Fields
      fields = format.fieldsProducer(segmentReadState);
      assert fields != null;
      // ask codec for its Norms:
      // TODO: since we don't write any norms file if there are no norms,
      // kinda shaky to assume the codec handles the case of no norms file at all gracefully?!

      if (fieldInfos.hasDocValues()) {
        dvProducer = codec.docValuesFormat().fieldsProducer(segmentReadState);
        assert dvProducer != null;
      } else {
        dvProducer = null;
      }

      if (fieldInfos.hasNorms()) {
        normsProducer = codec.normsFormat().normsProducer(segmentReadState);
        assert normsProducer != null;
      } else {
        normsProducer = null;
      }
 
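
Taken together, this last fragment exercises most of the read side of the Codec contract. For reference, these are the abstract format accessors every Codec implementation must supply in Lucene 4.x:

    public abstract PostingsFormat postingsFormat();
    public abstract DocValuesFormat docValuesFormat();
    public abstract StoredFieldsFormat storedFieldsFormat();
    public abstract TermVectorsFormat termVectorsFormat();
    public abstract FieldInfosFormat fieldInfosFormat();
    public abstract SegmentInfoFormat segmentInfoFormat();
    public abstract NormsFormat normsFormat();
    public abstract LiveDocsFormat liveDocsFormat();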
