Package org.apache.lucene.codecs

Examples of org.apache.lucene.codecs.Codec
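
For orientation before the harvested snippets below, here is a minimal sketch, not taken from any of them, of how a Codec is normally chosen for an index. It assumes the Lucene 4.6-era API (Lucene46Codec, Version.LUCENE_46): a Codec bundles the per-segment formats (postings, doc values, stored fields, live docs, and so on), and IndexWriterConfig.setCodec tells the writer which bundle to use.

// Minimal sketch (assumes Lucene 4.6): select a Codec explicitly for an IndexWriter.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class CodecSelectionSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46,
        new StandardAnalyzer(Version.LUCENE_46));
    // Codecs are registered by SPI name; "Lucene46" is the 4.6 default,
    // so this is equivalent to leaving Codec.getDefault() in place.
    iwc.setCodec(Codec.forName("Lucene46"));
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new StringField("id", "1", Store.YES));
    writer.addDocument(doc);
    writer.close();
    dir.close();
  }
}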


  private FieldInfos currentFieldInfos;

  // maxAllowed = the "highest" IndexOptions we can index with, but we will
  // still randomly index some fields at a lower IndexOptions
  private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
    Codec codec = getCodec();
    SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", maxDoc, false, codec, null);

    int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
    if (VERBOSE) {
      System.out.println("\nTEST: now build index");
    }

    int maxIndexOptionNoOffsets = Arrays.asList(IndexOptions.values()).indexOf(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    // TODO use allowPayloads

    FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()];
    for(int fieldUpto=0;fieldUpto<fields.size();fieldUpto++) {
      FieldInfo oldFieldInfo = fieldInfos.fieldInfo(fieldUpto);

      String pf = _TestUtil.getPostingsFormat(codec, oldFieldInfo.name);
      int fieldMaxIndexOption;
      if (doesntSupportOffsets.contains(pf)) {
        fieldMaxIndexOption = Math.min(maxIndexOptionNoOffsets, maxIndexOption);
      } else {
        fieldMaxIndexOption = maxIndexOption;
      }
   
      // Randomly pick the IndexOptions to index this
      // field with:
      IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? fieldMaxIndexOption : random().nextInt(1+fieldMaxIndexOption)];
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;

      newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
                                                   true,
                                                   fieldUpto,
                                                   false,
                                                   false,
                                                   doPayloads,
                                                   indexOptions,
                                                   null,
                                                   DocValuesType.NUMERIC,
                                                   null);
    }

    FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);

    // Estimate that flushed segment size will be 25% of
    // what we use in RAM:
    long bytes =  totalPostings * 8 + totalPayloadBytes;

    SegmentWriteState writeState = new SegmentWriteState(null, dir,
                                                         segmentInfo, newFieldInfos,
                                                         32, null, new IOContext(new FlushInfo(maxDoc, bytes)));
    FieldsConsumer fieldsConsumer = codec.postingsFormat().fieldsConsumer(writeState);

    for(Map.Entry<String,Map<BytesRef,Long>> fieldEnt : fields.entrySet()) {
      String field = fieldEnt.getKey();
      Map<BytesRef,Long> terms = fieldEnt.getValue();

      FieldInfo fieldInfo = newFieldInfos.fieldInfo(field);

      IndexOptions indexOptions = fieldInfo.getIndexOptions();

      if (VERBOSE) {
        System.out.println("field=" + field + " indexOtions=" + indexOptions);
      }

      boolean doFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      boolean doPos = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
      boolean doOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
     
      TermsConsumer termsConsumer = fieldsConsumer.addField(fieldInfo);
      long sumTotalTF = 0;
      long sumDF = 0;
      FixedBitSet seenDocs = new FixedBitSet(maxDoc);
      for(Map.Entry<BytesRef,Long> termEnt : terms.entrySet()) {
        BytesRef term = termEnt.getKey();
        SeedPostings postings = getSeedPostings(term.utf8ToString(), termEnt.getValue(), false, maxAllowed);
        if (VERBOSE) {
          System.out.println("  term=" + field + ":" + term.utf8ToString() + " docFreq=" + postings.docFreq + " seed=" + termEnt.getValue());
        }
       
        PostingsConsumer postingsConsumer = termsConsumer.startTerm(term);
        long totalTF = 0;
        int docID = 0;
        while((docID = postings.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          final int freq = postings.freq();
          if (VERBOSE) {
            System.out.println("    " + postings.upto + ": docID=" + docID + " freq=" + postings.freq);
          }
          postingsConsumer.startDoc(docID, doFreq ? postings.freq : -1);
          seenDocs.set(docID);
          if (doPos) {
            totalTF += postings.freq;
            for(int posUpto=0;posUpto<freq;posUpto++) {
              int pos = postings.nextPosition();
              BytesRef payload = postings.getPayload();

              if (VERBOSE) {
                if (doPayloads) {
                  System.out.println("      pos=" + pos + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
                } else {
                  System.out.println("      pos=" + pos);
                }
              }
              postingsConsumer.addPosition(pos, doPayloads ? payload : null,
                                           doOffsets ? postings.startOffset() : -1,
                                           doOffsets ? postings.endOffset() : -1);
            }
          } else if (doFreq) {
            totalTF += freq;
          } else {
            totalTF++;
          }
          postingsConsumer.finishDoc();
        }
        termsConsumer.finishTerm(term, new TermStats(postings.docFreq, doFreq ? totalTF : -1));
        sumTotalTF += totalTF;
        sumDF += postings.docFreq;
      }

      termsConsumer.finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.cardinality());
    }

    fieldsConsumer.close();

    if (VERBOSE) {
      System.out.println("TEST: after indexing: files=");
      for(String file : dir.listAll()) {
        System.out.println("  " + file + ": " + dir.fileLength(file) + " bytes");
      }
    }

    currentFieldInfos = newFieldInfos;

    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1);

    return codec.postingsFormat().fieldsProducer(readState);
  }
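
The FieldsProducer returned by buildIndex is the read side of the codec's postings format. A hedged sketch of how such a producer is typically walked (the helper below is assumed, not part of the test; Lucene 4.x API):

// Sketch: dump every term and its postings from a FieldsProducer.
import java.io.IOException;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public final class PostingsDumper {
  public static void dump(FieldsProducer fieldsProducer) throws IOException {
    // FieldsProducer extends Fields, which iterates over field names
    for (String field : fieldsProducer) {
      Terms terms = fieldsProducer.terms(field);
      if (terms == null) {
        continue;
      }
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef term;
      while ((term = termsEnum.next()) != null) {
        DocsEnum docs = termsEnum.docs(null, null); // no deleted-docs filter, no reuse
        int docID;
        while ((docID = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          System.out.println(field + ":" + term.utf8ToString()
              + " doc=" + docID + " freq=" + docs.freq());
        }
      }
    }
  }
}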


    return result;
  }
 
  public void testWriteReadMerge() throws IOException {
    // get another codec, other than the default: so we are merging segments across different codecs
    final Codec otherCodec;
    if ("SimpleText".equals(Codec.getDefault().getName())) {
      otherCodec = new Lucene46Codec();
    } else {
      otherCodec = new SimpleTextCodec();
    }
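
The same cross-codec idea, sketched through the public API (assumes Lucene 4.6 and the SimpleText codec from the lucene-codecs module): each segment records the codec it was written with, so an IndexWriter configured with a different codec can still read the old segments and will rewrite everything with its own codec when it merges.

// Sketch: write a segment with the default codec, then force-merge under SimpleTextCodec.
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class CrossCodecMergeSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    // first segment: whatever Codec.getDefault() is
    IndexWriter w1 = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_46, new WhitespaceAnalyzer(Version.LUCENE_46)));
    Document doc = new Document();
    doc.add(new TextField("body", "hello codec", Store.NO));
    w1.addDocument(doc);
    w1.close();

    // second writer uses a different codec; the merged segment is written with it
    IndexWriterConfig iwc2 = new IndexWriterConfig(Version.LUCENE_46,
        new WhitespaceAnalyzer(Version.LUCENE_46));
    iwc2.setCodec(new SimpleTextCodec());
    IndexWriter w2 = new IndexWriter(dir, iwc2);
    w2.addDocument(doc);
    w2.forceMerge(1);
    w2.close();
    dir.close();
  }
}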

   throws Exception {
      IOContext context = newIOContext(random());
      SegmentReader r1 = new SegmentReader(si1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);
      SegmentReader r2 = new SegmentReader(si2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, context);

      final Codec codec = Codec.getDefault();
      TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
      final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null);

      SegmentMerger merger = new SegmentMerger(Arrays.<AtomicReader>asList(r1, r2),
          si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,

    final FieldData field = new FieldData("field", builder, terms, true, false);
    final FieldData[] fields = new FieldData[] {field};
    final FieldInfos fieldInfos = builder.finish();
    final Directory dir = newDirectory();
    this.write(fieldInfos, dir, fields, true);
    Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

    final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));

    final Iterator<String> fieldsEnum = reader.iterator();
    String fieldName = fieldsEnum.next();
    assertNotNull(fieldName);
    final Terms terms2 = reader.terms(fieldName);

    if (VERBOSE) {
      System.out.println("TEST: now write postings");
    }

    this.write(fieldInfos, dir, fields, false);
    Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);

    if (VERBOSE) {
      System.out.println("TEST: now read postings");
    }
    final FieldsProducer terms = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR));

    final Verify[] threads = new Verify[NUM_TEST_THREADS-1];
    for(int i=0;i<NUM_TEST_THREADS-1;i++) {
      threads[i] = new Verify(si, fields, terms);
      threads[i].setDaemon(true);

  }

  private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable {

    final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27);
    final Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null);
    final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, termIndexInterval, null, newIOContext(random()));

    final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state);
    Arrays.sort(fields);
    for (final FieldData field : fields) {
      if (!allowPreFlex && codec instanceof Lucene3xCodec) {
        // code below expects unicode sort order
        continue;

    add(diagsTable, r1);
    Object c1 = create("cell");
    setBoolean(c1, "enabled", false);
    add(r1, c1);
    // codec info
    Codec codec = si.info.getCodec();
    map = new LinkedHashMap<String,String>();
    map.put("codecName", codec.getName());
    map.put("codecClassName", codec.getClass().getName());
    map.put("docValuesFormat", codec.docValuesFormat().getClass().getName());
    map.put("fieldInfosFormat", codec.fieldInfosFormat().getClass().getName());
    map.put("liveDocsFormat", codec.liveDocsFormat().getClass().getName());
    map.put("normsFormat", codec.normsFormat().getClass().getName());
    map.put("postingsFormat", codec.postingsFormat().toString() + " " + codec.postingsFormat().getClass().getName());
    map.put("segmentInfoFormat", codec.segmentInfoFormat().getClass().getName());
    map.put("storedFieldsFormat", codec.storedFieldsFormat().getClass().getName());
    map.put("termVectorsFormat", codec.termVectorsFormat().getClass().getName());
    try {
      List<String> files = new ArrayList<String>(si.files());
      Collections.sort(files);
      map.put("---files---", files.toString());
      if (si.info.getUseCompoundFile()) {

        if (status == null) {
          errorMsg("You need to run 'Check Index' first.");
          return;
        }
        // use codec from the first valid segment
        Codec c = null;
        for (SegmentInfoStatus ssi : status.segmentInfos) {
          if (ssi.codec != null) {
            c = ssi.codec;
            break;
          }
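
The status object consulted here comes from CheckIndex. A minimal sketch of producing it and reading the per-segment codec, assuming a Lucene 4.x index at a path given on the command line:

// Sketch: run CheckIndex and report which codec wrote each segment.
import java.io.File;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.CheckIndex.Status;
import org.apache.lucene.index.CheckIndex.Status.SegmentInfoStatus;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckIndexSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File(args[0])); // path to an existing index
    CheckIndex checker = new CheckIndex(dir);
    Status status = checker.checkIndex();
    for (SegmentInfoStatus ssi : status.segmentInfos) {
      // codec may be null if the segment could not be opened
      System.out.println(ssi.name + " codec=" + (ssi.codec == null ? "?" : ssi.codec.getName()));
    }
    dir.close();
  }
}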

    // We can write directly to the actual name (vs to a
    // .tmp & renaming it) because the file is not live
    // until segments file is written:
    boolean success = false;
    try {
      Codec codec = info.info.getCodec();
      codec.liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, trackingDir, info, pendingDeleteCount, IOContext.DEFAULT);
      success = true;
    } finally {
      if (!success) {
        // Advance only the nextWriteDelGen so that a 2nd
        // attempt to write will write to a new file
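
That fragment is IndexWriter-internal plumbing; from the public API, the codec's LiveDocsFormat is exercised whenever deletes are flushed. A small hedged sketch (field and term names are made up for illustration):

// Sketch (assumes Lucene 4.6): deleting by term and committing makes the codec's
// LiveDocsFormat write a new live-docs generation (a *.del file with the 4.x default format).
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class LiveDocsSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_46, new KeywordAnalyzer()));
    for (int i = 0; i < 3; i++) {
      Document doc = new Document();
      doc.add(new StringField("id", Integer.toString(i), Store.NO));
      writer.addDocument(doc);
    }
    writer.commit();                             // one segment, no deletes yet
    writer.deleteDocuments(new Term("id", "1"));
    writer.commit();                             // flush deletes -> live docs are (re)written
    for (String file : dir.listAll()) {
      System.out.println(file);                  // the live-docs file shows up next to the segment files
    }
    writer.close();
    dir.close();
  }
}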

    TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
   
    FieldInfos fieldInfos = null;
    boolean success = false;
    try {
      final Codec codec = info.info.getCodec();

      // reader could be null e.g. for a just merged segment (from
      // IndexWriter.commitMergedDeletes).
      final SegmentReader reader = this.reader == null ? new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), IOContext.READONCE) : this.reader;
      try {
        // clone FieldInfos so that we can update their dvGen separately from
        // the reader's infos and write them to a new fieldInfos_gen file
        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
        // cannot use builder.add(reader.getFieldInfos()) because it does not
        // clone FI.attributes as well as FI.dvGen
        for (FieldInfo fi : reader.getFieldInfos()) {
          FieldInfo clone = builder.add(fi);
          // copy the stuff FieldInfos.Builder doesn't copy
          if (fi.attributes() != null) {
            for (Entry<String,String> e : fi.attributes().entrySet()) {
              clone.putAttribute(e.getKey(), e.getValue());
            }
          }
          clone.setDocValuesGen(fi.getDocValuesGen());
        }
        // create new fields or update existing ones to have NumericDV type
        for (String f : numericFieldUpdates.keySet()) {
          builder.addOrUpdate(f, NumericDocValuesField.TYPE);
        }
       
        fieldInfos = builder.finish();
        final long nextFieldInfosGen = info.getNextFieldInfosGen();
        final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
        final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, writer.getConfig().getTermIndexInterval(), null, IOContext.DEFAULT, segmentSuffix);
        final DocValuesFormat docValuesFormat = codec.docValuesFormat();
        final DocValuesConsumer fieldsConsumer = docValuesFormat.fieldsConsumer(state);
        boolean fieldsConsumerSuccess = false;
        try {
//          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: applying updates; seg=" + info + " updates=" + numericUpdates);
          for (Entry<String,NumericFieldUpdates> e : numericFieldUpdates.entrySet()) {
            final String field = e.getKey();
            final NumericFieldUpdates fieldUpdates = e.getValue();
            final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
            assert fieldInfo != null;

            fieldInfo.setDocValuesGen(nextFieldInfosGen);
            // write the numeric updates to a new gen'd docvalues file
            fieldsConsumer.addNumericField(fieldInfo, new Iterable<Number>() {
              final NumericDocValues currentValues = reader.getNumericDocValues(field);
              final Bits docsWithField = reader.getDocsWithField(field);
              final int maxDoc = reader.maxDoc();
              final UpdatesIterator updatesIter = fieldUpdates.getUpdates();
              @Override
              public Iterator<Number> iterator() {
                updatesIter.reset();
                return new Iterator<Number>() {

                  int curDoc = -1;
                  int updateDoc = updatesIter.nextDoc();
                 
                  @Override
                  public boolean hasNext() {
                    return curDoc < maxDoc - 1;
                  }

                  @Override
                  public Number next() {
                    if (++curDoc >= maxDoc) {
                      throw new NoSuchElementException("no more documents to return values for");
                    }
                    if (curDoc == updateDoc) { // this document has an updated value
                      Long value = updatesIter.value(); // either null (unset value) or updated value
                      updateDoc = updatesIter.nextDoc(); // prepare for next round
                      return value;
                    } else {
                      // no update for this document
                      assert curDoc < updateDoc;
                      if (currentValues != null && docsWithField.get(curDoc)) {
                        // only read the current value if the document had a value before
                        return currentValues.get(curDoc);
                      } else {
                        return null;
                      }
                    }
                  }

                  @Override
                  public void remove() {
                    throw new UnsupportedOperationException("this iterator does not support removing elements");
                  }
                };
              }
            });
          }
         
          codec.fieldInfosFormat().getFieldInfosWriter().write(trackingDir, info.info.name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
          fieldsConsumerSuccess = true;
        } finally {
          if (fieldsConsumerSuccess) {
            fieldsConsumer.close();
          } else {
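
Everything above is the machinery behind IndexWriter.updateNumericDocValue, which appeared around Lucene 4.6. A caller-side sketch (field names are illustrative only):

// Sketch: update one document's numeric DocValues in place; the codec's
// DocValuesFormat writes the new value into a fresh doc-values generation.
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class NumericDVUpdateSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_46, new KeywordAnalyzer()));
    Document doc = new Document();
    doc.add(new StringField("id", "1", Store.NO));
    doc.add(new NumericDocValuesField("price", 100L));
    writer.addDocument(doc);
    writer.commit();

    // rewrite only the doc values for the matching document, not the whole document
    writer.updateNumericDocValue(new Term("id", "1"), "price", 150L);
    writer.commit();
    writer.close();
    dir.close();
  }
}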
