Package com.gentics.cr.lucene.autocomplete

Source Code of com.gentics.cr.lucene.autocomplete.AutocompleteIndexJob

package com.gentics.cr.lucene.autocomplete;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.LuceneDictionary;

import com.gentics.cr.CRConfig;
import com.gentics.cr.CRConfigUtil;
import com.gentics.cr.exceptions.CRException;
import com.gentics.cr.lucene.indexaccessor.IndexAccessor;
import com.gentics.cr.lucene.indexer.index.LuceneIndexLocation;
import com.gentics.cr.monitoring.MonitorFactory;
import com.gentics.cr.monitoring.UseCase;
import com.gentics.cr.util.indexing.AbstractUpdateCheckerJob;
import com.gentics.cr.util.indexing.IndexLocation;

/**
* This job is used to re-index (or newly index) the autocomplete-index
*
* @author Sebastian Vogel <s.vogel@gentics.com>
*
*/
public class AutocompleteIndexJob extends AbstractUpdateCheckerJob implements AutocompleteConfigurationKeys {

  private AutocompleteIndexExtension autocompleter;

  public AutocompleteIndexJob(CRConfig updateCheckerConfig, IndexLocation indexLoc,
    AutocompleteIndexExtension autocompleter) {
    super(updateCheckerConfig, indexLoc, null);

    this.identifyer = identifyer.concat(":reIndex");
    log = Logger.getLogger(AutocompleteIndexJob.class);
    this.autocompleter = autocompleter;
  }

  /**
   * starts the job - is called by the IndexJobQueue
   */
  @Override
  protected void indexCR(IndexLocation indexLocation, CRConfigUtil config) throws CRException {
    try {
      reIndex();
    } catch (IOException e) {
      throw new CRException("Could not access the Autocomplete index! " + e.getMessage());
    }

  }

  private synchronized void reIndex() throws IOException {
    UseCase ucReIndex = MonitorFactory.startUseCase("reIndex()");
    // build a dictionary (from the spell package)
    log.debug("Starting to reindex autocomplete index.");

    LuceneIndexLocation source = this.autocompleter.getSource();
    LuceneIndexLocation autocompleteLocation = this.autocompleter.getAutocompleteLocation();
    String autocompletefield = this.autocompleter.getAutocompletefield();

    IndexAccessor sia = source.getAccessor();
    IndexReader sourceReader = sia.getReader(false);
    LuceneDictionary dict = new LuceneDictionary(sourceReader, autocompletefield);
    IndexAccessor aia = autocompleteLocation.getAccessor();
    // IndexReader reader = aia.getReader(false);
    IndexWriter writer = aia.getWriter();

    try {
      writer.setMergeFactor(300);
      writer.setMaxBufferedDocs(150);
      // go through every word, storing the original word (incl. n-grams)
      // and the number of times it occurs
      // CREATE WORD LIST FROM SOURCE INDEX
      Map<String, Integer> wordsMap = new HashMap<String, Integer>();
      Iterator<String> iter = (Iterator<String>) dict.getWordsIterator();
      while (iter.hasNext()) {
        String word = iter.next();
        int len = word.length();
        if (len < 3) {
          continue; // too short we bail but "too long" is fine...
        }
        if (wordsMap.containsKey(word)) {
          throw new IllegalStateException("Lucene returned a bad word list");
        } else {
          // use the number of documents this word appears in
          wordsMap.put(word, sourceReader.docFreq(new Term(autocompletefield, word)));
        }
      }
      // DELETE OLD OBJECTS FROM INDEX
      writer.deleteAll();

      // UPDATE DOCUMENTS IN AUTOCOMPLETE INDEX
      for (String word : wordsMap.keySet()) {
        // ok index the word
        Document doc = new Document();
        doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); // orig term
        doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed
        doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS)); // count
        writer.addDocument(doc);
      }
      writer.optimize();
      autocompleteLocation.createReopenFile();
    } finally {

      sia.release(sourceReader, false);
      // close writer

      aia.release(writer);
      // aia.release(reader,false);
    }
    log.debug("Finished reindexing autocomplete index.");
    ucReIndex.stop();
  }

}
TOP

Related Classes of com.gentics.cr.lucene.autocomplete.AutocompleteIndexJob

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.