Package com.gentics.cr.lucene.indexaccessor

Examples of com.gentics.cr.lucene.indexaccessor.IndexAccessor


  /**
   * Returns up to {@code numSug} spelling suggestions for {@code word}, built
   * from the n-gram spell index and (optionally) filtered against the given
   * source index.
   *
   * @param word the (possibly misspelled) input word
   * @param numSug maximum number of suggestions to return
   * @param ir reader of the source index used for document-frequency checks;
   *        may be {@code null}, in which case no frequency filtering happens
   * @param field field to check frequencies in; may be "all" (all fields) or a
   *        comma-separated list of field names; may be {@code null}
   * @param morePopular if {@code true}, only suggest words that occur more
   *        often in the source index than the input word does
   * @return suggested words, best suggestion first; the input word itself when
   *         it already exists in the source index and {@code morePopular} is
   *         {@code false}
   * @throws IOException in case of a low-level index error
   */
  public final String[] suggestSimilar(final String word, final int numSug, final IndexReader ir, final String field,
      final boolean morePopular) throws IOException {
    // obtainSearcher calls ensureOpen
    ensureOpen();

    IndexAccessor ia = this.spellIndex.getAccessor();
    final IndexSearcher indexSearcher = (IndexSearcher) ia.getPrioritizedSearcher();

    try {
      float min = this.minScore;
      int minfrq = this.minDFreq;
      final int lengthWord = word.length();

      // filled only for the "all"/comma-list case; reused below when
      // computing candidate frequencies
      Collection<String> fieldnames = null;

      // document frequency of the input word in the source index, summed over
      // all relevant fields
      int intfreq = 0;
      if (ir != null) {
        if (field != null) {
          if ("all".equalsIgnoreCase(field) || (field != null && field.contains(","))) {
            if ("all".equalsIgnoreCase(field)) {
              fieldnames = ir.getFieldNames(FieldOption.ALL);
            } else {
              String[] arr = field.split(",");
              fieldnames = Arrays.asList(arr);
            }

            for (String fieldname : fieldnames) {
              intfreq += ir.docFreq(new Term(fieldname, word));
            }
          } else {
            intfreq += ir.docFreq(new Term(field, word));
          }
        }
      }

      final int freq = intfreq;
      int xfreq = 0;
      if (morePopular && ir != null && field != null) {
        xfreq = freq;
      }
      // frequency a suggestion must exceed when morePopular is set
      final int goalFreq = xfreq;

      // if the word exists in the real index and we don't
      // care for word frequency, return the word itself
      if (!morePopular && freq > 0) {
        return new String[] { word };
      }

      // build an OR query over the word's n-grams against the spell index,
      // optionally boosting the leading and trailing grams
      BooleanQuery query = new BooleanQuery();
      String[] grams;
      String key;

      for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {

        key = "gram" + ng; // form key

        grams = formGrams(word, ng); // form word into ngrams (allow
                        // dups too)

        if (grams.length == 0) {
          continue; // hmm
        }

        if (bStart > 0) { // should we boost prefixes?
          add(query, "start" + ng, grams[0], bStart); // matches start
                                // of word

        }
        if (bEnd > 0) { // should we boost suffixes
          add(query, "end" + ng, grams[grams.length - 1], bEnd);
          // matches end of word

        }
        for (int i = 0; i < grams.length; i++) {
          add(query, key, grams[i]);
        }
      }

      // fetch more hits than requested suggestions so the distance and
      // frequency filters below still leave enough candidates
      int maxHits = TEN * numSug;

      // System.out.println("Q: " + query);
      ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
      // System.out.println("HITS: " + hits.length());
      CustomSuggestWordQueue sugQueue = new CustomSuggestWordQueue(numSug);

      // go thru more than 'maxr' matches in case the distance filter
      // triggers
      int stop = Math.min(hits.length, maxHits);
      CustomSuggestWord sugWord = new CustomSuggestWord();
      for (int i = 0; i < stop; i++) {

        sugWord.setString(indexSearcher.doc(hits[i].doc).get(F_WORD));
        // get orig word

        log.debug("DYM found term: " + sugWord.getString());

        // don't suggest a word for itself, that would be silly
        if (sugWord.getString().equals(word)) {
          log.debug("  Found word is the same as input word (" + word + ") -> next");
          continue;
        }

        // edit distance
        sugWord.setScore(sd.getDistance(word, sugWord.getString()));
        log.debug("  Distance score: " + sugWord.getScore());
        if (sugWord.getScore() < min) {
          log.debug("  Found word does not match min score (" + min + ") -> next");
          continue;
        }

        if (ir != null && field != null) { // use the user index

          // frequency of the candidate, summed over the same field set that
          // was used for the input word
          String sugterm = sugWord.getString();
          int sugfreq = 0;
          if (fieldnames != null) {
            for (String fieldname : fieldnames) {
              sugfreq += ir.docFreq(new Term(fieldname, sugterm));
            }
          } else {
            sugfreq = ir.docFreq(new Term(field, sugterm));
          }
          sugWord.setFreq(sugfreq); // freq in the index
          log.debug("  DocFreq: " + sugWord.getFreq());
          // don't suggest a word that is not present in the field
          if ((morePopular && goalFreq > sugWord.getFreq()) || sugWord.getFreq() < minfrq) {
            log.debug("  Found word doese not match min frequency (" + minfrq + ") -> next");
            continue;
          }
        }
        sugQueue.insertWithOverflow(sugWord);
        if (sugQueue.size() == numSug) {
          // if queue full, maintain the minScore score
          min = sugQueue.top().getScore();
        }
        // a fresh instance per accepted candidate: the queue keeps references
        sugWord = new CustomSuggestWord();
      }

      // convert to array string; pop returns the worst entry first, so fill
      // the array from the back to get best-first order
      String[] list = new String[sugQueue.size()];
      for (int i = sugQueue.size() - 1; i >= 0; i--) {
        list[i] = sugQueue.pop().getString();
      }

      return list;
    } finally {
      if (ia != null && indexSearcher != null) {
        ia.release(indexSearcher);
      }
    }
  }
View Full Code Here


   *
   * @throws IOException in case of error
   */
  public final void clearIndex() throws IOException {
    ensureOpen();
    final IndexAccessor accessor = this.spellIndex.getAccessor();
    final IndexWriter writer = accessor.getWriter();
    try {
      writer.deleteAll();
      this.spellIndex.createReopenFile();
    } finally {
      if (accessor != null && writer != null) {
        accessor.release(writer);
      }
    }
  }
View Full Code Here

   * @throws IOException in case of error
   * @return true if the word exists in the index
   */
  public final boolean exist(final String word) throws IOException {
    ensureOpen();
    final IndexAccessor accessor = this.spellIndex.getAccessor();
    final IndexSearcher indexSearcher = (IndexSearcher) accessor.getPrioritizedSearcher();
    try {
      return indexSearcher.docFreq(F_WORD_TERM.createTerm(word)) > 0;
    } finally {
      if (accessor != null && indexSearcher != null) {
        accessor.release(indexSearcher);
      }
    }
  }
View Full Code Here

   */
  public final void indexDictionary(final Dictionary dict) throws IOException {
    synchronized (modifyCurrentIndexLock) {
      ensureOpen();

      final IndexAccessor accessor = this.spellIndex.getAccessor();
      final IndexWriter writer = accessor.getWriter();
      writer.setMergeFactor(300);
      final IndexSearcher indexSearcher = (IndexSearcher) accessor.getPrioritizedSearcher();
      int obj_count = 0;

      try {
        Iterator<String> iter = dict.getWordsIterator();
        while (iter.hasNext()) {
          String word = iter.next();

          int len = word.length();
          if (len < THREE) {
            continue; // too short we bail but "too long" is fine...
          }

          if (indexSearcher.docFreq(F_WORD_TERM.createTerm(word)) > 0) {
            // if the word already exist in the gramindex
            continue;
          }

          // ok index the word
          Document doc = createDocument(word, getMin(len), getMax(len));
          writer.addDocument(doc);
          obj_count++;
        }

      } finally {
        // if documents where added to the index create a reopen file and
        // optimize the writer
        if (obj_count > 0) {
          writer.optimize();
          this.spellIndex.createReopenFile();
        }
        accessor.release(writer);
        accessor.release(indexSearcher);
      }
    }
  }
View Full Code Here

    if (crid == null) {
      crid = this.identifyer;
    }
    fillBoostValues(config.getString(BOOSTED_ATTRIBUTES_KEY));

    IndexAccessor indexAccessor = null;
    IndexWriter indexWriter = null;
    IndexReader indexReader = null;
    TaxonomyAccessor taxonomyAccessor = null;
    TaxonomyWriter taxonomyWriter = null;
    LuceneIndexUpdateChecker luceneIndexUpdateChecker = null;
    boolean finishedIndexJobSuccessfull = false;
    boolean finishedIndexJobWithError = false;

    try {
      indexLocation.checkLock();
      Collection<CRResolvableBean> slice = null;
      try {
        status.setCurrentStatusString("Writer accquired. Starting" + "index job.");

        if (rp == null) {
          throw new CRException("FATAL ERROR", "RequestProcessor not available");
        }

        String bsString = (String) config.get(BATCH_SIZE_KEY);

        int crBatchSize = batchSize;

        if (bsString != null) {
          try {
            crBatchSize = Integer.parseInt(bsString);
          } catch (NumberFormatException e) {
            log.error("The configured " + BATCH_SIZE_KEY + " for the Current CR" + " did not contain a parsable integer. ", e);
          }
        }

        // and get the current rule
        String rule = (String) config.get(RULE_KEY);

        if (rule == null) {
          rule = "";
        }
        if (rule.length() == 0) {
          rule = "(1 == 1)";
        } else {
          rule = "(" + rule + ")";
        }

        List<ContentTransformer> transformerlist = ContentTransformer.getTransformerList(config);

        boolean create = true;

        if (indexLocation.isContainingIndex()) {
          create = false;
          log.debug("Index already exists.");
        }
        if (indexLocation instanceof LuceneIndexLocation) {
          luceneIndexUpdateChecker = new LuceneIndexUpdateChecker((LuceneIndexLocation) indexLocation, CR_FIELD_KEY, crid,
              idAttribute);
        } else {
          log.error("IndexLocation is not created for Lucene. " + "Using the " + CRLuceneIndexJob.class.getName()
              + " requires that you use the " + LuceneIndexLocation.class.getName()
              + ". You can configure another Job by setting the " + IndexLocation.UPDATEJOBCLASS_KEY + " key in your config.");
          throw new CRException(new CRError("Error", "IndexLocation is not created for Lucene."));
        }
        Collection<CRResolvableBean> objectsToIndex = null;
        //Clear Index and remove stale Documents
        //if (!create) {
        log.debug("Will do differential index.");
        try {
          CRRequest req = new CRRequest();
          req.setRequestFilter(rule);
          req.set(CR_FIELD_KEY, crid);
          status.setCurrentStatusString("Get objects to update " + "in the index ...");
          objectsToIndex = getObjectsToUpdate(req, rp, false, luceneIndexUpdateChecker);
        } catch (Exception e) {
          log.error("ERROR while cleaning index", e);
        }
        //}
        //Obtain accessor and writer after clean
        if (indexLocation instanceof LuceneIndexLocation) {
          indexAccessor = ((LuceneIndexLocation) indexLocation).getAccessor();
          indexWriter = indexAccessor.getWriter();
          indexReader = indexAccessor.getReader(false);
          useFacets = ((LuceneIndexLocation) indexLocation).useFacets();
          if (useFacets) {
            taxonomyAccessor = ((LuceneIndexLocation) indexLocation).getTaxonomyAccessor();
            taxonomyWriter = taxonomyAccessor.getTaxonomyWriter();
          }
        } else {
          log.error("IndexLocation is not created for Lucene. " + "Using the " + CRLuceneIndexJob.class.getName()
              + " requires that you use the " + LuceneIndexLocation.class.getName()
              + ". You can configure another Job by setting the " + IndexLocation.UPDATEJOBCLASS_KEY + " key in your config.");
          throw new CRException(new CRError("Error", "IndexLocation is not created for Lucene."));
        }
        log.debug("Using rule: " + rule);
        // prepare the map of indexed/stored attributes
        Map<String, Boolean> attributes = new HashMap<String, Boolean>();
        List<String> containedAttributes = IndexerUtil.getListFromString(config.getString(CONTAINED_ATTRIBUTES_KEY), ",");
        List<String> indexedAttributes = IndexerUtil.getListFromString(config.getString(INDEXED_ATTRIBUTES_KEY), ",");
        List<String> reverseAttributes = ((LuceneIndexLocation) indexLocation).getReverseAttributes();
        // first put all indexed attributes into the map
        for (String name : indexedAttributes) {
          attributes.put(name, Boolean.FALSE);
        }

        // now put all contained attributes
        for (String name : containedAttributes) {
          attributes.put(name, Boolean.TRUE);
        }
        // finally, put the "contentid" (always contained)
        attributes.put(idAttribute, Boolean.TRUE);

        if (objectsToIndex == null) {
          log.debug("Rule returned no objects to index. Skipping...");
          return;
        }

        status.setObjectCount(objectsToIndex.size());
        log.debug(" index job with " + objectsToIndex.size() + " objects to index.");
        // now get the first batch of objects from the collection
        // (remove them from the original collection) and index them
        slice = new Vector(crBatchSize);
        int sliceCounter = 0;

        status.setCurrentStatusString("Starting to index slices.");
        boolean interrupted = Thread.currentThread().isInterrupted();
        for (Iterator<CRResolvableBean> iterator = objectsToIndex.iterator(); iterator.hasNext();) {
          CRResolvableBean obj = iterator.next();
          slice.add(obj);
          iterator.remove();
          sliceCounter++;
          if (Thread.currentThread().isInterrupted()) {
            interrupted = true;
            break;
          }
          if (sliceCounter == crBatchSize) {
            // index the current slice
            log.debug("Indexing slice with " + slice.size() + " objects.");
            indexSlice(
              crid,
              indexWriter,
              indexReader,
              slice,
              attributes,
              rp,
              create,
              config,
              transformerlist,
              reverseAttributes,
              taxonomyWriter,
              taxonomyAccessor);
            // clear the slice and reset the counter
            slice.clear();
            sliceCounter = 0;
          }
        }

        if (!slice.isEmpty()) {
          // index the last slice
          indexSlice(
            crid,
            indexWriter,
            indexReader,
            slice,
            attributes,
            rp,
            create,
            config,
            transformerlist,
            reverseAttributes,
            taxonomyWriter,
            taxonomyAccessor);
        }
        if (!interrupted) {
          // Only Optimize the Index if the thread
          // has not been interrupted
          if (optimize) {
            log.debug("Executing optimize command.");
            UseCase uc = MonitorFactory.startUseCase("optimize(" + crid + ")");
            try {
              indexWriter.optimize();
            } finally {
              uc.stop();
            }
          } else if (maxSegmentsString != null) {
            log.debug("Executing optimize command with max" + " segments: " + maxSegmentsString);
            int maxs = Integer.parseInt(maxSegmentsString);
            UseCase uc = MonitorFactory.startUseCase("optimize(" + crid + ")");
            try {
              indexWriter.optimize(maxs);
            } finally {
              uc.stop();
            }
          }
        } else {
          log.debug("Job has been interrupted and will now be closed." + " Missing objects " + "will be reindexed next run.");
        }
        finishedIndexJobSuccessfull = true;
      } catch (Exception ex) {
        log.error("Could not complete index run... indexed Objects: " + status.getObjectsDone()
            + ", trying to close index and remove lock.", ex);
        finishedIndexJobWithError = true;
        status.setError("Could not complete index run... indexed " + "Objects: " + status.getObjectsDone()
            + ", trying to close index and remove lock.");
      } finally {
        if (!finishedIndexJobSuccessfull && !finishedIndexJobWithError) {
          log.fatal("There seems to be a run time exception from this" + " index job.\nLast slice was: " + slice);
        }
        //Set status for job if it was not locked
        status.setCurrentStatusString("Finished job.");
        int objectCount = status.getObjectsDone();
        log.debug("Indexed " + objectCount + " objects...");

        if (taxonomyAccessor != null && taxonomyWriter != null) {
          taxonomyAccessor.release(taxonomyWriter);
        }

        if (indexAccessor != null && indexWriter != null) {
          indexAccessor.release(indexWriter);
        }
        if (indexAccessor != null && indexReader != null) {
          indexAccessor.release(indexReader, false);
        }

        if (objectCount > 0) {
          indexLocation.createReopenFile();
        }
View Full Code Here

  @Override
  protected final void indexCR(final IndexLocation indexLocation, final CRConfigUtil config) throws CRException {
    // Optimize-only job: merges the Lucene index segments; no documents are
    // added or removed here.
    if (indexLocation instanceof LuceneIndexLocation) {
      log.debug("Starting to optimize index.");
      LuceneIndexLocation luceneIndexLoccation = (LuceneIndexLocation) indexLocation;
      IndexAccessor ia = luceneIndexLoccation.getAccessor();
      IndexWriter writer = null;
      try {
        writer = ia.getWriter();
        writer.optimize();
      } catch (IOException e) {
        // an optimize failure is logged but does not abort the job
        log.error("Optimize index.", e);
      } finally {
        // NOTE(review): if getWriter() throws, writer is still null here and
        // null is passed to release() — confirm the accessor tolerates that.
        ia.release(writer);
      }
      log.debug("Finished optimizing index.");
    } else {
      log.error("Index does not seem to be a Lucene index. Therfore no " + "optimizing will be done.");
    }
View Full Code Here

  private synchronized void reIndex() throws IOException {
    UseCase ucReIndex = MonitorFactory.startUseCase("reIndex()");
    // build a dictionary (from the spell package)
    log.debug("Starting to reindex didyoumean index.");
    IndexAccessor sourceAccessor = didyoumean.getSourceLocation().getAccessor();
    IndexReader sourceReader = sourceAccessor.getReader(false);
    CustomSpellChecker spellchecker = didyoumean.getSpellchecker();
    Collection<String> fields = null;

    if (didyoumean.isAll()) {
      fields = sourceReader.getFieldNames(IndexReader.FieldOption.ALL);
    } else {
      fields = didyoumean.getDym_fields();
    }
    try {
      for (String fieldname : fields) {
        LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldname);
        spellchecker.indexDictionary(dict);
      }
    } finally {
      if (sourceAccessor != null && sourceReader != null) {
        sourceAccessor.release(sourceReader, false);
      }
    }
    log.debug("Finished reindexing didyoumean index.");
    ucReIndex.stop();
  }
View Full Code Here

    LinkedHashMap<Document, Float> docs = objectToLinkedHashMapDocuments(searchResult
      .get(CRSearcher.RESULT_RESULT_KEY));

    LuceneIndexLocation idsLocation = LuceneIndexLocation.getIndexLocation(config);
    IndexAccessor indexAccessor = idsLocation.getAccessor();
    IndexReader reader = null;
    try {
      reader = indexAccessor.getReader(false);

      parsedQuery = parseHighlightQuery(request, reader, parsedQuery);

      processDocuments(docs, result, reader, parsedQuery);

    } catch (IOException e) {
      LOGGER.error("Cannot get Index reader for highlighting", e);
    } finally {
      indexAccessor.release(reader, false);
    }

    ucProcessSearchResolvables.stop();
    return result;
  }
View Full Code Here

  @Override
  protected final void indexCR(final IndexLocation indexLocation, final CRConfigUtil config) throws CRException {
    // Clear job: deletes every document from the Lucene index (and the
    // taxonomy, when facets are enabled) and resets the job creation times.
    if (indexLocation instanceof LuceneIndexLocation) {
      log.debug("Starting to clear index.");
      LuceneIndexLocation luceneIndexLoccation = (LuceneIndexLocation) indexLocation;
      IndexAccessor ia = luceneIndexLoccation.getAccessor();
      IndexWriter writer = null;
      TaxonomyAccessor ta = null;
      if (luceneIndexLoccation.useFacets()) {
        ta = luceneIndexLoccation.getTaxonomyAccessor();
      }
      try {
        writer = ia.getWriter();
        writer.deleteAll();
        if (ta != null) {
          ta.clearTaxonomy();
        }
        // forget previous job timestamps so the next run re-indexes everything
        luceneIndexLoccation.resetIndexJobCreationTimes();
      } catch (IOException e) {
        log.error("Could not clear index", e);
      } finally {
        // NOTE(review): if getWriter() throws, writer is null and null is
        // passed to release() — confirm the accessor tolerates that.
        ia.release(writer);
        if (ta != null) {
          // taxonomy readers must be refreshed after clearing
          ta.refresh();
        }
      }
      log.debug("Finished clearing index.");
View Full Code Here

  /**
   * Requests an optimize command on the index.
   */
  public void optimizeIndex() {
    IndexAccessor indexAccessor = getAccessor();
    IndexWriter indexWriter;
    try {
      indexWriter = indexAccessor.getWriter();
      indexWriter.optimize(true);
      indexAccessor.release(indexWriter);
    } catch (IOException e) {
      log.error(e.getMessage(), e);
    }
  }
View Full Code Here

TOP

Related Classes of com.gentics.cr.lucene.indexaccessor.IndexAccessor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact: software@gmail.com.