Examples of org.apache.lucene.index.IndexReader

org.apache.lucene.index.IndexReader
IndexReader is an abstract class, providing an interface for accessing an index. Search of an index is done entirely through this abstract interface, so that any subclass which implements it is searchable.
Concrete subclasses of IndexReader are usually constructed with a call to one of the static open() methods, e.g. {@link #open(Directory,boolean)}.
For efficiency, in this API documents are often referred to via document numbers, non-negative integers which each name a unique document in the index. These document numbers are ephemeral--they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions.
An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used to delete documents from the index then.
NOTE: for backwards API compatibility, several methods are not listed as abstract, but have no useful implementations in this base class and instead always throw UnsupportedOperationException. Subclasses are strongly encouraged to override these methods, but in many cases may not need to.

NOTE: as of 2.4, it's possible to open a read-only IndexReader using the static open methods that accept the boolean readOnly parameter. Such a reader has better concurrency as it's not necessary to synchronize on the isDeleted method. You must specify false if you want to make changes with the resulting IndexReader.

NOTE: {@link IndexReader} instances are completely threadsafe, meaning multiple threads can call any of its methods, concurrently. If your application requires external synchronization, you should not synchronize on the IndexReader instance; use your own (non-Lucene) objects instead.

            EntityManager em = (EntityManager) Component.getInstance("entityManager");
            FullTextSession ftSession = (FullTextSession)em.getDelegate();


            // Delete all documents with "_hibernate_class" term of the selected entity
            DirectoryProvider dirProvider = ftSession.getSearchFactory().getDirectoryProviders(entityClass)[0];
            IndexReader reader = IndexReader.open(dirProvider.getDirectory());


            // TODO: This is using an internal term of HSearch
            reader.deleteDocuments(new Term("_hibernate_class", entityClass.getName()));
            reader.close();


            // Optimize index
            progress.setStatus("Optimizing index");
            log.debug("optimizing index (merging segments)");
            ftSession.getSearchFactory().optimize(entityClass);

View Full Code Here


        EntityManager em = (EntityManager) Component.getInstance("entityManager");


        for (SearchableEntity indexedEntity : indexedEntities) {
            DirectoryProvider dirProvider = ((FullTextSession)em.getDelegate()).getSearchFactory().getDirectoryProviders(indexedEntity.getClazz())[0];
            IndexReader reader = IndexReader.open(dirProvider.getDirectory());


            indexedEntity.setNumOfIndexedDocuments(reader.numDocs());


            TermEnum te = reader.terms();
            long numTerms = 0;
            while (te.next()) numTerms++;
            indexedEntity.setNumOfIndexedTerms(numTerms);


            long size = 0;
            String [] fileNames = dirProvider.getDirectory().list();
            for (String fileName : fileNames) {
                size += dirProvider.getDirectory().fileLength(fileName);
            }
            indexedEntity.setIndexSizeInBytes(size);


            reader.close();
        }
    }

View Full Code Here

          deleteQueue.clear();
        }
        // 1. Open Index Reader
        File indexFile = new File(indexPath);
        Directory directory = FSDirectory.open(indexFile);
        IndexReader indexReader = IndexReader.open(directory);


        log.info("befor delete: indexReader.numDocs()=" + indexReader.numDocs());
        // 2. Delete old Document
        // loop over all documents in updateQueue
        for (int i = 0; i < updateCopy.size(); i++) {
          String resourceUrl = updateCopy.get(i).get(OlatDocument.RESOURCEURL_FIELD_NAME);
          Term term = new Term(OlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl );
          log.info("updateQueue:delete documents with resourceUrl=" + resourceUrl );
          indexReader.deleteDocuments(term);          
        }
        // loop over all documents in deleteQueue
        for (int i = 0; i < deleteCopy.size(); i++) {
          String resourceUrl = deleteCopy.get(i).get(OlatDocument.RESOURCEURL_FIELD_NAME);
          Term term = new Term(OlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl );
          log.info("deleteQueue:delete documents with resourceUrl='" + resourceUrl + "'");
          indexReader.deleteDocuments(term);
          
        }
        log.info("after delete: indexReader.numDocs()=" + indexReader.numDocs());
        // 3. Close reader
        indexReader.close();
        directory.close();
        
        // 4. open writer
        IndexWriter indexWriter = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.UNLIMITED);
        indexWriter.setMergeFactor(INDEX_MERGE_FACTOR); //for better performance

View Full Code Here

   * Creates a new spell-check index based on search-index 
   *
   */
  public void createSpellIndex() {
    if (isSpellCheckEnabled) {
      IndexReader indexReader = null;
      try {
        log.info("Start generating Spell-Index...");
        long startSpellIndexTime = 0;
        if (log.isDebug()) startSpellIndexTime = System.currentTimeMillis();
        Directory indexDir = FSDirectory.open(new File(indexPath));
        indexReader = IndexReader.open(indexDir);
        // 1. Create content spellIndex 
        File spellDictionaryFile = new File(spellDictionaryPath);
        Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));//true
        SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        Dictionary contentDictionary = new LuceneDictionary(indexReader, OlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);
        // 2. Create title spellIndex 
        Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));//true
        SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        Dictionary titleDictionary = new LuceneDictionary(indexReader, OlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);
        // 3. Create description spellIndex 
        Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));//true
        SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        Dictionary descriptionDictionary = new LuceneDictionary(indexReader, OlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);
        // 4. Create author spellIndex 
        Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));//true
        SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        Dictionary authorDictionary = new LuceneDictionary(indexReader, OlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);
        
        // Merge all part spell indexes (content,title etc.) to one common spell index
        Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);//true
        IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
        Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory};
        merger.addIndexesNoOptimize(directories);
        merger.optimize();
        merger.close();
        spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.setAccuracy(0.7f);
         if (log.isDebug()) log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
        log.info("New generated Spell-Index ready to use.");
      } catch(IOException ioEx) { 
        log.warn("Can not create SpellIndex",ioEx);
      } finally {
        if (indexReader != null) {
            try {
              indexReader.close();
            } catch (IOException e) {
              log.warn("Can not close indexReader properly",e);
            }
        }
      }

View Full Code Here

      doc.add(fd);
      writer.addDocument(doc);
      writer.optimize();
      writer.close();
      
      IndexReader reader = IndexReader.open(ramDir);
      String queryString = QUERY;
      QueryParser parser=new QueryParser(FIELD_NAME, /*new StandardAnalyzer()/*/XFactory.getWriterAnalyzer());
      Query query = parser.parse(queryString);
      System.out.println(query);
      Searcher searcher = new IndexSearcher(ramDir);
      query = query.rewrite(reader);
      System.out.println(query);
      System.out.println("Searching for: " + query.toString(FIELD_NAME));
      Hits hits = searcher.search(query);
      
      BoldFormatter formatter = new BoldFormatter();
      Highlighter highlighter =new Highlighter(formatter,new QueryScorer(query));
      highlighter.setTextFragmenter(new SimpleFragmenter(50));
      for (int i = 0; i < hits.length(); i++)
      {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector)reader.getTermFreqVector(hits.id(i),FIELD_NAME); 
        TokenStream tokenStream=TokenSources.getTokenStream(tpv);
        /*
        TokenStream tokenStream2=
          (new StandardAnalyzer())
          //XFactory.getWriterAnalyzer()
            .tokenStream(FIELD_NAME,new StringReader(text));
        
        do {
          Token t = tokenStream2.next();
          if(t==null)break;
          System.out.println("\t" + t.startOffset() + "," + t.endOffset() + "\t" + t.termText());
        }while(true);
        */
        String result =
          highlighter.getBestFragments(
            tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
      }
      reader.close();
    }catch(Exception e) {
      e.printStackTrace();
    }
  }

View Full Code Here

    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();


    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    TopDocs hits = searcher.search(query, 10);


    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
        query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      int docId = hits.scoreDocs[i].doc;
      Document hit = searcher.doc(docId);
      String text = hit.get(FIELD_NAME);
      int maxNumFragmentsRequired = 5;
      String fragmentSeparator = "...";
      TermPositionVector tpv = (TermPositionVector) reader
          .getTermFreqVector(docId, FIELD_NAME);
      TokenStream tokenStream = TokenSources.getTokenStream(tpv);
      String result = highlighter.getBestFragments(tokenStream, text,
          maxNumFragmentsRequired, fragmentSeparator);
      System.out.println("\n" + result);
    }
    reader.close();
  }

View Full Code Here

    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();


    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    TopDocs hits = searcher.search(query, 10);    


    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
        query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      int docId = hits.scoreDocs[i].doc;
      Document hit = searcher.doc(docId);
      String text = hit.get(FIELD_NAME);
      int maxNumFragmentsRequired = 5;
      String fragmentSeparator = "...";
      TermPositionVector tpv = (TermPositionVector) reader
          .getTermFreqVector(docId, FIELD_NAME);
      TokenStream tokenStream = TokenSources.getTokenStream(tpv);
      String result = highlighter.getBestFragments(tokenStream, text,
          maxNumFragmentsRequired, fragmentSeparator);
      System.out.println("\n" + result);
    }
    reader.close();
  }

View Full Code Here

    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();


    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    TopDocs hits = searcher.search(query, 10);    


    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
        query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      int docId = hits.scoreDocs[i].doc;
      Document hit = searcher.doc(docId);
      String text = hit.get(FIELD_NAME);
      int maxNumFragmentsRequired = 5;
      String fragmentSeparator = "...";
      TermPositionVector tpv = (TermPositionVector) reader
          .getTermFreqVector(docId, FIELD_NAME);
      TokenStream tokenStream = TokenSources.getTokenStream(tpv);
      String result = highlighter.getBestFragments(tokenStream, text,
          maxNumFragmentsRequired, fragmentSeparator);
      System.out.println("\n" + result);
    }
    reader.close();
  }

View Full Code Here

    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();


    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    TopDocs hits = searcher.search(query, 10);


    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
        query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      int docId = hits.scoreDocs[i].doc;
      Document hit = searcher.doc(docId);
      String text = hit.get(FIELD_NAME);
      int maxNumFragmentsRequired = 5;
      String fragmentSeparator = "...";
      TermPositionVector tpv = (TermPositionVector) reader
          .getTermFreqVector(docId, FIELD_NAME);
      TokenStream tokenStream = TokenSources.getTokenStream(tpv);
      String result = highlighter.getBestFragments(tokenStream, text,
          maxNumFragmentsRequired, fragmentSeparator);
      System.out.println("\n" + result);
    }
    reader.close();
  }

View Full Code Here

    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();


    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    TopDocs hits = searcher.search(query, 10);


    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
        query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      int docId = hits.scoreDocs[i].doc;
      Document hit = searcher.doc(docId);
      String text = hit.get(FIELD_NAME);
      int maxNumFragmentsRequired = 5;
      String fragmentSeparator = "...";
      TermPositionVector tpv = (TermPositionVector) reader
          .getTermFreqVector(docId, FIELD_NAME);
      TokenStream tokenStream = TokenSources.getTokenStream(tpv);
      String result = highlighter.getBestFragments(tokenStream, text,
          maxNumFragmentsRequired, fragmentSeparator);
      System.out.println("\n" + result);
    }
    reader.close();
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.index.IndexReader

com.gentics.cr.lucene.autocomplete.Autocompleter

com.gentics.cr.lucene.indexaccessor.DefaultIndexAccessor

com.gentics.cr.lucene.indexaccessor.DefaultIndexAccessorTest

com.gentics.cr.lucene.indexer.index.LuceneIndexUpdateChecker

com.gentics.cr.lucene.indexer.index.LuceneMultiIndexLocation

com.gentics.cr.lucene.indexer.index.LuceneSingleIndexLocation

com.gentics.cr.lucene.information.SpecialDirectoryInformationEntry

com.gentics.cr.lucene.search.CRSearcher

com.tamingtext.tagging.LuceneCategoryExtractor

com.tamingtext.tagging.LuceneTagExtractor

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.