Package: org.apache.lucene.index

Examples of org.apache.lucene.index.TermEnum


  private final Map<String, TermEntry> termEntries;
  private final String field;
 
  public CachedTermInfo(IndexReader reader, String field, int minDf, int maxDfPercent) throws IOException {
    this.field = field;
    TermEnum te = reader.terms(new Term(field, ""));
    int numDocs = reader.numDocs();
    double percent = numDocs * maxDfPercent / 100.0;
    //Should we use a linked hash map so that we know terms are in order?
    termEntries = new LinkedHashMap<String, TermEntry>();
    int count = 0;
    do {
      Term term = te.term();
      if (term == null || term.field().equals(field) == false){
        break;
      }
      int df = te.docFreq();
      if (df < minDf || df > percent){
        continue;
      }
      TermEntry entry = new TermEntry(term.text(), count++, df);
      termEntries.put(entry.term, entry);
    } while (te.next());
    te.close();
  }
View Full Code Here


     * in the entire index. To get the in-cluster frequency, we need to query the index to get the term
     * frequencies in each document. The number of results of this call will be the in-cluster document
     * frequency.
     */
   
    TermEnum te = reader.terms(new Term(contentField, ""));
    int count = 0;
   
    Map<String,TermEntry> termEntryMap = new LinkedHashMap<String,TermEntry>();
    do {
      Term term = te.term();
      if (term == null || term.field().equals(contentField) == false) {
        break;
      }
      OpenBitSet termBitset = new OpenBitSet(reader.maxDoc());
     
      // Generate bitset for the term
      TermDocs termDocs = reader.termDocs(term);
     
      while (termDocs.next()) {
        termBitset.set(termDocs.doc());
      }
     
      // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
      // This modifies the termBitset, but that's fine as we are not using it anywhere else.
      termBitset.and(clusterDocBitset);
      int inclusterDF = (int) termBitset.cardinality();
     
      TermEntry entry = new TermEntry(term.text(), count++, inclusterDF);
      termEntryMap.put(entry.term, entry);
    } while (te.next());
    te.close();
   
    List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<TermInfoClusterInOut>();
   
    int clusterSize = ids.size();

View Full Code Here

                }
            }

            final Iterator<Term> it = orderedTerms.keySet().iterator();

            setEnum(new TermEnum() {

                private Term current;

                {
                    getNext();
View Full Code Here

  }

  private void initFacetCounts(IndexReader reader) throws IOException {
    String fieldName = facetRequest.getFieldName();
    // term are enumerated by field name and within field names by term value
    TermEnum terms = reader.terms( new Term( fieldName, "" ) );
    try {
      while ( fieldName.equals( terms.term().field() ) ) {
        String fieldValue = terms.term().text();
        facetCounts.initCount( fieldValue );
        if ( !terms.next() ) {
          break;
        }
      }
    }
    finally {
      terms.close();
    }
  }
View Full Code Here

                    rangeScans.add(new RangeScan(reader, term, term));
                }

                Iterator it = rangeScans.iterator();
                while (it.hasNext()) {
                    TermEnum terms = (TermEnum) it.next();
                    do {
                        Term t = terms.term();
                        if (t != null) {
                            currentTerm.setBase(t.text());
                            int compare = currentTerm.compareTo(termText);
                            if (compare == 0) {
                                orderedTerms.put(t, new Integer(terms.docFreq()));
                            } else if (compare < 0) {
                                // try next one
                            } else {
                                // compare > 0
                            }
                        } else {
                            break;
                        }
                    } while (terms.next());
                }
            } finally {
                Iterator it = rangeScans.iterator();
                while (it.hasNext()) {
                    TermEnum terms = (TermEnum) it.next();
                    try {
                        terms.close();
                    } catch (IOException e) {
                       if (LOG.isTraceEnabled())
                       {
                           LOG.trace("An exception occurred: " + e.getMessage());
                       }
                    }
                }
            }

            final Iterator it = orderedTerms.keySet().iterator();

            setEnum(new TermEnum() {

                private Term current;

                {
                    getNext();
View Full Code Here

   
    List<String> uniqueMimeTypes = null;
    if (retrieveUniqueMimeTypes) {
      // retrieve all possible file types
      uniqueMimeTypesIndexReader = indexAccessor.getReader(false);
      final TermEnum termEnum = uniqueMimeTypesIndexReader.terms(new Term(LUCENE_INDEX_MIMETYPE, ""));
      uniqueMimeTypes = new ArrayList<String>();
      while (termEnum.next() && termEnum.term().field().equals(LUCENE_INDEX_MIMETYPE)) {
        uniqueMimeTypes.add(termEnum.term().text());
      }
    }

    // get accessors and reader only if facets are activated
    if (facetsSearch.useFacets()) {
View Full Code Here

                                     Term start,
                                     TermDocsCollector collector)
                throws IOException {
            TermDocs tDocs = reader.termDocs();
            try {
                TermEnum terms = reader.terms(start);
                try {
                    int count = 0;
                    do {
                        Term t = terms.term();
                        if (t != null && t.field() == start.field()) {
                            tDocs.seek(terms);
                            collector.collect(t, tDocs);
                        } else {
                            break;
                        }
                        // once in a while check if we should quit
                        if (++count % 10000 == 0) {
                            if (stopRequested) {
                                break;
                            }
                        }
                    } while (terms.next());
                } finally {
                    terms.close();
                }
            } finally {
                tDocs.close();
            }
        }
View Full Code Here

        reader = IndexReader.open(FSDirectory.open(new File(args[i])));
      }
    }

    TermFreqQueue tiq = new TermFreqQueue(count);
    TermEnum terms = reader.terms();
     
    int minFreq = 0;
    while (terms.next()) {
      if (terms.docFreq() > minFreq) {
        TermFreq top = tiq.add(new TermFreq(terms.term(), terms.docFreq()));
        if (tiq.size() >= count) {                 // if tiq overfull
          tiq.pop();                              // remove lowest in tiq
          minFreq = top.docFreq; // reset minFreq
        }
      }
View Full Code Here

     *                     the search index.
     */
    private void calculateDocFilter() throws IOException {
        docFilter = new BitSet(reader.maxDoc());
        // we match all terms
        TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, field));
        try {
            TermDocs docs = reader.termDocs();
            try {
                while (terms.term() != null
                        && terms.term().field() == FieldNames.PROPERTIES
                        && terms.term().text().startsWith(field)) {
                    docs.seek(terms);
                    while (docs.next()) {
                        docFilter.set(docs.doc());
                    }
                    terms.next();
                }
            } finally {
                docs.close();
            }
        } finally {
            terms.close();
        }
    }
View Full Code Here

        private void calculateHits() throws IOException {
            if (hitsCalculated) {
                return;
            }

            TermEnum enumerator = reader.terms(lowerTerm);

            try {
                boolean checkLower = false;
                if (!inclusive) {
                    // make adjustments to set to exclusive
                    checkLower = true;
                }

                String testField = getField();

                TermDocs docs = reader.termDocs();
                try {
                    do {
                        Term term = enumerator.term();
                        if (term != null && term.field() == testField) {
                            if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
                                checkLower = false;
                                if (upperTerm != null) {
                                    int compare = upperTerm.text().compareTo(term.text());
                                    // if beyond the upper term, or is exclusive and
                                    // this is equal to the upper term, break out
                                    if ((compare < 0) || (!inclusive && compare == 0)) {
                                        break;
                                    }
                                }

                                docs.seek(enumerator);
                                while (docs.next()) {
                                    hits.set(docs.doc());
                                }
                            }
                        } else {
                            break;
                        }
                    } while (enumerator.next());
                } finally {
                    docs.close();
                }
            } finally {
                enumerator.close();
            }
            hitsCalculated = true;
            // put to cache
            synchronized (resultMap) {
                resultMap.put(cacheKey, hits);
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermEnum

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.