* in the entire index. To get the in-cluster frequency, we need to query the index for the documents
* that contain each term and intersect that set with the documents in the cluster. The size of the
* intersection is the term's in-cluster document frequency.
*/
TermEnum te = reader.terms(new Term(contentField, ""));
int count = 0;
Map<String,TermEntry> termEntryMap = new LinkedHashMap<String,TermEntry>();
do {
Term term = te.term();
if (term == null || term.field().equals(contentField) == false) {
break;
}
OpenBitSet termBitset = new OpenBitSet(reader.maxDoc());
// Generate bitset for the term
TermDocs termDocs = reader.termDocs(term);
while (termDocs.next()) {
termBitset.set(termDocs.doc());
}
// AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
// This modifies the termBitset, but that's fine as we are not using it anywhere else.
termBitset.and(clusterDocBitset);
int inclusterDF = (int) termBitset.cardinality();
TermEntry entry = new TermEntry(term.text(), count++, inclusterDF);
termEntryMap.put(entry.term, entry);
} while (te.next());
te.close();
List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<TermInfoClusterInOut>();
int clusterSize = ids.size();