Examples of TermEntry


Examples of org.apache.mahout.utils.vectors.TermEntry

      }
      int df = te.docFreq();
      if (df < minDf || df > percent){
        continue;
      }
      TermEntry entry = new TermEntry(term.text(), count++, df);
      termEntries.put(entry.term, entry);
    } while (te.next());
    te.close();
  }
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

    this.numTerms = numTerms;
  }
 
  @Override
  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
    TermEntry entry = termInfo.getTermEntry(field, term);
    if (entry != null) {
      vector.setQuick(entry.termIdx, weight.calculate(frequency, entry.docFreq, numTerms, numDocs));
    }
  }
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

      // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
      // This modifies the termBitset, but that's fine as we are not using it anywhere else.
      termBitset.and(clusterDocBitset);
      int inclusterDF = (int) termBitset.cardinality();
     
      TermEntry entry = new TermEntry(term.text(), count++, inclusterDF);
      termEntryMap.put(entry.term, entry);
    } while (te.next());
    te.close();
   
    List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<TermInfoClusterInOut>();
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

        // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
        // This modifies the termBitset, but that's fine as we are not using it anywhere else.
        termBitset.and(clusterDocBitset);
        int inclusterDF = (int) termBitset.cardinality();

        TermEntry entry = new TermEntry(term.text(), count++, inclusterDF);
        termEntryMap.put(entry.getTerm(), entry);
      } while (te.next());
    } finally {
      Closeables.closeQuietly(te);
    }
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

      // AND the term's bitset with cluster doc bitset to get the term's in-cluster frequency.
      // This modifies the termBitset, but that's fine as we are not using it anywhere else.
      termBitset.and(clusterDocBitset);
      int inclusterDF = (int) termBitset.cardinality();

      TermEntry entry = new TermEntry(term.text(), count++, inclusterDF);
      termEntryMap.put(entry.term, entry);
    } while (te.next());
    te.close();

    List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<TermInfoClusterInOut>();
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

      }
      int df = te.docFreq();
      if (df < minDf || df > percent) {
        continue;
      }
      TermEntry entry = new TermEntry(term.text(), count++, df);
      termEntries.put(entry.term, entry);
    } while (te.next());
    te.close();
  }
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

    this.numTerms = numTerms;
  }
 
  @Override
  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
    TermEntry entry = termInfo.getTermEntry(field, term);
    if (entry != null) {
      vector.setQuick(entry.termIdx, weight.calculate(frequency, entry.docFreq, numTerms, numDocs));
    }
  }
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

    writer.write(String.valueOf(ti.totalTerms(field)));
    writer.write("\n");
    writer.write("#term" + delimiter + "doc freq" + delimiter + "idx");
    writer.write("\n");
    while (entIter.hasNext()) {
      TermEntry entry = entIter.next();
      writer.write(entry.term);
      writer.write(delimiter);
      writer.write(String.valueOf(entry.docFreq));
      writer.write(delimiter);
      writer.write(String.valueOf(entry.termIdx));
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

      writer.write(String.valueOf(ti.totalTerms(field)));
      writer.write('\n');
      writer.write("#term" + delimiter + "doc freq" + delimiter + "idx");
      writer.write('\n');
      while (entIter.hasNext()) {
        TermEntry entry = entIter.next();
        writer.write(entry.getTerm());
        writer.write(delimiter);
        writer.write(String.valueOf(entry.getDocFreq()));
        writer.write(delimiter);
        writer.write(String.valueOf(entry.getTermIdx()));
        writer.write('\n');
      }
    } finally {
      Closeables.close(writer, false);
    }
View Full Code Here

Examples of org.apache.mahout.utils.vectors.TermEntry

    while ((text = te.next()) != null) {
      int df = te.docFreq();
      if (df < minDf || df > percent) {
        continue;
      }
      TermEntry entry = new TermEntry(text.utf8ToString(), count++, df);
      termEntries.put(entry.getTerm(), entry);
    }
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.