Examples of TFIDF

com.github.pmerienne.trident.ml.nlp.TFIDF
edu.udo.cs.wvtool.generic.vectorcreation.TFIDF
This class represents a mechanism to create TFIDF word vectors. The resulting vectors are normalized. Author: Michael Wurst. Version: $Id: TFIDF.java,v 1.3 2007/05/20 18:06:03 mjwurst Exp $
org.apache.mahout.utils.vectors.TFIDF
org.apache.mahout.vectorizer.TFIDF

Examples of org.apache.mahout.vectorizer.TFIDF


    Weight weight;
    if ("tf".equalsIgnoreCase(weightType)) {
      weight = new TF();
    } else if ("tfidf".equalsIgnoreCase(weightType)) {
      weight = new TFIDF();
    } else {
      throw new IllegalArgumentException("Weight type " + weightType + " is not supported");
    }


    TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF

      }
    } finally {
      Closeables.closeQuietly(writer);
    }
    IndexReader reader = IndexReader.open(directory, true);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    
    int numTerms = 0;
    for (Iterator<TermEntry> it = termInfo.getAllEntries(); it.hasNext();) {
      it.next();

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF

        if (cmdLine.hasOption(weightOpt)) {
          String wString = cmdLine.getValue(weightOpt).toString();
          if ("tf".equalsIgnoreCase(wString)) {
            weight = new TF();
          } else if ("tfidf".equalsIgnoreCase(wString)) {
            weight = new TFIDF();
          } else {
            throw new OptionException(weightOpt);
          }
        } else {
          weight = new TFIDF();
        }


        String field = cmdLine.getValue(fieldOpt).toString();


        int minDf = 1;

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF

      }
    } finally {
      Closeables.closeQuietly(writer);
    }
    IndexReader reader = IndexReader.open(directory, true);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
    Iterable<Vector> iterable = new LuceneIterable(reader, "id", "content", mapper);


    int i = 0;

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF

  }


  @Test
  public void testIterable() throws Exception {
    IndexReader reader = DirectoryReader.open(directory);
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content", termInfo,weight);


    //TODO: do something more meaningful here
    for (Vector vector : iterable) {

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF

  public void testIterableNoTermVectors() throws IOException {
    RAMDirectory directory = createTestIndex(Field.TermVector.NO);
    IndexReader reader = DirectoryReader.open(directory);
    
    
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content",  termInfo,weight);


    Iterator<Vector> iterator = iterable.iterator();
    iterator.hasNext();

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF

    RAMDirectory directory = createTestIndex(Field.TermVector.YES, new RAMDirectory(), true, 0);
    //get real vectors
    createTestIndex(Field.TermVector.NO, directory, false, 5);
    IndexReader reader = DirectoryReader.open(directory);


    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    
    boolean exceptionThrown;
    //0 percent tolerance
    LuceneIterable iterable = new LuceneIterable(reader, "id", "content", termInfo,weight);

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF


    Weight weight;
    if ("tf".equalsIgnoreCase(weightType)) {
      weight = new TF();
    } else if ("tfidf".equalsIgnoreCase(weightType)) {
      weight = new TFIDF();
    } else {
      throw new IllegalArgumentException("Weight type " + weightType + " is not supported");
    }


    TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF

    }
    
    IndexReader reader = DirectoryReader.open(directory);
   


    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    
    int numTerms = 0;
    for (Iterator<TermEntry> it = termInfo.getAllEntries(); it.hasNext();) {
      it.next();

View Full Code Here

Examples of org.apache.mahout.vectorizer.TFIDF

    
    IndexReader reader = DirectoryReader.open(directory);
    System.out.println("Number of documents: \t"+reader.numDocs());
    
    
    Weight weight = new TFIDF();
    TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
    Iterable<Vector> iterable = new LuceneIterable(reader, "id", "content", termInfo,weight);
    
    int i = 0;
    for (Vector vector : iterable) {

View Full Code Here

0 1 2 3 4

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.