Package org.apache.mahout.vectorizer.encoders

Examples of org.apache.mahout.vectorizer.encoders.StaticWordValueEncoder.addToVector()


  static Vector vectorize(Multiset<String> doc, CorpusWeighting w, boolean normalize, int dimension) {
    Vector v = new RandomAccessSparseVector(dimension);
    FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
    for (String word : doc.elementSet()) {
      encoder.addToVector(word, w.weight(word, doc.count(word)), v);
    }
    if (normalize) {
      return v.assign(Functions.div(v.norm(2)));
    } else {
      return v;
View Full Code Here


    Vector v1 = new RandomAccessSparseVector(100);                  
    while (ts.incrementToken()) {
      char[] termBuffer = termAtt.termBuffer();
      int termLen = termAtt.termLength();
      String w = new String(termBuffer, 0, termLen);                
      encoder.addToVector(w, 1, v1);                                
    }
    System.out.printf("%s\n", new SequentialAccessSparseVector(v1));
  }

}
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.