Package org.apache.mahout.vectorizer.encoders

Examples of org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder


    return vectorize(counts, w, normalize, dimension);
  }

  static Vector vectorize(Multiset<String> doc, CorpusWeighting w, boolean normalize, int dimension) {
    Vector v = new RandomAccessSparseVector(dimension);
    FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
    for (String word : doc.elementSet()) {
      encoder.addToVector(word, w.weight(word, doc.count(word)), v);
    }
    if (normalize) {
      return v.assign(Functions.div(v.norm(2)));
    } else {
      return v;
View Full Code Here


import org.apache.mahout.vectorizer.encoders.StaticWordValueEncoder;

public class TokenizingAndVectorizingText {

  public static void main(String[] args) throws IOException {
    FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);    

    StringReader in = new StringReader("text to magically vectorize");
    TokenStream ts = analyzer.tokenStream("body", in);
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);

    Vector v1 = new RandomAccessSparseVector(100);                  
    while (ts.incrementToken()) {
      char[] termBuffer = termAtt.termBuffer();
      int termLen = termAtt.termLength();
      String w = new String(termBuffer, 0, termLen);                
      encoder.addToVector(w, 1, v1);                                
    }
    System.out.printf("%s\n", new SequentialAccessSparseVector(v1));
  }
View Full Code Here

  public static void main(String[] args) {
    File base = new File(args[0]);
    overallCounts = HashMultiset.create();

    Map<String, Set<Integer>> traceDictionary = new TreeMap<String, Set<Integer>>();
    FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
    encoder.setProbes(2);
    encoder.setTraceDictionary(traceDictionary);
    FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");
    bias.setTraceDictionary(traceDictionary);
    FeatureVectorEncoder lines = new ConstantValueEncoder("Lines");
    lines.setTraceDictionary(traceDictionary);
    Dictionary newsGroups = new Dictionary();
   
    OnlineLogisticRegression learningAlgorithm =
        new OnlineLogisticRegression(
              20, FEATURES, new L1())
View Full Code Here

      try {
        Preconditions.checkArgument(c != null, "Invalid type of variable %s,  wanted one of %s",
          typeMap.get(name), TYPE_DICTIONARY.keySet());
        Constructor<? extends FeatureVectorEncoder> constructor = c.getConstructor(String.class);
        Preconditions.checkArgument(constructor != null, "Can't find correct constructor for %s", typeMap.get(name));
        FeatureVectorEncoder encoder = constructor.newInstance(name);
        predictorEncoders.put(predictor, encoder);
        encoder.setTraceDictionary(traceDictionary);
      } catch (InstantiationException e) {
        throw new IllegalStateException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (IllegalAccessException e) {
        throw new IllegalStateException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (InvocationTargetException e) {
View Full Code Here

      try {
        Preconditions.checkArgument(c != null, "Invalid type of variable %s,  wanted one of %s",
          typeMap.get(name), typeDictionary.keySet());
        Constructor<? extends FeatureVectorEncoder> constructor = c.getConstructor(String.class);
        Preconditions.checkArgument(constructor != null, "Can't find correct constructor for %s", typeMap.get(name));
        FeatureVectorEncoder encoder = constructor.newInstance(name);
        predictorEncoders.put(predictor, encoder);
        encoder.setTraceDictionary(traceDictionary);
      } catch (InstantiationException e) {
        throw new ImpossibleException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (IllegalAccessException e) {
        throw new ImpossibleException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (InvocationTargetException e) {
View Full Code Here

      try {
        Preconditions.checkArgument(c != null, "Invalid type of variable %s,  wanted one of %s",
          typeMap.get(name), TYPE_DICTIONARY.keySet());
        Constructor<? extends FeatureVectorEncoder> constructor = c.getConstructor(String.class);
        Preconditions.checkArgument(constructor != null, "Can't find correct constructor for %s", typeMap.get(name));
        FeatureVectorEncoder encoder = constructor.newInstance(name);
        predictorEncoders.put(predictor, encoder);
        encoder.setTraceDictionary(traceDictionary);
      } catch (InstantiationException e) {
        throw new IllegalStateException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (IllegalAccessException e) {
        throw new IllegalStateException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (InvocationTargetException e) {
View Full Code Here

      try {
        Preconditions.checkArgument(c != null, "Invalid type of variable %s,  wanted one of %s",
          typeMap.get(name), TYPE_DICTIONARY.keySet());
        Constructor<? extends FeatureVectorEncoder> constructor = c.getConstructor(String.class);
        Preconditions.checkArgument(constructor != null, "Can't find correct constructor for %s", typeMap.get(name));
        FeatureVectorEncoder encoder = constructor.newInstance(name);
        predictorEncoders.put(predictor, encoder);
        encoder.setTraceDictionary(traceDictionary);
      } catch (InstantiationException e) {
        throw new ImpossibleException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (IllegalAccessException e) {
        throw new ImpossibleException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (InvocationTargetException e) {
View Full Code Here

      try {
        Preconditions.checkArgument(c != null, "Invalid type of variable %s,  wanted one of %s",
          typeMap.get(name), TYPE_DICTIONARY.keySet());
        Constructor<? extends FeatureVectorEncoder> constructor = c.getConstructor(String.class);
        Preconditions.checkArgument(constructor != null, "Can't find correct constructor for %s", typeMap.get(name));
        FeatureVectorEncoder encoder = constructor.newInstance(name);
        predictorEncoders.put(predictor, encoder);
        encoder.setTraceDictionary(traceDictionary);
      } catch (InstantiationException e) {
        throw new IllegalStateException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (IllegalAccessException e) {
        throw new IllegalStateException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (InvocationTargetException e) {
View Full Code Here

      try {
        Preconditions.checkArgument(c != null, "Invalid type of variable %s,  wanted one of %s",
          typeMap.get(name), TYPE_DICTIONARY.keySet());
        Constructor<? extends FeatureVectorEncoder> constructor = c.getConstructor(String.class);
        Preconditions.checkArgument(constructor != null, "Can't find correct constructor for %s", typeMap.get(name));
        FeatureVectorEncoder encoder = constructor.newInstance(name);
        predictorEncoders.put(predictor, encoder);
        encoder.setTraceDictionary(traceDictionary);
      } catch (InstantiationException e) {
        throw new IllegalStateException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (IllegalAccessException e) {
        throw new IllegalStateException(CANNOT_CONSTRUCT_CONVERTER, e);
      } catch (InvocationTargetException e) {
View Full Code Here

  @Test
  public void test() {
   
   
    FeatureVectorEncoder encoder = new ContinuousValueEncoder("demo");
    Vector v = new RandomAccessSparseVector( 3 );
   
    encoder.addToVector("32", 1, v);

    FeatureVectorEncoder encoder_other = new ContinuousValueEncoder("demo_other");
    //Vector v = new RandomAccessSparseVector( 3 );
   
    encoder_other.addToVector("100", 1, v);
   
    assertEquals( v.get(0), 132.0, 0.0);
   
    Utils.PrintVector(v);
   
View Full Code Here

TOP

Related Classes of org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.