Package org.apache.mahout.vectorizer.encoders

Examples of org.apache.mahout.vectorizer.encoders.Dictionary.intern()


  public void testDictionaryOrder() {
    Dictionary dict = new Dictionary();

    dict.intern("a");
    dict.intern("d");
    dict.intern("c");
    dict.intern("b");
    dict.intern("qrz");

    assertEquals("[a, d, c, b, qrz]", dict.values().toString());
View Full Code Here


    Dictionary dict = new Dictionary();

    dict.intern("a");
    dict.intern("d");
    dict.intern("c");
    dict.intern("b");
    dict.intern("qrz");

    assertEquals("[a, d, c, b, qrz]", dict.values().toString());

    Dictionary dict2 = Dictionary.fromList(dict.values());
View Full Code Here

    dict.intern("a");
    dict.intern("d");
    dict.intern("c");
    dict.intern("b");
    dict.intern("qrz");

    assertEquals("[a, d, c, b, qrz]", dict.values().toString());

    Dictionary dict2 = Dictionary.fromList(dict.values());
    assertEquals("[a, d, c, b, qrz]", dict2.values().toString());
View Full Code Here

    learningAlgorithm.setAveragingWindow(500);

    List<File> files = Lists.newArrayList();
    for (File newsgroup : base.listFiles()) {
      if (newsgroup.isDirectory()) {
        newsGroups.intern(newsgroup.getName());
        files.addAll(Arrays.asList(newsgroup.listFiles()));
      }
    }
    Collections.shuffle(files);
    System.out.printf("%d training files\n", files.size());
View Full Code Here

    int k = 0;
    double step = 0;
    int[] bumps = {1, 2, 5};
    for (File file : files.subList(0, 3000)) {
      String ng = file.getParentFile().getName();
      int actual = newsGroups.intern(ng);

      Vector v = encodeFeatureVector(file, actual, leakType);
      learningAlgorithm.train(actual, v);

      k++;
View Full Code Here

            null, true, conf);

    long numItems = 0;
    while (iter.hasNext()) {
      Pair<Text, VectorWritable> next = iter.next();
      asfDictionary.intern(next.getFirst().toString());
      numItems++;
    }

    System.out.printf("%d test files\n", numItems);
    ResultAnalyzer ra = new ResultAnalyzer(asfDictionary.values(), "DEFAULT");
View Full Code Here

            null, true, conf);
    while (iter.hasNext()) {
      Pair<Text, VectorWritable> next = iter.next();
      String ng = next.getFirst().toString();

      int actual = asfDictionary.intern(ng);
      Vector result = classifier.classifyFull(next.getSecond().get());
      int cat = result.maxValueIndex();
      double score = result.maxValue();
      double ll = classifier.logLikelihood(actual, next.getSecond().get());
      ClassifierResult cr = new ClassifierResult(asfDictionary.values().get(cat), score, ll);
View Full Code Here

    Multiset<String> overallCounts = HashMultiset.create();

    List<File> files = Lists.newArrayList();
    for (File newsgroup : base.listFiles()) {
      if (newsgroup.isDirectory()) {
        newsGroups.intern(newsgroup.getName());
        files.addAll(Arrays.asList(newsgroup.listFiles()));
      }
    }
    System.out.printf("%d test files\n", files.size());
    ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
View Full Code Here

    System.out.printf("%d test files\n", files.size());
    ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
    for (File file : files) {
      String ng = file.getParentFile().getName();

      int actual = newsGroups.intern(ng);
      NewsgroupHelper helper = new NewsgroupHelper();
      Vector input = helper.encodeFeatureVector(file, actual, 0, overallCounts); //no leak type ensures this is a normal vector
      Vector result = classifier.classifyFull(input);
      int cat = result.maxValueIndex();
      double score = result.maxValue();
View Full Code Here

    SequenceFileDirIterator<Text, VectorWritable> iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, trainFilter,
            null, true, conf);
    long numItems = 0;
    while (iter.hasNext()) {
      Pair<Text, VectorWritable> next = iter.next();
      asfDictionary.intern(next.getFirst().toString());
      numItems++;
    }

    System.out.printf("%d training files\n", numItems);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle. Contact coftware#gmail.com.