Examples of KLDClassifier


Examples of com.github.pmerienne.trident.ml.nlp.KLDClassifier

  @Test
  public void testWithSmallWiki() {
    EnglishTokenizer tokenizer = new EnglishTokenizer();

    KLDClassifier kldClassifier = new KLDClassifier(2);
    kldClassifier.update(0, tokenizer.tokenize(NOSQL_WIKI));
    kldClassifier.update(0, tokenizer.tokenize(MYSQL_WIKI));
    kldClassifier.update(1, tokenizer.tokenize(LILIUM_WIKI));
    kldClassifier.update(1, tokenizer.tokenize(ROSE_WIKI));

    assertEquals(0, (int) kldClassifier.classify(tokenizer.tokenize(DATABASE_WIKI)));
    assertEquals(1, (int) kldClassifier.classify(tokenizer.tokenize(FLOWER_WIKI)));
  }
View Full Code Here

Examples of com.github.pmerienne.trident.ml.nlp.KLDClassifier

  /**
   * Trains a {@link KLDClassifier} on Reuters samples and counts how many
   * evaluation instances are misclassified.
   *
   * The visible part of this method only accumulates the evaluation size and
   * the error count; what is done with them is not shown in this excerpt.
   */
  public void testWithReuters() {
    // Training and evaluation sets both come from the Reuters samples
    // (presumably fold 0 of a 10-fold split - confirm against DatasetUtils)
    List<TextInstance<Integer>> training = DatasetUtils.getTrainingFolds(0, 10, Datasets.getReutersSamples());
    List<TextInstance<Integer>> eval = DatasetUtils.getEvalFold(0, 10, Datasets.getReutersSamples());

    // Use 500 max words per class to speed up test
    // (the first argument, 9, is presumably the number of classes - TODO confirm)
    KLDClassifier kldClassifier = new KLDClassifier(9, 500);

    // Train: feed every training instance's label and tokens to the classifier
    for (TextInstance<Integer> instance : training) {
      kldClassifier.update(instance.label, instance.tokens);
    }

    // Eval: count the evaluated instances and those whose predicted label
    // differs from the expected one
    double evalSize = 0.0;
    double errorCount = 0.0;
    for (TextInstance<Integer> instance : eval) {
      // Predicted label for this instance's tokens
      int actual = kldClassifier.classify(instance.tokens);
      if (actual != instance.label) {
        errorCount++;
      }
      evalSize++;
    }
View Full Code Here

Examples of com.github.pmerienne.trident.ml.nlp.KLDClassifier

      // Training stream: reads batches from a ReutersBatchSpout, converts each
      // tuple into a text instance and persists classifier updates into a
      // MemoryMapState under the name "newsClassifier"
      TridentState classifierState = toppology.newStream("reutersData", new ReutersBatchSpout())
      // Transform raw data ("label", "text" fields) to a text instance ("instance" field)
          .each(new Fields("label", "text"), new TextInstanceCreator<Integer>(), new Fields("instance"))

          // Update text classifier (a KLDClassifier built with 9, presumably the class count - confirm)
          .partitionPersist(new MemoryMapState.Factory(), new Fields("instance"), new TextClassifierUpdater<Integer>("newsClassifier", new KLDClassifier(9)));

      // Classification stream, served through the "classify" DRPC function
      toppology.newDRPCStream("classify", localDRPC)
      // Convert DRPC args to text instance
      // (the false flag presumably means the args carry no label - TODO confirm)
          .each(new Fields("args"), new TextInstanceCreator<Integer>(false), new Fields("instance"))
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.