Package com.github.pmerienne.trident.ml.preprocessing

Examples of com.github.pmerienne.trident.ml.preprocessing.EnglishTokenizer.tokenize()


    TextTokenizer tokenizer = new EnglishTokenizer();
    List<String> d1 = tokenizer.tokenize(DATABASE_WIKI);
    List<String> d2 = tokenizer.tokenize(NOSQL_WIKI);
    List<String> d3 = tokenizer.tokenize(MYSQL_WIKI);
    List<String> d4 = tokenizer.tokenize(FLOWER_WIKI);
    List<String> d5 = tokenizer.tokenize(LILIUM_WIKI);
    List<String> d6 = tokenizer.tokenize(ROSE_WIKI);
    List<List<String>> training = Arrays.asList(d1, d2, d4, d5);

    TFIDF tfidf = new TFIDF();
View Full Code Here


    List<String> d1 = tokenizer.tokenize(DATABASE_WIKI);
    List<String> d2 = tokenizer.tokenize(NOSQL_WIKI);
    List<String> d3 = tokenizer.tokenize(MYSQL_WIKI);
    List<String> d4 = tokenizer.tokenize(FLOWER_WIKI);
    List<String> d5 = tokenizer.tokenize(LILIUM_WIKI);
    List<String> d6 = tokenizer.tokenize(ROSE_WIKI);
    List<List<String>> training = Arrays.asList(d1, d2, d4, d5);

    TFIDF tfidf = new TFIDF();

    // When
View Full Code Here

    // Given
    EnglishTokenizer tokenizer = new EnglishTokenizer();
    String text = "I can't argue with some arguments on argus with argues";

    // When
    List<String> actualTokens = tokenizer.tokenize(text);

    // Then
    List<String> expectedTokens = Arrays.asList("i", "can't", "argu", "some", "argument", "argu", "argu");
    assertEquals(expectedTokens, actualTokens);
  }
View Full Code Here

          // Get text
          int startIndex = line.indexOf(" - ");
          String text = line.substring(startIndex, line.length() - 1);

          REUTERS_SAMPLES.add(new TextInstance<Integer>(classIndex, tokenizer.tokenize(text)));
        } catch (Exception ex) {
          System.err.println("Skipped Reuters sample because it can't be parsed : " + line);
        }
      }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.