Examples of opennlp.tools.tokenize.Tokenizer.tokenize()

opennlp.tools.tokenize.Tokenizer.tokenize()
Splits a string into its atomic parts. @param s The string to be tokenized. @return The String[] with the individual tokens as the array elements.

    }


    Tokenizer tokenizer = SimpleTokenizer.INSTANCE; //<co id="co.opennlp.name.2"/>
    for (int si = 0; si < sentences.length; si++) { //<co id="co.opennlp.name.3"/>
      List<Annotation> allAnnotations = new ArrayList<Annotation>();
      String[] tokens = tokenizer.tokenize(sentences[si]);//<co id="co.opennlp.name.4"/>
      for (int fi = 0; fi < finders.length; fi++) { //<co id="co.opennlp.name.5"/>
        Span[] spans = finders[fi].find(tokens); //<co id="co.opennlp.name.6"/>
        double[] probs = finders[fi].probs(spans); //<co id="co.opennlp.name.7"/>
        for (int ni = 0; ni < spans.length; ni++) {
          allAnnotations.add( //<co id="co.opennlp.name.8"/>

View Full Code Here

    );
    
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE; //<co id="co.opennlp.name.inittokenizer2"/>
    
    for (int si = 0; si < sentences.length; si++) {
      String[] tokens = tokenizer.tokenize(sentences[si]); //<co id="co.opennlp.name.tokenize2"/>
      Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames3"/>
      displayNames(names, tokens);
    }
    
    finder.clearAdaptiveData(); //<co id="co.opennlp.name.clear"/>

View Full Code Here

    </callout>
    </calloutlist>*/
    //<end id="ne-display2"/>
    //<start id="ne-prob"/>
    for (int si = 0; si < sentences.length; si++) {//<co id="co.opennlp.name.eachsent3"/>
      String[] tokens = tokenizer.tokenize(sentences[si]); //<co id="co.opennlp.name.tokenize3"/>
      Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames1"/>
      double[] spanProbs = finder.probs(names); //<co id="co.opennlp.name.probs"/>
    }
    /*<calloutlist>
    <callout arearefs="co.opennlp.name.eachsent3"><para>Iterate over each sentence.</para></callout>

View Full Code Here

    return model.eval(mContextGenerator.getContext(text));
  }


  public double[] categorize(String documentText) {
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
    return categorize(tokenizer.tokenize(documentText));
  }


  public String getBestCategory(double[] outcome) {
    return model.getBestOutcome(outcome);
  }

View Full Code Here

   */
  public SentencesToTree(String text, TokenizerModel model){
    /* Configure the tokenizer with preloaded model */
    Tokenizer tokenizer = new TokenizerME(model);
    /* tokens has an array of strings, where each string is a token */
    String s = spaces(tokenizer.tokenize(text));
    this.text = this.upperCase(s);
  }
  
  /**
   *

View Full Code Here

   * Categorizes the given text. The text is tokenized with the SimpleTokenizer before it
   * is passed to the feature generation.
   */
  public double[] categorize(String documentText) {
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
    return categorize(tokenizer.tokenize(documentText));
  }


  public String getBestCategory(double[] outcome) {
    return model.getBestOutcome(outcome);
  }

View Full Code Here

        for (int i = 0; i < sentences.length; i++) {
            String sentence = sentences[i];
            //LOG.debug("Sentence: " + sentence);


            // extract the names in the current sentence
            String[] tokens = tokenizer.tokenize(sentence);
            Span[] tokenspan = tokenizer.tokenizePos(sentence);
            Span[] nameSpans = finder.find(tokens);
            double[] probs = finder.probs();


            if (nameSpans != null && nameSpans.length > 0) {

View Full Code Here

   */
  @Override
  public double[] categorize(String documentText,
      Map<String, Object> extraInformation) {
    Tokenizer tokenizer = model.getFactory().getTokenizer();
    return categorize(tokenizer.tokenize(documentText), extraInformation);
  }


  /**
   * Categorizes the given text. The text is tokenized with the SimpleTokenizer
   * before it is passed to the feature generation.

View Full Code Here

   * Categorizes the given text. The text is tokenized with the tokenizer provided
   * by the model's factory before it is passed to the feature generation.
   */
  public double[] categorize(String documentText) {
    Tokenizer tokenizer = model.getFactory().getTokenizer();
    return categorize(tokenizer.tokenize(documentText),
        Collections.<String, Object> emptyMap());
  }


/**
 * Returns a map in which the key is the category name and the value is the score

View Full Code Here

    return model.eval(mContextGenerator.getContext(text));
  }


  public double[] categorize(String documentText) {
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
    return categorize(tokenizer.tokenize(documentText));
  }


  public String getBestCategory(double[] outcome) {
    return model.getBestOutcome(outcome);
  }

View Full Code Here

0 1

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.