Package net.sf.nlpshell

Source Code of net.sf.nlpshell.MainUima

package net.sf.nlpshell;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.ExternalResourceFactory.createDependencyAndBind;
import static org.apache.uima.fit.util.JCasUtil.select;

import java.io.File;
import java.util.Locale;

import opennlp.tools.parser.AbstractBottomUpParser;
import opennlp.uima.Parse;
import opennlp.uima.parser.Parser;
import opennlp.uima.parser.ParserModelResourceImpl;
import opennlp.uima.sentdetect.SentenceDetector;
import opennlp.uima.sentdetect.SentenceModelResourceImpl;
import opennlp.uima.tokenize.Tokenizer;
import opennlp.uima.tokenize.TokenizerModelResourceImpl;
import opennlp.uima.util.UimaUtil;

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.examples.type.Sentence;
import org.apache.uima.fit.examples.type.Token;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

public class MainUima {

  /**
   * @param args
   */
  public static void main(String[] args) throws Exception {
    // Create descriptor
    AnalysisEngineDescription sentenceDetector = createEngineDescription(
        SentenceDetector.class, UimaUtil.SENTENCE_TYPE_PARAMETER,
        Sentence.class.getName());

    // Create the external resource dependency for the model and bind it
    createDependencyAndBind(sentenceDetector, UimaUtil.MODEL_PARAMETER,
        SentenceModelResourceImpl.class, new File("models/en-sent.bin")
            .toURI().toURL().toString());

    // Create descriptor
    AnalysisEngineDescription tokenizer = createEngineDescription(
        Tokenizer.class, UimaUtil.TOKEN_TYPE_PARAMETER,
        Token.class.getName(), UimaUtil.SENTENCE_TYPE_PARAMETER,
        Sentence.class.getName());

    // Create the external resource dependency for the model and bind it
    createDependencyAndBind(tokenizer, UimaUtil.MODEL_PARAMETER,
        TokenizerModelResourceImpl.class, new File(
            "models/en-token.bin").toURI().toURL().toString());

    // Create descriptor
    AnalysisEngineDescription parser = createEngineDescription(
        Parser.class, UimaUtil.TOKEN_TYPE_PARAMETER,
        Token.class.getName(), UimaUtil.SENTENCE_TYPE_PARAMETER,
        Sentence.class.getName(), Parser.PARSE_TYPE_PARAMETER,
        Parse.class.getName(), Parser.TYPE_FEATURE_PARAMETER, "type2",
        Parser.CHILDREN_FEATURE_PARAMETER, "children",
        Parser.PROBABILITY_FEATURE_PARAMETER, "prob");

    // Create the external resource dependency for the model and bind it
    createDependencyAndBind(parser, UimaUtil.MODEL_PARAMETER,
        ParserModelResourceImpl.class, new File(
            "models/en-parser-chunking.bin").toURI().toURL()
            .toString());

    Locale.setDefault(Locale.ENGLISH);
    AnalysisEngineDescription aggregate = createEngineDescription(
        sentenceDetector, tokenizer, parser);
    AnalysisEngine engine = createEngine(aggregate);

    String text = "The meeting was moved from Yorktown 01-144 to Hawthorne 1S-W33.";
    JCas jCas = engine.newJCas();
    jCas.setDocumentText(text);
    engine.process(jCas);

    for (Sentence sentence : select(jCas, Sentence.class)) {
      System.out.println("Sentence : " + sentence.getCoveredText());
      for (Token token : JCasUtil.selectCovered(jCas, Token.class,
          sentence)) {
        System.out.println("Token : " + token.getCoveredText());
      }
    }

    for (Parse parse : select(jCas, Parse.class)) {
      System.out.println("Parse : " + parse.getCoveredText() + " type : " + parse.getType2());
    }

  }

}
TOP

Related Classes of net.sf.nlpshell.MainUima

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.