Package opennlp.tools.tokenize

Examples of opennlp.tools.tokenize.TokenizerModel


  }

  public void setTokenizer(FileSystem fs, Path p){
    try {
      FSDataInputStream in = fs.open(p);
      TokenizerModel model;
      model = new TokenizerModel(in);
      tokenizer = new TokenizerME(model);
    }
    catch (IOException e) {
      e.printStackTrace();
    }
View Full Code Here


        openNLP = new OpenNLP(new ClasspathDataFileProvider("DUMMY"));
    }

    @Test
    public void testLoadEnTokenizer() throws IOException{
        TokenizerModel model = openNLP.getTokenizerModel("en");
        Assert.assertNotNull(model);
        Tokenizer tokenizer = openNLP.getTokenizer("en");
        Assert.assertNotNull(tokenizer);
    }
View Full Code Here

        Assert.assertNotNull(tokenizer);
    }
   
    @Test
    public void testLoadMissingTokenizerModel() throws IOException{
        TokenizerModel model = openNLP.getTokenizerModel("ru");
        //there is not Russian model ...
        //so it is expected that the model is NULL
        Assert.assertNull(model);
    }
View Full Code Here

        ner = openNLP.getNameFinder("person", "ru");
        Assert.assertNull(ner);
    }
    @Test
    public void testLoadModelByName() throws IOException{
        TokenizerModel tokenModel = openNLP.getModel(TokenizerModel.class, "en-token.bin", null);
        Assert.assertNotNull(tokenModel);
        SentenceModel sentModel = openNLP.getModel(SentenceModel.class, "en-sent.bin", null);
        Assert.assertNotNull(sentModel);
        POSModel posModel = openNLP.getModel(POSModel.class, "en-pos-maxent.bin", null);
        Assert.assertNotNull(posModel);
View Full Code Here

     */
    public Tokenizer getTokenizer(String language) {
        Tokenizer tokenizer = null;
        if(language != null){
            try {
                TokenizerModel model = getTokenizerModel(language);
                if(model != null){
                    tokenizer = new TokenizerME(getTokenizerModel(language));
                }
            } catch (InvalidFormatException e) {
                log.warn("Unable to load Tokenizer Model for "+language+": " +
View Full Code Here

        if(modelName == null){
            return openNLP.getTokenizer(language);
        } else if(SIMPLE_MODEL_NAME.equals(modelName)){
            return SimpleTokenizer.INSTANCE;
        } else { //try to load the configured model
            TokenizerModel model;
            try {
                model = openNLP.getModel(TokenizerModel.class, modelName, null);
            } catch (Exception e) {
                throw new EngineException("Error while loading the configured OpenNLP "
                    + "TokenizerModel '"+modelName+"' ("+getClass().getSimpleName()+" | name="
View Full Code Here

        getDefaultPosTagger();
        getDefaultChunker();
    }

    public static Tokenizer getDefaultTokenizer() throws IOException {
        return new TokenizerME(new TokenizerModel(
                getResourceAsStream(tokenizerModelFile)));
    }
View Full Code Here

    protected static BaseModel loadgivenmodeltype(OpenNlpModels m, InputStream in) throws InvalidFormatException, IOException {
        BaseModel mdl = null;
        switch(m) {
            case TokenizerModel: {
                mdl = new TokenizerModel(in);
                LOG.debug("OpenNLP5 Tokenizer Model loaded: " + mdl);
                break;
            }
            case POSModel: {
                mdl = new POSModel(in);
View Full Code Here

     *
     * @throws IOException
     */
    public ApacheExtractor() throws IOException {
        nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel)));
        tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel)));
        sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel)));
    }
View Full Code Here

  private final EnglishChunkFilter chunkFilter;

  public EnglishChunker() {
    try {
      if (tokenModel == null) {
        tokenModel = new TokenizerModel(Tools.getStream(TOKENIZER_MODEL));
      }
      if (posModel == null) {
        posModel = new POSModel(Tools.getStream(POS_TAGGER_MODEL));
      }
      if (chunkerModel == null) {
View Full Code Here

TOP

Related Classes of opennlp.tools.tokenize.TokenizerModel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.