Package opennlp.tools.dictionary

Examples of opennlp.tools.dictionary.Dictionary


        }
      }
      sample = sampleStream.read();
    }
    sampleStream.close();
    Dictionary dictionary = new Dictionary(true);
    for (String[] entry : entries) {
      StringList dicEntry = new StringList(entry);
      dictionary.put(dicEntry);
    }
    return dictionary;
  }
View Full Code Here


    public DummyDictionary(Dictionary dict) {
      this.indict = dict;
    }

    public DummyDictionary(InputStream in) throws IOException {
      this.indict = new Dictionary(in);
    }
View Full Code Here

    char[] eos = null;
    if (params.getEosChars() != null)
      eos = params.getEosChars().toCharArray();

    try {
      Dictionary abbreviations = SentenceDetectorTrainerTool.loadDict(params.getAbbDict());
      SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(
          params.getFactory(), params.getLang(), true, abbreviations, eos);
      validator = new SDCrossValidator(params.getLang(), mlParams, sdFactory,
          errorListener);
     
View Full Code Here

  static Dictionary loadAbbDictionary() throws IOException {
    InputStream in = TokenizerFactoryTest.class.getClassLoader()
        .getResourceAsStream("opennlp/tools/sentdetect/abb.xml");

    return new Dictionary(in);
  }
View Full Code Here

  }

  @Test
  public void testDefault() throws IOException {

    Dictionary dic = loadAbbDictionary();
    final String lang = "es";

    TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null));

    TokenizerFactory factory = model.getFactory();
View Full Code Here

  }

  @Test
  public void testNullDict() throws IOException {

    Dictionary dic = null;
    final String lang = "es";

    TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null));

    TokenizerFactory factory = model.getFactory();
View Full Code Here

  }

  @Test
  public void testCustomPatternAndAlphaOpt() throws IOException {

    Dictionary dic = null;
    final String lang = "es";
    String pattern = "^[0-9A-Za-z]+$";

    TokenizerModel model = train(new TokenizerFactory(lang, dic, true,
        Pattern.compile(pattern)));
View Full Code Here

  }

  @Test
  public void testDummyFactory() throws IOException {

    Dictionary dic = loadAbbDictionary();
    final String lang = "es";
    String pattern = "^[0-9A-Za-z]+$";

    TokenizerModel model = train(new DummyTokenizerFactory(lang, dic, true,
        Pattern.compile(pattern)));
View Full Code Here

    assertTrue(factory.isUseAlphaNumericOptmization());
  }

  @Test
  public void testCreateDummyFactory() throws IOException {
    Dictionary dic = loadAbbDictionary();
    final String lang = "es";
    String pattern = "^[0-9A-Za-z]+$";

    TokenizerFactory factory = TokenizerFactory.create(
        DummyTokenizerFactory.class.getCanonicalName(), lang, dic, true,
View Full Code Here

  public String getShortDescription() {
    return "trainer for the learnable sentence detector";
  }
 
  static Dictionary loadDict(File f) throws IOException {
    Dictionary dict = null;
    if (f != null) {
      CmdLineUtil.checkInputFile("abb dict", f);
      dict = new Dictionary(new FileInputStream(f));
    }
    return dict;
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.dictionary.Dictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.