Package opennlp.tools.dictionary

Examples of opennlp.tools.dictionary.Dictionary


    OutputStream out = null;
    try {
      in = new InputStreamReader(new FileInputStream(dictInFile), encoding);
      out = new FileOutputStream(dictOutFile);

      Dictionary dict = Dictionary.parseOneEntryPerLine(in);
      dict.serialize(out);

    } catch (IOException e) {
      throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage(), e);
    } finally {
      try {
View Full Code Here


   *    built from the input file.
   * @throws IOException IOException
   */
  public static Dictionary createDictionary(ObjectStream<StringList> sampleStream) throws IOException {

    // NOTE(review): the boolean flag presumably selects case-sensitive matching — confirm against Dictionary.
    Dictionary mNameDictionary = new Dictionary(true);
    StringList entry;

    // Drain the stream; the loop stops as soon as read() returns null.
    entry = sampleStream.read();
    while (entry != null) {
      // Only add entries the dictionary does not already contain.
      if (!mNameDictionary.contains(entry)) {
        mNameDictionary.put(entry);
      }
      entry = sampleStream.read();
    }

    return mNameDictionary;
View Full Code Here

    FileInputStream sampleDataIn = CmdLineUtil.openInFile(testData);
    ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(sampleDataIn,
        Charset.forName(params.getEncoding()));
   
    Dictionary mDictionary;
    try {
      System.out.println("Creating Dictionary...");
      mDictionary = createDictionary(sampleStream);
    } catch (IOException e) {
      throw new TerminateToolException(-1, "IO error while reading training data or indexing data: "
          + e.getMessage(), e);
    } finally {
      try {
        sampleStream.close();
      } catch(IOException e) {
        // sorry this can fail..
      }
    }

    System.out.println("Saving Dictionary...");
   
    OutputStream out = null;
   
    try {
      out = new FileOutputStream(dictOutFile);
      mDictionary.serialize(out);
    } catch (IOException e) {
      throw new TerminateToolException(-1, "IO error while writing dictionary file: "
          + e.getMessage(), e);
    }
    finally {
View Full Code Here

   * Gets the context generator
   */
  public TokenContextGenerator getContextGenerator() {
    Factory f = new Factory();
    Set<String> abbs = null;
    Dictionary abbDict = getAbbreviationDictionary();
    if (abbDict != null) {
      abbs = abbDict.asStringSet();
    } else {
      abbs = Collections.emptySet();
    }
    return f.createTokenContextGenerator(getLanguageCode(), abbs);
  }
View Full Code Here

  static Dictionary loadAbbDictionary() throws IOException {
    InputStream in = SentenceDetectorFactoryTest.class.getClassLoader()
        .getResourceAsStream("opennlp/tools/sentdetect/abb.xml");

    return new Dictionary(in);
  }
View Full Code Here

  }

  @Test
  public void testDefault() throws IOException {

    Dictionary dic = loadAbbDictionary();

    char[] eos = { '.', '?' };
    SentenceModel sdModel = train(new SentenceDetectorFactory("en", true, dic,
        eos));
View Full Code Here

    assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
  }

  @Test
  public void testNullDict() throws IOException {
    Dictionary dic = null;

    char[] eos = { '.', '?' };
    SentenceModel sdModel = train(new SentenceDetectorFactory("en", true, dic,
        eos));
View Full Code Here

    assertTrue(Arrays.equals(eos, factory.getEOSCharacters()));
  }

  @Test
  public void testDefaultEOS() throws IOException {
    Dictionary dic = null;

    char[] eos = null;
    SentenceModel sdModel = train(new SentenceDetectorFactory("en", true, dic,
        eos));
View Full Code Here

  }

  @Test
  public void testDummyFactory() throws IOException {

    Dictionary dic = loadAbbDictionary();

    char[] eos = { '.', '?' };
    SentenceModel sdModel = train(new DummySentenceDetectorFactory("en", true,
        dic, eos));
View Full Code Here

        sdModel.getEosCharacters()));
  }
 
  @Test
  public void testCreateDummyFactory() throws IOException {
    Dictionary dic = loadAbbDictionary();
    char[] eos = { '.', '?' };
   
    SentenceDetectorFactory factory = SentenceDetectorFactory.create(
        DummySentenceDetectorFactory.class.getCanonicalName(), "es", false,
        dic, eos);
View Full Code Here

TOP

Related Classes of opennlp.tools.dictionary.Dictionary

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.