Package opennlp.tools.dictionary

Examples of opennlp.tools.dictionary.Dictionary


        System.exit(1);
      }
      ai++;
    }
    HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(args[ai++]);
    Dictionary dict = null;
    if (ai < args.length) {
      dict = new Dictionary(new FileInputStream(args[ai++]),true);
    }
    if (fun) {
      Parse.useFunctionTags(true);
    }
    ObjectStream<Event> es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, dict);
View Full Code Here


      if (this.factory == null) {
        this.factory = POSTaggerFactory.create(this.factoryClassName, null,
            null);
      }

      Dictionary ngramDict = this.factory.getDictionary();
      if (ngramDict == null) {
        if(this.ngramCutoff != null) {
          System.err.print("Building ngram dictionary ... ");
          ngramDict = POSTaggerME.buildNGramDictionary(trainingSampleStream,
              this.ngramCutoff);
View Full Code Here

    }

    File modelOutFile = params.getModel();
    CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);

    Dictionary ngramDict = null;

    Integer ngramCutoff = params.getNgram();

    if (ngramCutoff != null) {
      System.err.print("Building ngram dictionary ... ");
View Full Code Here

  public String getShortDescription() {
    return "trainer for the learnable tokenizer";
  }

  static Dictionary loadDict(File f) throws IOException {
    Dictionary dict = null;
    if (f != null) {
      CmdLineUtil.checkInputFile("abb dict", f);
      dict = new Dictionary(new FileInputStream(f));
    }
    return dict;
  }
View Full Code Here

    File modelOutFile = params.getModel();
    CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);

    TokenizerModel model;
    try {
      Dictionary dict = loadDict(params.getAbbDict());

      TokenizerFactory tokFactory = TokenizerFactory.create(
          params.getFactory(), params.getLang(), dict,
          params.getAlphaNumOpt(), null);
      model = opennlp.tools.tokenize.TokenizerME.train(sampleStream,
View Full Code Here

    if (params.getMisclassified()) {
      listener = new TokenEvaluationErrorListener();
    }

    try {
      Dictionary dict = TokenizerTrainerTool.loadDict(params.getAbbDict());

      TokenizerFactory tokFactory = TokenizerFactory.create(
          params.getFactory(), params.getLang(), dict,
          params.getAlphaNumOpt(), null);
      validator = new opennlp.tools.tokenize.TokenizerCrossValidator(mlParams,
View Full Code Here

  @Test
  public void testPOSTaggerWithCustomFactory() throws IOException {
    DummyPOSDictionary posDict = new DummyPOSDictionary(
        POSDictionary.create(POSDictionaryTest.class
            .getResourceAsStream("TagDictionaryCaseSensitive.xml")));
    Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);

    POSModel posModel = trainPOSModel(ModelType.MAXENT,
        new DummyPOSTaggerFactory(dic, posDict));

    POSTaggerFactory factory = posModel.getFactory();
View Full Code Here

  @Test
  public void testPOSTaggerWithDefaultFactory() throws IOException {
    POSDictionary posDict = POSDictionary.create(POSDictionaryTest.class
            .getResourceAsStream("TagDictionaryCaseSensitive.xml"));
    Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);

    POSModel posModel = trainPOSModel(ModelType.MAXENT,
        new POSTaggerFactory(dic, posDict));

    POSTaggerFactory factory = posModel.getFactory();
View Full Code Here

   *    built from the input file.
   * @throws IOException IOException
   */
  public static Dictionary createDictionary(ObjectStream<StringList> sampleStream) throws IOException {

    Dictionary mNameDictionary = new Dictionary(true);
    StringList entry;

    entry = sampleStream.read();
    while (entry != null) {
      if (!mNameDictionary.contains(entry)) {
        mNameDictionary.put(entry);
      }
      entry = sampleStream.read();
    }

    return mNameDictionary;
View Full Code Here

    FileInputStream sampleDataIn = CmdLineUtil.openInFile(testData);
    ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(sampleDataIn,
        Charset.forName(params.getEncoding()));

    Dictionary mDictionary;
    try {
      System.out.println("Creating Dictionary...");
      mDictionary = createDictionary(sampleStream);
    } catch (IOException e) {
      throw new TerminateToolException(-1, "IO error while reading training data or indexing data: "
          + e.getMessage(), e);
    } finally {
      try {
        sampleStream.close();
      } catch(IOException e) {
        // sorry this can fail..
      }
    }

    System.out.println("Saving Dictionary...");

    OutputStream out = null;

    try {
      out = new FileOutputStream(dictOutFile);
      mDictionary.serialize(out);
    } catch (IOException e) {
      throw new TerminateToolException(-1, "IO error while writing dictionary file: "
          + e.getMessage(), e);
    }
    finally {
View Full Code Here

TOP

Related Classes of opennlp.tools.dictionary.Dictionary

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.