Package opennlp.tools.dictionary

Examples of opennlp.tools.dictionary.Dictionary


      eos = params.getEosChars().toCharArray();

    SentenceModel model;

    try {
      Dictionary dict = loadDict(params.getAbbDict());
      SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(
          params.getFactory(), params.getLang(), true, dict, eos);
      model = SentenceDetectorME.train(params.getLang(), sampleStream,
          sdFactory, mlParams);
    } catch (IOException e) {
View Full Code Here


      if (this.factory == null) {
        this.factory = POSTaggerFactory.create(this.factoryClassName, null,
            null);
      }

      Dictionary ngramDict = this.factory.getDictionary();
      if (ngramDict == null) {
        if(this.ngramCutoff != null) {
          System.err.print("Building ngram dictionary ... ");
          ngramDict = POSTaggerME.buildNGramDictionary(trainingSampleStream,
              this.ngramCutoff);
View Full Code Here

          if (dict == null) {
            ss = new POSSampleSequenceStream(new WordTagSampleStream(
                new InputStreamReader(new FileInputStream(inFile))));
          }
          else {
            POSContextGenerator cg = new DefaultPOSContextGenerator(new Dictionary(new FileInputStream(dict)));

            ss = new POSSampleSequenceStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile)))),
                cg);
          }
        }
        else {
          if (dict == null) {

            ss = new POSSampleSequenceStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile), encoding))));
          }
          else {
            POSContextGenerator cg = new DefaultPOSContextGenerator(new Dictionary(new FileInputStream(dict)));

            ss = new POSSampleSequenceStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile), encoding))), cg);
          }
        }
        mod = new SimplePerceptronSequenceTrainer().trainModel(iterations, ss, cutoff, true);
        System.out.println("Saving the model as: " + outFile);
        new SuffixSensitivePerceptronModelWriter(mod, outFile).persist();
      }
      else {
        POSSampleEventStream es;
        if (encoding == null) {
          if (dict == null) {
            es = new POSSampleEventStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile)))));
          }
          else {
            POSContextGenerator cg = new DefaultPOSContextGenerator(new Dictionary(new FileInputStream(dict)));

            es = new POSSampleEventStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile)))),
                cg);
          }
        }
        else {
          if (dict == null) {

            es = new POSSampleEventStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile), encoding))));
          }
          else {
            POSContextGenerator cg = new DefaultPOSContextGenerator(new Dictionary(new FileInputStream(dict)));

            es = new POSSampleEventStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile), encoding))), cg);
          }
        }
View Full Code Here

    }

    System.out.println("Saving the dictionary");

    ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
    Dictionary dictionary = ngramModel.toDictionary(true);

    dictionary.serialize(new FileOutputStream(dict));
  }
View Full Code Here

  @Test
  public void testPOSTaggerWithCustomFactory() throws IOException {
    DummyPOSDictionary posDict = new DummyPOSDictionary(
        POSDictionary.create(POSDictionaryTest.class
            .getResourceAsStream("TagDictionaryCaseSensitive.xml")));
    Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);

    POSModel posModel = trainPOSModel(ModelType.MAXENT,
        new DummyPOSTaggerFactory(dic, posDict));

    POSTaggerFactory factory = posModel.getFactory();
View Full Code Here

 
  @Test
  public void testPOSTaggerWithDefaultFactory() throws IOException {
    POSDictionary posDict = POSDictionary.create(POSDictionaryTest.class
            .getResourceAsStream("TagDictionaryCaseSensitive.xml"));
    Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0);

    POSModel posModel = trainPOSModel(ModelType.MAXENT,
        new POSTaggerFactory(dic, posDict));

    POSTaggerFactory factory = posModel.getFactory();
View Full Code Here

  public String getShortDescription() {
    return "trainer for the learnable tokenizer";
  }

  static Dictionary loadDict(File f) throws IOException {
    Dictionary dict = null;
    if (f != null) {
      CmdLineUtil.checkInputFile("abb dict", f);
      dict = new Dictionary(new FileInputStream(f));
    }
    return dict;
  }
View Full Code Here

    File modelOutFile = params.getModel();
    CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);

    TokenizerModel model;
    try {
      Dictionary dict = loadDict(params.getAbbDict());

      TokenizerFactory tokFactory = TokenizerFactory.create(
          params.getFactory(), params.getLang(), dict,
          params.getAlphaNumOpt(), null);
      model = opennlp.tools.tokenize.TokenizerME.train(sampleStream,
View Full Code Here

    if (params.getMisclassified()) {
      listener = new TokenEvaluationErrorListener();
    }
   
    try {
      Dictionary dict = TokenizerTrainerTool.loadDict(params.getAbbDict());

      TokenizerFactory tokFactory = TokenizerFactory.create(
          params.getFactory(), params.getLang(), dict,
          params.getAlphaNumOpt(), null);
      validator = new opennlp.tools.tokenize.TokenizerCrossValidator(mlParams,
View Full Code Here

    }

    File modelOutFile = params.getModel();
    CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);

    Dictionary ngramDict = null;
   
    Integer ngramCutoff = params.getNgram();
   
    if (ngramCutoff != null) {
      System.err.print("Building ngram dictionary ... ");
View Full Code Here

TOP

Related Classes of opennlp.tools.dictionary.Dictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.