Package opennlp.tools.tokenize.lang

Examples of opennlp.tools.tokenize.lang.Factory


   * @param cg
   */
  public TokSpanEventStream(ObjectStream<TokenSample> tokenSamples,
        boolean skipAlphaNumerics, TokenContextGenerator cg) {
    super(tokenSamples);
    Factory factory = new Factory();
    this.alphaNumeric = factory.getAlphanumeric(null);
    this.skipAlphaNumerics = skipAlphaNumerics;
    this.cg = cg;
  }
View Full Code Here


   * @param cg
   */
  public TokSpanEventStream(ObjectStream<TokenSample> tokenSamples,
        boolean skipAlphaNumerics, TokenContextGenerator cg) {
    super(tokenSamples);
    Factory factory = new Factory();
    this.alphaNumeric = factory.getAlphanumeric(null);
    this.skipAlphaNumerics = skipAlphaNumerics;
    this.cg = cg;
  }
View Full Code Here

  private List<Double> tokProbs;

  private List<Span> newTokens;

  /**
   * Creates an instance for the given model by delegating to the
   * two-argument constructor with a newly created {@code Factory}.
   *
   * @param model the model passed through to the delegated constructor
   */
  public TokenizerME(TokenizerModel model) {
    this(model, new Factory());
  }
View Full Code Here

   */
  public static TokenizerModel train(String languageCode,
      ObjectStream<TokenSample> samples, Dictionary abbreviations,
      boolean useAlphaNumericOptimization, TrainingParameters mlParams)
      throws IOException {
    Factory factory = new Factory();

    Map<String, String> manifestInfoEntries = new HashMap<String, String>();

    EventStream eventStream = new TokSpanEventStream(samples,
        useAlphaNumericOptimization, factory.getAlphanumeric(languageCode),
        factory.createTokenContextGenerator(languageCode,
            getAbbreviations(abbreviations)));

    AbstractModel maxentModel = TrainUtil.train(eventStream,
        mlParams.getSettings(), manifestInfoEntries);

View Full Code Here

          this.alphaNumericPattern = Pattern.compile(prop);
        }
      }
      // could not load from manifest, will get from language dependent factory
      if (this.alphaNumericPattern == null) {
        Factory f = new Factory();
        this.alphaNumericPattern = f.getAlphanumeric(languageCode);
      }
    }
    return this.alphaNumericPattern;
  }
View Full Code Here

  /**
   * Gets the context generator
   */
  public TokenContextGenerator getContextGenerator() {
    Factory f = new Factory();
    Set<String> abbs = null;
    Dictionary abbDict = getAbbreviationDictionary();
    if (abbDict != null) {
      abbs = abbDict.asStringSet();
    } else {
      abbs = Collections.emptySet();
    }
    return f.createTokenContextGenerator(getLanguageCode(), abbs);
  }
View Full Code Here

   */
  public static TokenizerModel train(String languageCode,
      ObjectStream<TokenSample> samples, Dictionary abbreviations,
      boolean useAlphaNumericOptimization, TrainingParameters mlParams)
      throws IOException {
    Factory factory = new Factory();

    Map<String, String> manifestInfoEntries = new HashMap<String, String>();

    EventStream eventStream = new TokSpanEventStream(samples,
        useAlphaNumericOptimization, factory.getAlphanumeric(languageCode),
        factory.createTokenContextGenerator(languageCode,
            getAbbreviations(abbreviations)));

    AbstractModel maxentModel = TrainUtil.train(eventStream,
        mlParams.getSettings(), manifestInfoEntries);

View Full Code Here

   * @param cg
   */
  public TokSpanEventStream(ObjectStream<TokenSample> tokenSamples,
        boolean skipAlphaNumerics, TokenContextGenerator cg) {
    super(tokenSamples);
    Factory factory = new Factory();
    this.alphaNumeric = factory.getAlphanumeric(null);
    this.skipAlphaNumerics = skipAlphaNumerics;
    this.cg = cg;
  }
View Full Code Here

          this.alphaNumericPattern = Pattern.compile(prop);
        }
      }
      // could not load from manifest, will get from language dependent factory
      if (this.alphaNumericPattern == null) {
        Factory f = new Factory();
        this.alphaNumericPattern = f.getAlphanumeric(languageCode);
      }
    }
    return this.alphaNumericPattern;
  }
View Full Code Here

   * Gets the context generator
   *
   * @return a new instance of the context generator
   */
  public TokenContextGenerator getContextGenerator() {
    Factory f = new Factory();
    Set<String> abbs = null;
    Dictionary abbDict = getAbbreviationDictionary();
    if (abbDict != null) {
      abbs = abbDict.asStringSet();
    } else {
      abbs = Collections.emptySet();
    }
    return f.createTokenContextGenerator(getLanguageCode(), abbs);
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.tokenize.lang.Factory

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.