Examples of TokenizerFactory


Examples of com.aliasi.tokenizer.TokenizerFactory

  //*-- create the spell checker
  if (crawlConfig.isSpellCheck())
  { FixedWeightEditDistance fixedEdit = new FixedWeightEditDistance( MATCH_WEIGHT, DELETE_WEIGHT, INSERT_WEIGHT,
      SUBSTITUTE_WEIGHT, TRANSPOSE_WEIGHT);
    NGramProcessLM lm = new NGramProcessLM(NGRAM_LENGTH);
    TokenizerFactory tokenizerFactory = new StandardBgramTokenizerFactory(false); //*-- do not extract entities
    try { if ( (new File(Constants.SPELL_TRAIN_MODEL).exists() ) && !crawlConfig.isFreshCrawl() )
             lm = readModel(Constants.SPELL_TRAIN_MODEL);
          sc = new TrainSpellChecker(lm, fixedEdit, tokenizerFactory); }
    catch (IOException ie) { logger.error("IO Error: Could not read spell train file " + ie.getMessage()); }
    catch (ClassNotFoundException ce) { logger.error("Class error: " + ce.getMessage()); }
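For comparison, a minimal, self-contained sketch of the same wiring against stock LingPipe classes. The edit-distance weights and n-gram length are illustrative placeholders, and IndoEuropeanTokenizerFactory stands in for the custom StandardBgramTokenizerFactory used above; depending on the LingPipe release the factory is obtained via its constructor or the INSTANCE singleton.

  import com.aliasi.lm.NGramProcessLM;
  import com.aliasi.spell.FixedWeightEditDistance;
  import com.aliasi.spell.TrainSpellChecker;
  import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
  import com.aliasi.tokenizer.TokenizerFactory;

  public class SpellCheckerSetupSketch {
    public static void main(String[] args) {
      //*-- illustrative weights: match, delete, insert, substitute, transpose
      FixedWeightEditDistance fixedEdit =
          new FixedWeightEditDistance(0.0, -4.0, -4.0, -4.0, -2.0);
      NGramProcessLM lm = new NGramProcessLM(5); //*-- 5-gram character language model
      TokenizerFactory tokenizerFactory = IndoEuropeanTokenizerFactory.INSTANCE;
      TrainSpellChecker sc = new TrainSpellChecker(lm, fixedEdit, tokenizerFactory);
      //*-- training text would be fed to sc here before compiling the spell checker
    }
  }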

Examples of com.google.gwt.place.testplaces.TokenizerFactory

    doTest(subject, null);
  }

  public void testTopLevelWithFactory() {
    AbstractPlaceHistoryMapper<TokenizerFactory> subject = GWT.create(WithFactory.class);
    TokenizerFactory factory = new TokenizerFactory();
    subject.setFactory(factory);

    doTest(subject, factory);
  }

Examples of com.google.gwt.place.testplaces.TokenizerFactory

    doTest(subject, null);
  }

  public void testNestedWithFactory() {
    AbstractPlaceHistoryMapper<TokenizerFactory> subject = GWT.create(LocalWithFactory.class);
    TokenizerFactory factory = new TokenizerFactory();
    subject.setFactory(factory);

    doTest(subject, factory);
  }
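In both tests the factory's job is to hand PlaceTokenizer instances to the generated history mapper. A bare-bones sketch of that arrangement, assuming the standard com.google.gwt.place.shared API; HelloPlace, MyTokenizerFactory, and MyMapper are made-up names.

  import com.google.gwt.place.shared.Place;
  import com.google.gwt.place.shared.PlaceHistoryMapperWithFactory;
  import com.google.gwt.place.shared.PlaceTokenizer;
  import com.google.gwt.place.shared.Prefix;

  // Hypothetical place with its own tokenizer.
  class HelloPlace extends Place {
    final String name;
    HelloPlace(String name) { this.name = name; }

    static class Tokenizer implements PlaceTokenizer<HelloPlace> {
      public HelloPlace getPlace(String token) { return new HelloPlace(token); }
      public String getToken(HelloPlace place) { return place.name; }
    }
  }

  // Factory methods supply tokenizers to the generated mapper; @Prefix sets the history-token prefix.
  class MyTokenizerFactory {
    @Prefix("hello")
    public HelloPlace.Tokenizer getHelloTokenizer() { return new HelloPlace.Tokenizer(); }
  }

  // GWT.create(MyMapper.class) produces the mapper, and setFactory(new MyTokenizerFactory())
  // mirrors the subject.setFactory(factory) calls in the tests above.
  interface MyMapper extends PlaceHistoryMapperWithFactory<MyTokenizerFactory> {
  }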

Examples of edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenizerFactory

     
    }
   
    private static Map<INDEXFIELD, Tokenizer> initMap(Properties props) {
      HashMap<INDEXFIELD, Tokenizer> map = new HashMap<INDEXFIELD, Tokenizer>(INDEXFIELD.values().length);
      TokenizerFactory fact = TokenizerFactory.getInstance(props);
      for (INDEXFIELD fld : INDEXFIELD.values()) {
        map.put(fld, fact.getTokenizer(fld));
      }
     
      return map;
    }
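A short sketch of how initMap might be driven; it uses only the calls visible above (TokenizerFactory.getInstance(props) and getTokenizer(fld)). The properties file name is a placeholder, and the standard java.io/java.util imports plus the project-specific Tokenizer and INDEXFIELD types are omitted.

    Properties props = new Properties();
    props.load(new FileInputStream("indexer.properties")); // placeholder path

    // One Tokenizer per index field, built once and looked up as documents are parsed.
    Map<INDEXFIELD, Tokenizer> tokenizers = initMap(props);
    Tokenizer fieldTokenizer = tokenizers.get(INDEXFIELD.values()[0]);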

Examples of opennlp.tools.tokenize.TokenizerFactory

    TokenizerModel model;
    try {
      Dictionary dict = loadDict(params.getAbbDict());

      TokenizerFactory tokFactory = TokenizerFactory.create(
          params.getFactory(), params.getLang(), dict,
          params.getAlphaNumOpt(), null);
      model = opennlp.tools.tokenize.TokenizerME.train(sampleStream,
          tokFactory, mlParams);
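The tool above pulls its factory name, language, dictionary, and alphanumeric option from command-line params. A self-contained sketch of the same training flow with those inputs hard-coded, assuming OpenNLP 1.5.3-era APIs and a <SPLIT>-annotated training file at a placeholder path:

  import java.io.FileInputStream;
  import java.io.FileOutputStream;

  import opennlp.tools.tokenize.TokenSample;
  import opennlp.tools.tokenize.TokenSampleStream;
  import opennlp.tools.tokenize.TokenizerFactory;
  import opennlp.tools.tokenize.TokenizerME;
  import opennlp.tools.tokenize.TokenizerModel;
  import opennlp.tools.util.ObjectStream;
  import opennlp.tools.util.PlainTextByLineStream;
  import opennlp.tools.util.TrainingParameters;

  public class TokenizerTrainingSketch {
    public static void main(String[] args) throws Exception {
      // One sentence per line, token boundaries marked with <SPLIT> (placeholder path).
      ObjectStream<String> lines =
          new PlainTextByLineStream(new FileInputStream("en-token.train"), "UTF-8");
      ObjectStream<TokenSample> samples = new TokenSampleStream(lines);

      // null subclass name selects the default factory; null dictionary and pattern are allowed.
      TokenizerFactory tokFactory = TokenizerFactory.create(null, "en", null, true, null);

      TrainingParameters mlParams = new TrainingParameters();
      mlParams.put(TrainingParameters.ITERATIONS_PARAM, "100");
      mlParams.put(TrainingParameters.CUTOFF_PARAM, "5");

      TokenizerModel model = TokenizerME.train(samples, tokFactory, mlParams);
      model.serialize(new FileOutputStream("en-token.bin"));
    }
  }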

Examples of opennlp.tools.tokenize.TokenizerFactory

    }
   
    try {
      Dictionary dict = TokenizerTrainerTool.loadDict(params.getAbbDict());

      TokenizerFactory tokFactory = TokenizerFactory.create(
          params.getFactory(), params.getLang(), dict,
          params.getAlphaNumOpt(), null);
      validator = new opennlp.tools.tokenize.TokenizerCrossValidator(mlParams,
          tokFactory, listener);
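The cross-validating variant only changes what consumes the factory. Continuing from the training sketch above (same samples, tokFactory, and mlParams, plus an import of opennlp.tools.tokenize.TokenizerCrossValidator), a 10-fold evaluation might look like:

      // No extra evaluation listeners here; the tool passes one for detailed error reporting.
      TokenizerCrossValidator validator =
          new TokenizerCrossValidator(mlParams, tokFactory);
      validator.evaluate(samples, 10);              // 10-fold cross-validation
      System.out.println(validator.getFMeasure());  // aggregated precision/recall/F-measure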

Examples of org.apache.lucene.analysis.util.TokenizerFactory

    // prepare bi-gram tokenizer factory
    Map<String, String> args = new HashMap<String, String>();
    args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, "4.4");
    args.put("minGramSize","2");
    args.put("maxGramSize","2");
    TokenizerFactory tf = new NGramTokenizerFactory(args);
   
    // (ab)->(bc)->(cd)->[ef][fg][gh]
    List<String> rules = new ArrayList<String>();
    rules.add( "abcd=>efgh" );
    synMap = new SlowSynonymMap( true );
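Once built, the factory hands out Tokenizer instances via create(Reader). A small sketch of exercising the bi-gram factory above, assuming Lucene 4.4-era analysis APIs; the class name and sample text are illustrative:

  import java.io.StringReader;
  import java.util.HashMap;
  import java.util.Map;

  import org.apache.lucene.analysis.Tokenizer;
  import org.apache.lucene.analysis.ngram.NGramTokenizerFactory;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.util.TokenizerFactory;

  public class BigramTokenizerSketch {
    public static void main(String[] args) throws Exception {
      Map<String, String> factoryArgs = new HashMap<String, String>();
      factoryArgs.put("luceneMatchVersion", "4.4"); // same key as AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM
      factoryArgs.put("minGramSize", "2");
      factoryArgs.put("maxGramSize", "2");
      TokenizerFactory tf = new NGramTokenizerFactory(factoryArgs);

      // "abcd" should come out as the bi-grams ab, bc, cd.
      Tokenizer tokenizer = tf.create(new StringReader("abcd"));
      CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
      tokenizer.reset();
      while (tokenizer.incrementToken()) {
        System.out.println(term.toString());
      }
      tokenizer.end();
      tokenizer.close();
    }
  }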