Examples of TokenizerFactory


Examples of com.aliasi.tokenizer.TokenizerFactory

  //*-- create the spell checker
  if (crawlConfig.isSpellCheck())
  { FixedWeightEditDistance fixedEdit = new FixedWeightEditDistance( MATCH_WEIGHT, DELETE_WEIGHT, INSERT_WEIGHT,
      SUBSTITUTE_WEIGHT, TRANSPOSE_WEIGHT);
    NGramProcessLM lm = new NGramProcessLM(NGRAM_LENGTH);
    TokenizerFactory tokenizerFactory = new StandardBgramTokenizerFactory(false); //*-- do not extract entities
    try { if ( (new File(Constants.SPELL_TRAIN_MODEL).exists() ) && !crawlConfig.isFreshCrawl() )
             lm = readModel(Constants.SPELL_TRAIN_MODEL);
          sc = new TrainSpellChecker(lm, fixedEdit, tokenizerFactory); }
    catch (IOException ie) { logger.error("IO Error: Could not read spell train file " + ie.getMessage()); }
    catch (ClassNotFoundException ce) { logger.error("Class error: " + ce.getMessage()); }
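For comparison, a minimal, self-contained sketch of the same wiring against stock LingPipe classes. The edit-distance weights and n-gram length are illustrative placeholders, and IndoEuropeanTokenizerFactory stands in for the custom StandardBgramTokenizerFactory used above; depending on the LingPipe release the factory is obtained via its constructor or the INSTANCE singleton.

  import com.aliasi.lm.NGramProcessLM;
  import com.aliasi.spell.FixedWeightEditDistance;
  import com.aliasi.spell.TrainSpellChecker;
  import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
  import com.aliasi.tokenizer.TokenizerFactory;

  public class SpellCheckerSetupSketch {
    public static void main(String[] args) {
      //*-- illustrative weights: match, delete, insert, substitute, transpose
      FixedWeightEditDistance fixedEdit =
          new FixedWeightEditDistance(0.0, -4.0, -4.0, -4.0, -2.0);
      NGramProcessLM lm = new NGramProcessLM(5); //*-- 5-gram character language model
      TokenizerFactory tokenizerFactory = IndoEuropeanTokenizerFactory.INSTANCE;
      TrainSpellChecker sc = new TrainSpellChecker(lm, fixedEdit, tokenizerFactory);
      //*-- training text would be fed to sc here before compiling the spell checker
    }
  }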

Examples of com.google.gwt.place.testplaces.TokenizerFactory

    doTest(subject, null);
  }

  public void testTopLevelWithFactory() {
    AbstractPlaceHistoryMapper<TokenizerFactory> subject = GWT.create(WithFactory.class);
    TokenizerFactory factory = new TokenizerFactory();
    subject.setFactory(factory);

    doTest(subject, factory);
  }

Examples of com.google.gwt.place.testplaces.TokenizerFactory

    doTest(subject, null);
  }

  public void testNestedWithFactory() {
    AbstractPlaceHistoryMapper<TokenizerFactory> subject = GWT.create(LocalWithFactory.class);
    TokenizerFactory factory = new TokenizerFactory();
    subject.setFactory(factory);

    doTest(subject, factory);
  }
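In both tests the factory's job is to hand PlaceTokenizer instances to the generated history mapper. A bare-bones sketch of that arrangement, assuming the standard com.google.gwt.place.shared API; HelloPlace, MyTokenizerFactory, and MyMapper are made-up names.

  import com.google.gwt.place.shared.Place;
  import com.google.gwt.place.shared.PlaceHistoryMapperWithFactory;
  import com.google.gwt.place.shared.PlaceTokenizer;
  import com.google.gwt.place.shared.Prefix;

  // Hypothetical place with its own tokenizer.
  class HelloPlace extends Place {
    final String name;
    HelloPlace(String name) { this.name = name; }

    static class Tokenizer implements PlaceTokenizer<HelloPlace> {
      public HelloPlace getPlace(String token) { return new HelloPlace(token); }
      public String getToken(HelloPlace place) { return place.name; }
    }
  }

  // Factory methods supply tokenizers to the generated mapper; @Prefix sets the history-token prefix.
  class MyTokenizerFactory {
    @Prefix("hello")
    public HelloPlace.Tokenizer getHelloTokenizer() { return new HelloPlace.Tokenizer(); }
  }

  // GWT.create(MyMapper.class) produces the mapper, and setFactory(new MyTokenizerFactory())
  // mirrors the subject.setFactory(factory) calls in the tests above.
  interface MyMapper extends PlaceHistoryMapperWithFactory<MyTokenizerFactory> {
  }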

Examples of edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenizerFactory

     
    }
   
    private static Map<INDEXFIELD, Tokenizer> initMap(Properties props) {
      HashMap<INDEXFIELD, Tokenizer> map = new HashMap<INDEXFIELD, Tokenizer>(INDEXFIELD.values().length);
      TokenizerFactory fact = TokenizerFactory.getInstance(props);
      for (INDEXFIELD fld : INDEXFIELD.values()) {
        map.put(fld, fact.getTokenizer(fld));
      }
     
      return map;
    }
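A short sketch of how initMap might be driven; it uses only the calls visible above (TokenizerFactory.getInstance(props) and getTokenizer(fld)). The properties file name is a placeholder, and the standard java.io/java.util imports plus the project-specific Tokenizer and INDEXFIELD types are omitted.

    Properties props = new Properties();
    props.load(new FileInputStream("indexer.properties")); // placeholder path

    // One Tokenizer per index field, built once and looked up as documents are parsed.
    Map<INDEXFIELD, Tokenizer> tokenizers = initMap(props);
    Tokenizer fieldTokenizer = tokenizers.get(INDEXFIELD.values()[0]);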

Examples of opennlp.tools.tokenize.TokenizerFactory

    TokenizerModel model;
    try {
      Dictionary dict = loadDict(params.getAbbDict());

      TokenizerFactory tokFactory = TokenizerFactory.create(
          params.getFactory(), params.getLang(), dict,
          params.getAlphaNumOpt(), null);
      model = opennlp.tools.tokenize.TokenizerME.train(sampleStream,
          tokFactory, mlParams);
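The tool above pulls its factory name, language, dictionary, and alphanumeric option from command-line params. A self-contained sketch of the same training flow with those inputs hard-coded, assuming OpenNLP 1.5.3-era APIs and a <SPLIT>-annotated training file at a placeholder path:

  import java.io.FileInputStream;
  import java.io.FileOutputStream;

  import opennlp.tools.tokenize.TokenSample;
  import opennlp.tools.tokenize.TokenSampleStream;
  import opennlp.tools.tokenize.TokenizerFactory;
  import opennlp.tools.tokenize.TokenizerME;
  import opennlp.tools.tokenize.TokenizerModel;
  import opennlp.tools.util.ObjectStream;
  import opennlp.tools.util.PlainTextByLineStream;
  import opennlp.tools.util.TrainingParameters;

  public class TokenizerTrainingSketch {
    public static void main(String[] args) throws Exception {
      // One sentence per line, token boundaries marked with <SPLIT> (placeholder path).
      ObjectStream<String> lines =
          new PlainTextByLineStream(new FileInputStream("en-token.train"), "UTF-8");
      ObjectStream<TokenSample> samples = new TokenSampleStream(lines);

      // null subclass name selects the default factory; null dictionary and pattern are allowed.
      TokenizerFactory tokFactory = TokenizerFactory.create(null, "en", null, true, null);

      TrainingParameters mlParams = new TrainingParameters();
      mlParams.put(TrainingParameters.ITERATIONS_PARAM, "100");
      mlParams.put(TrainingParameters.CUTOFF_PARAM, "5");

      TokenizerModel model = TokenizerME.train(samples, tokFactory, mlParams);
      model.serialize(new FileOutputStream("en-token.bin"));
    }
  }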

Examples of opennlp.tools.tokenize.TokenizerFactory

    }
   
    try {
      Dictionary dict = TokenizerTrainerTool.loadDict(params.getAbbDict());

      TokenizerFactory tokFactory = TokenizerFactory.create(
          params.getFactory(), params.getLang(), dict,
          params.getAlphaNumOpt(), null);
      validator = new opennlp.tools.tokenize.TokenizerCrossValidator(mlParams,
          tokFactory, listener);
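The cross-validating variant only changes what consumes the factory. Continuing from the training sketch above (same samples, tokFactory, and mlParams, plus an import of opennlp.tools.tokenize.TokenizerCrossValidator), a 10-fold evaluation might look like:

      // No extra evaluation listeners here; the tool passes one for detailed error reporting.
      TokenizerCrossValidator validator =
          new TokenizerCrossValidator(mlParams, tokFactory);
      validator.evaluate(samples, 10);              // 10-fold cross-validation
      System.out.println(validator.getFMeasure());  // aggregated precision/recall/F-measure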

Examples of org.apache.lucene.analysis.util.TokenizerFactory

    // prepare bi-gram tokenizer factory
    Map<String, String> args = new HashMap<String, String>();
    args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, "4.4");
    args.put("minGramSize","2");
    args.put("maxGramSize","2");
    TokenizerFactory tf = new NGramTokenizerFactory(args);
   
    // (ab)->(bc)->(cd)->[ef][fg][gh]
    List<String> rules = new ArrayList<String>();
    rules.add( "abcd=>efgh" );
    synMap = new SlowSynonymMap( true );
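Once built, the factory hands out Tokenizer instances via create(Reader). A small sketch of exercising the bi-gram factory above, assuming Lucene 4.4-era analysis APIs; the class name and sample text are illustrative:

  import java.io.StringReader;
  import java.util.HashMap;
  import java.util.Map;

  import org.apache.lucene.analysis.Tokenizer;
  import org.apache.lucene.analysis.ngram.NGramTokenizerFactory;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.util.TokenizerFactory;

  public class BigramTokenizerSketch {
    public static void main(String[] args) throws Exception {
      Map<String, String> factoryArgs = new HashMap<String, String>();
      factoryArgs.put("luceneMatchVersion", "4.4"); // same key as AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM
      factoryArgs.put("minGramSize", "2");
      factoryArgs.put("maxGramSize", "2");
      TokenizerFactory tf = new NGramTokenizerFactory(factoryArgs);

      // "abcd" should come out as the bi-grams ab, bc, cd.
      Tokenizer tokenizer = tf.create(new StringReader("abcd"));
      CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
      tokenizer.reset();
      while (tokenizer.incrementToken()) {
        System.out.println(term.toString());
      }
      tokenizer.end();
      tokenizer.close();
    }
  }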