Examples of StandardTokenizer


Examples of de.susebox.jtopas.StandardTokenizer

   */
  private long tokenizeFile(String message, File file, boolean useReaderSource) throws Throwable {
    System.out.println(message);

    TokenizerSource   source    = null;
    StandardTokenizer tokenizer = new StandardTokenizer(_properties);
   
    try {
      if (useReaderSource) {
        source = new ReaderSource(file);
      } else {
        source = new CharArraySource(readFile(file));
      }
      tokenizer.setSource(source);
      return tokenize(tokenizer);
    } finally {
      if (useReaderSource && source != null) {
        ((ReaderSource)source).close();
      }
      tokenizer.close();
    }
  }
View Full Code Here

Examples of it.unibz.instasearch.indexing.tokenizers.standard.StandardTokenizer

 
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader)
  {
    if( Field.CONTENTS.toString().equals(fieldName) ) {
      TokenStream result = new StandardTokenizer(reader); // splits at ". ", "-"
     
      result = new WordSplitTokenizer(result);   // non-alphanumerics
      result = new DotSplitTokenizer(result);   // com.package.names
      result = new CamelCaseTokenizer(result);   // CamelCaseIdentifiers
     
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

/** Creates the analyzer; the constructor body is intentionally empty. */
public StandardBgramAnalyzer() { }
public TokenStream tokenStream (String fieldName, Reader reader)
{
   TokenStream ts = (extractEntities) ?
     new EntFilter ( new BgramFilter( new LowerCaseFilter( new StandardFilter( new StandardTokenizer(reader) ) ) ) ):
                     new BgramFilter( new LowerCaseFilter( new StandardFilter( new StandardTokenizer(reader) ) ) );
   return (ts);
}
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

   *         filtered with {@link StandardFilter}, {@link StopFilter},
   *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
   */
  @Override
  public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                            result, stoptable);
    result = new FrenchStemFilter(result, excltable);
    // Convert to lowercase after stemming!
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                      streams.result, stoptable);
      streams.result = new FrenchStemFilter(streams.result, excltable);
      // Convert to lowercase after stemming!
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

   *   filtered with {@link StandardFilter}, {@link StopFilter},
   *   and {@link DutchStemFilter}
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                            result, stoptable);
    result = new DutchStemFilter(result, excltable, stemdict);
    return result;
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    }
   
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                      streams.result, stoptable);
      streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
      setPreviousTokenStream(streams);
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

   *       {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
   *          {@link BrazilianStemFilter}.
   */
  @Override
  public final TokenStream tokenStream(String fieldName, Reader reader) {
                TokenStream result = new StandardTokenizer( matchVersion, reader );
    result = new LowerCaseFilter( result );
    result = new StandardFilter( result );
    result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                         result, stoptable );
    result = new BrazilianStemFilter( result, excltable );
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new StandardTokenizer(matchVersion, reader);
        streams.result = new LowerCaseFilter(streams.source);
        streams.result = new StandardFilter(streams.result);
        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
                                        streams.result, stoptable);
        streams.result = new BrazilianStemFilter(streams.result, excltable);
View Full Code Here

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    /** Creates the analyzer; the constructor body is intentionally empty. */
    public MultiAnalyzer() {
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
      result = new TestFilter(result);
      result = new LowerCaseFilter(result);
      return result;
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.