Examples of WhitespaceTokenizer


Examples of org.apache.lucene.analysis.WhitespaceTokenizer

    super.init(args);
    assureMatchVersion();
  }

  public WhitespaceTokenizer create(Reader input) {
    return new WhitespaceTokenizer(luceneMatchVersion,input);
  }
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

    final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf, args);
   
    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_31, reader) : factory.create(reader);
        TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_31, tokenizer) : tokenizer;
        return new TokenStreamComponents(tokenizer, stream);
      }
    };
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

  }


 
  public void testReset() throws Exception {
    Tokenizer wsTokenizer = new WhitespaceTokenizer(new StringReader("please divide this sentence"));
    TokenStream filter = new ShingleFilter(wsTokenizer, 2);
    assertTokenStreamContents(filter,
      new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},
      new int[]{0,0,7,7,14,14,19}, new int[]{6,13,13,18,18,27,27},
      new String[]{TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE},
      new int[]{1,0,1,0,1,0,1}
    );
    wsTokenizer.reset(new StringReader("please divide this sentence"));
    assertTokenStreamContents(filter,
      new String[]{"please","please divide","divide","divide this","this","this sentence","sentence"},
      new int[]{0,0,7,7,14,14,19}, new int[]{6,13,13,18,18,27,27},
      new String[]{TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE,"shingle",TypeAttributeImpl.DEFAULT_TYPE},
      new int[]{1,0,1,0,1,0,1}
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

      this.synonyms = synonyms;
      this.maxSynonyms = maxSynonyms;
    }
   
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream ts = new WhitespaceTokenizer(reader);
      ts = new LowerCaseFilter(ts);
      ts = new SynonymTokenFilter(ts, synonyms, maxSynonyms);
      return ts;
    }
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

    public TokenStream reusableTokenStream(String fieldName, Reader reader)
        throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new WhitespaceTokenizer(reader);
        streams.result = new LowerCaseFilter(streams.source);
        streams.result = new SynonymTokenFilter(streams.result, synonyms, maxSynonyms);
        setPreviousTokenStream(streams);
      } else {
        streams.source.reset(reader);
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

    public SnowballSubclassAnalyzer(String name) {
      super(name);
    }
   
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new WhitespaceTokenizer(reader);
    }
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

        }
       
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
            PayloadData payload =  fieldToData.get(fieldName);
            TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
            if (payload != null) {
                if (payload.numFieldInstancesToSkip == 0) {
                    ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);
                } else {
                    payload.numFieldInstancesToSkip--;
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

  }

  public void testPositionIncrementGap() throws IOException {
    Analyzer analyzer = new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new WhitespaceTokenizer(reader);
      }

      public int getPositionIncrementGap(String fieldName) {
        return 500;
      }
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

  }

  public void testTokenReuse() throws IOException {
    Analyzer analyzer = new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new WhitespaceTokenizer(reader)) {
          boolean first=true;
          AttributeSource.State state;

          public boolean incrementToken() throws IOException {
            if (state != null) {
View Full Code Here

Examples of org.apache.lucene.analysis.WhitespaceTokenizer

  }

  public void testDocumentsWriterExceptions() throws IOException {
    Analyzer analyzer = new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new CrashingFilter(fieldName, new WhitespaceTokenizer(reader));
      }
    };

    for(int i=0;i<2;i++) {
      MockRAMDirectory dir = new MockRAMDirectory();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.