Examples of StandardTokenizer


Examples of org.apache.lucene.analysis.standard.StandardTokenizer

public class TypeAwareStopFilterTest {
  @Test
  public void testTypeBasedStop() {
    try {
      TokenStream tokenStream = new StandardTokenizer(Version.LUCENE_34, new StringReader("hey, stop and think!"));
      TypeAwareStopFilter typeAwareStopFilter = new TypeAwareStopFilter(Version.LUCENE_34, tokenStream,
          new HashSet<String>(), true, Arrays.asList("word"));
      assertTrue(!typeAwareStopFilter.accept());
      assertTrue(!typeAwareStopFilter.accept());
      assertTrue(!typeAwareStopFilter.accept());
    } catch (IOException e) {
      fail("unexpected I/O failure: " + e.getMessage());
    }
  }
}
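
The filter above drops tokens by type rather than by term text. For orientation, here is a minimal sketch (not part of the original test) that prints the type StandardTokenizer assigns to each token, assuming the Lucene 3.x attribute API:

import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

public class TokenTypeDemo {
  public static void main(String[] args) throws Exception {
    StandardTokenizer tokenizer =
        new StandardTokenizer(Version.LUCENE_34, new StringReader("hey, stop and think!"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      // StandardTokenizer tags ordinary words as "<ALPHANUM>", numbers as "<NUM>", and so on
      System.out.println(term + " -> " + type.type());
    }
    tokenizer.end();
    tokenizer.close();
  }
}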

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    maxTokenLength = getInt("maxTokenLength",
                            StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
  }

  public StandardTokenizer create(Reader input) {
    StandardTokenizer tokenizer
      = new StandardTokenizer(luceneMatchVersion, input);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
  }
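
The only knob this factory exposes is maxTokenLength. A small sketch of its effect, assuming the Lucene 3.x behavior where StandardTokenizer silently skips any token longer than the limit:

    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36,
        new StringReader("short supercalifragilisticexpialidocious"));
    tokenizer.setMaxTokenLength(10);
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(term); // prints only "short"; the 34-character token is skipped
    }
    tokenizer.end();
    tokenizer.close();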

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

  }
  @Test
  public void testLuceneStandardTokenizer() throws Exception {
    String[] gold = {"I", "can't", "beleive", "that", "the", "Carolina", "Hurricanes", "won", "the", "2005", "2006", "Stanley", "Cup",};
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, new StringReader("I can't beleive that the Carolina Hurricanes won the 2005-2006 Stanley Cup."));
    List<String> result = new ArrayList<String>();
    while (tokenizer.incrementToken()) {
      result.add(tokenizer.getAttribute(CharTermAttribute.class).toString());
    }
    assertTrue("result Size: " + result.size() + " is not: " + gold.length, result.size() == gold.length);
    int i = 0;
    for (String chunk : result) {
      assertTrue(chunk + " is not equal to " + gold[i], chunk.equals(gold[i]));
      i++;
    }
  }
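
Note that the test calls incrementToken() on a freshly constructed tokenizer, which Lucene 3.x tolerates; from 4.x on, reset() must be called first. A sketch of the canonical consume loop, with the attribute lookup hoisted out of the loop:

    String text = "I can't beleive that the Carolina Hurricanes won the 2005-2006 Stanley Cup.";
    List<String> result = new ArrayList<String>();
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, new StringReader(text));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();                 // a no-op in 3.x, mandatory from Lucene 4.x on
    while (tokenizer.incrementToken()) {
      result.add(term.toString());
    }
    tokenizer.end();                   // finalizes offset state
    tokenizer.close();                 // releases the underlying Reader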

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

  @Test
  public void testVikings() throws Exception {
    String[] gold = {"Last", "week", "the", "National", "Football", "League", "crowned", "a", "new", "Super", "Bowl", "Champion",
            "Minnesota", "Vikings", "fans", "will", "take", "little", "solace", "in", "the", "fact", "that", "they",
            "lost", "to", "the", "eventual", "champion", "in", "the", "playoffs"};
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, new StringReader("Last week the National Football League crowned a new Super Bowl Champion." +
            "  Minnesota Vikings fans will take little solace in the fact that they" +
            " lost to the eventual champion in the playoffs."));
    List<String> result = new ArrayList<String>();
    while (tokenizer.incrementToken()) {
      result.add(tokenizer.getAttribute(CharTermAttribute.class).toString());
    }
    assertTrue("result Size: " + result.size() + " is not: " + gold.length, result.size() == gold.length);
    int i = 0;
    for (String chunk : result) {
      System.out.println(chunk);
      assertTrue(chunk + " is not equal to " + gold[i], chunk.equals(gold[i]));
      i++;
    }
  }

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    return maxTokenLength;
  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
      @Override
      protected void setReader(final Reader reader) throws IOException {
        src.setMaxTokenLength(NoStopWordStandardAnalyzer.this.maxTokenLength);
        super.setReader(reader);
      }
    };
  }
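
createComponents is only ever invoked indirectly: Lucene 4.x callers obtain the (reused) chain through Analyzer.tokenStream. A minimal consumption sketch, assuming analyzer is an instance of the class above and using an arbitrary field name:

    TokenStream ts = analyzer.tokenStream("body", new StringReader("Some text to analyze"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term);
    }
    ts.end();
    ts.close();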

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

   * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
   *       {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
   *          {@link BrazilianStemFilter}.
   */
  public final TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer( reader );
    result = new LowerCaseFilter( result );
    result = new StandardFilter( result );
    result = new StopFilter( result, stoptable );
    result = new BrazilianStemFilter( result, excltable );
    return result;
  }
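
The chain is the standard decorator stack: each filter wraps the TokenStream before it. A self-contained sketch of the same pattern built only from stock Lucene 3.x classes, with StandardAnalyzer's default stop set standing in for stoptable:

    TokenStream result = new StandardTokenizer(Version.LUCENE_36, new StringReader("O menino correu"));
    result = new LowerCaseFilter(Version.LUCENE_36, result);
    result = new StandardFilter(Version.LUCENE_36, result);
    result = new StopFilter(Version.LUCENE_36, result, StandardAnalyzer.STOP_WORDS_SET);
    CharTermAttribute term = result.addAttribute(CharTermAttribute.class);
    result.reset();
    while (result.incrementToken()) {
      System.out.println(term);
    }
    result.end();
    result.close();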

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
      if (streams == null) {
        streams = new SavedStreams();
        streams.source = new StandardTokenizer(reader);
        streams.result = new LowerCaseFilter(streams.source);
        streams.result = new StandardFilter(streams.result);
        streams.result = new StopFilter(streams.result, stoptable);
        streams.result = new BrazilianStemFilter(streams.result, excltable);
        setPreviousTokenStream(streams);
      } else {
        streams.source.reset(reader);
      }
      return streams.result;
    }
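
SavedStreams in these analyzers is usually nothing more than a per-thread holder for the head and tail of the chain, so the tokenizer is built once per thread and then re-fed via reset(reader). A sketch of how such analyzers typically declare it:

  private static final class SavedStreams {
    Tokenizer source;    // head of the chain, re-fed with source.reset(reader)
    TokenStream result;  // tail of the chain, handed back to callers
  }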

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

   * @return A {@link TokenStream} built from a {@link StandardTokenizer}
   *   filtered with {@link StandardFilter}, {@link StopFilter},
   *   and {@link DutchStemFilter}
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new StopFilter(result, stoptable);
    result = new DutchStemFilter(result, excltable, stemdict);
    return result;
  }

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new StopFilter(streams.result, stoptable);
      streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
    }
    return streams.result;
  }

Examples of org.apache.lucene.analysis.standard.StandardTokenizer

  public void testExceptionFromTokenStream() throws IOException {
    RAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new Analyzer() {

      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new TokenFilter(new StandardTokenizer(reader)) {
          private int count = 0;

          public boolean incrementToken() throws IOException {
            if (count++ == 5) {
              throw new IOException();
            }
            return input.incrementToken();
          }
        };
      }
    }, true, IndexWriter.MaxFieldLength.LIMITED);
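
The injected IOException exists to verify that IndexWriter surfaces analysis failures rather than half-indexing a document. A hedged sketch of how such a test typically continues; the field name and contents are illustrative, not taken from the original:

    Document doc = new Document();
    doc.add(new Field("content", "aa bb cc dd ee ff gg hh", Field.Store.NO, Field.Index.ANALYZED));
    try {
      writer.addDocument(doc);  // the sixth token read triggers the injected IOException
      fail("expected an IOException from the token stream");
    } catch (IOException e) {
      // expected: the failed document must not be added to the index
    }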