Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.reset()
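TokenStream.reset() positions a stream before its first token. The documented consumer workflow for the attribute-based API (Lucene 2.9+) is: obtain the stream, add the attributes you want to read, call reset() before the first incrementToken(), then call end() and close() once the stream is exhausted. The excerpts below each show part of that workflow. First, a minimal self-contained sketch of the full contract (the field name and input text are placeholders, not taken from any excerpt):

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class ResetExample {
      public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        TokenStream ts = analyzer.reusableTokenStream("body", new StringReader("Hello world, hello earth"));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                 // position before the first token
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString());
        }
        ts.end();                   // record the final offset state
        ts.close();                 // release the underlying Reader
      }
    }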


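Tokenizing a query and collecting each distinct word so that query expansion can skip duplicates: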
    if (a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // [1] Parse the query into separate words so that we can skip duplicates when expanding
    TokenStream ts = a.reusableTokenStream(field, new StringReader(query));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      String word = termAtt.toString();
      if (already.add(word))
        top.add(word);
    }


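An older excerpt that still drains the stream with the deprecated Token-based API, cloning each token for a term vector that is built on commit: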
          } else {
            tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
          }

          // reset the TokenStream to the first token         
          tokenStream.reset();

          final Token reusableToken = new Token();
          for (Token nextToken = tokenStream.next(reusableToken); nextToken != null; nextToken = tokenStream.next(reusableToken)) {
            tokens.add((Token) nextToken.clone()); // the vector will be built on commit.
            fieldSetting.fieldLength++;
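TokenStream.next(Token) was deprecated with the attribute API in Lucene 2.9 and later removed. A rough attribute-based equivalent of the loop above (a sketch, not the project's actual code; tokens and fieldSetting come from the excerpt, and position increments and payloads are not copied here):

    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      // Attribute instances are reused between calls, so capture their
      // values instead of keeping references to them.
      Token token = new Token(termAtt.toString(), offsetAtt.startOffset(), offsetAtt.endOffset());
      tokens.add(token); // the vector will be built on commit.
      fieldSetting.fieldLength++;
    }
    tokenStream.end();
    tokenStream.close();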

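From a ShingleMatrixFilter test: a rewindable token source (tls) is reset and wrapped in a freshly configured filter so the same tokens can be replayed: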
    assertNext(ts, "greetings_tellus");
    assertFalse(ts.incrementToken());

    // bi-grams with no spacer character, start offset, end offset

    tls.reset();
    ts = new ShingleMatrixFilter(tls, 2, 2, null, false, new ShingleMatrixFilter.TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
    assertNext(ts, "helloworld", 0, 10);
    assertNext(ts, "greetingsworld", 0, 10);
    assertNext(ts, "helloearth", 0, 10);
    assertNext(ts, "greetingsearth", 0, 10);

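The same replay pattern with an unlimited maximum shingle size and an underscore spacer; the commented-out loop was used to print the assertNext expectations: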
    assertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
    assertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
    assertFalse(ts.incrementToken());

    // test unlimited size and allow single boundary token as shingle
    tls.reset();
    ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, new Character('_'), false);

//
//  for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
//      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
//      token.clear();
//    }

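And once more with ignoringSinglePrefixOrSuffixShingle set to true, so a lone boundary token is not emitted as a shingle of its own: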
    assertFalse(ts.incrementToken());

    // test unlimited size but don't allow single boundary token as shingle

    tls.reset();
    ts = new ShingleMatrixFilter(tls, 1, Integer.MAX_VALUE, new Character('_'), true);
//  for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
//      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
//      token.clear();
//    }
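The tls variable in these tests is a rewindable test stream; outside of tests, Lucene's CachingTokenFilter provides the same replay behaviour: the first pass fills a cache, and each reset() afterwards rewinds to the first cached token. A sketch of that pattern (analyzer, field name and text are placeholders):

    TokenStream source = analyzer.tokenStream("f", new StringReader("hello world"));
    CachingTokenFilter cached = new CachingTokenFilter(source);
    CharTermAttribute termAtt = cached.addAttribute(CharTermAttribute.class);

    cached.reset();                        // first pass fills the cache
    while (cached.incrementToken()) {
      // first consumer reads termAtt here
    }

    cached.reset();                        // rewinds to the first cached token
    while (cached.incrementToken()) {
      // second consumer sees the same tokens again
    }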

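Building a Term from the first token of analyzed text (later tokens are handled in the elided else branch):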
    try
    {
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      Term term = null;
      ts.reset();
      while (ts.incrementToken()) {
        if (term == null)
        {
          term = new Term(fieldName, termAtt.toString());
        } else

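Collecting per-field stop words by running them through the analyzer; this excerpt shows the complete lifecycle: reset(), the incrementToken() loop, then end() and close():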
            {
                try
                {
                  TokenStream ts = analyzer.reusableTokenStream(fields[i], new StringReader(stopWords));
                  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                  ts.reset();
                  while(ts.incrementToken()) {
                      stopWordsSet.add(termAtt.toString());
                  }
                  ts.end();
                  ts.close();

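Walking a stream while skipping terms that have already been processed; a template Term is reused to avoid allocating a new Term object per token: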
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

        int corpusNumDocs = reader.numDocs();
        Term internSavingTemplateTerm = new Term(f.fieldName); // optimization to avoid constructing new Term() objects
        HashSet<String> processedTerms = new HashSet<String>();
        ts.reset();
        while (ts.incrementToken())
        {
          String term = termAtt.toString();
          if (!processedTerms.contains(term))
          {

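Turning every analyzed term into a SpanTermQuery clause: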
    {
      ArrayList<SpanQuery> clausesList = new ArrayList<SpanQuery>();
      TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(value));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);

      ts.reset();
      while (ts.incrementToken()) {
        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, termAtt.toString()));
        clausesList.add(stq);
      }
      ts.end();
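The excerpt ends before the collected clauses are combined; with the span API that is typically done with a SpanOrQuery. A hypothetical completion (the variable names follow the excerpt, but this step is an assumption, not the project's actual code):

      ts.close();
      // Hypothetical completion: OR the collected clauses together.
      SpanOrQuery query = new SpanOrQuery(
          clausesList.toArray(new SpanQuery[clausesList.size()]));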
