Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()


      TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      TypeAttribute typeAttr = ts.addAttribute(TypeAttribute.class);
      PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class);
      while (ts.incrementToken()) {
        assertNotNull(offsetAtt);
        assertNotNull(termAtt);
        assertNotNull(posAtt);
        assertNotNull(typeAttr);
        System.out.println("token '" + termAtt.toString() + "' has type " + typeAttr.type());
View Full Code Here


  public void baseUIMAPayloadsAnalyzerStreamTest() {
    try {
      TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood"));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      PayloadAttribute payloadAttribute = ts.addAttribute(PayloadAttribute.class);
      while (ts.incrementToken()) {
        assertNotNull(termAtt);
        assertNotNull(payloadAttribute);
        System.out.println("token '" + termAtt.toString() + "' has payload " + new String(payloadAttribute.getPayload().getData()));
      }
    } catch (Exception e) {
View Full Code Here

      TokenStream ts = analyzer.tokenStream("text", new StringReader(app
              .getString(inputText, "text")));
      app.removeAll(resultsList);
      ts.reset();

      while (ts.incrementToken()) {
        Object row = app.create("item");
        app.setString(row, "text", ((CharTermAttribute)ts.getAttribute(CharTermAttribute.class)).toString());
        app.add(resultsList, row);
        app.putProperty(row, "state", ts.cloneAttributes());
      }
View Full Code Here

        try {
            TokenStream stream = _analyzer.tokenStream("content", new StringReader(contentText));
            CharTermAttribute termAtt = (CharTermAttribute) stream.addAttribute(CharTermAttribute.class);

            stream.reset();
            while (stream.incrementToken()) {
                if (termAtt.length() > 0) {
                    String term = termAtt.toString();
                        result.add(term);
                }
            }
View Full Code Here

    StringReader reader = new StringReader( source );
    TokenStream ts = loadTokenizer(tokFactory, reader);
    List<String> tokList = new ArrayList<String>();
    try {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      while (ts.incrementToken()){
        if( termAtt.length() > 0 )
          tokList.add( termAtt.toString() );
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
View Full Code Here

    final Set<String> tokens = new HashSet<String>();
    final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
    final CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
    try {
      tokenStream.reset();
      while (tokenStream.incrementToken()) {
        tokens.add(termAtt.toString());
      }
    } catch (IOException ioe) {
      throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
    }
View Full Code Here

          FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
          TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
          PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
          PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setStartOffset(matcher.start());
            token.setEndOffset(matcher.end());
            token.setFlags(flagsAtt.getFlags());
View Full Code Here

    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
   
    while (ts.incrementToken()){
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      token.setType(typeAtt.type());
      token.setFlags(flagsAtt.getFlags());
View Full Code Here

    StringReader sr = new StringReader(s);
//    Analyzer analyzer =new StandardAnalyzer((Version) Enum.valueOf((Class) Class.forName("org.apache.lucene.util.Version"),  Version.LUCENE_35.name()));
    Analyzer analyzer = schema.getAnalyzer();//JobIndexPublic.setAnalyzer(conf);
    TokenStream tk=analyzer.tokenStream("rawquery", sr);

    boolean hasnext = tk.incrementToken();

    while(hasnext){

      TermAttribute ta = tk.getAttribute(TermAttribute.class);
View Full Code Here

      TermAttribute ta = tk.getAttribute(TermAttribute.class);

      System.out.println(ta.term());

      hasnext = tk.incrementToken();

    }
  }

    protected void cleanup(Context context) throws IOException,  InterruptedException {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.