Examples of CharTermAttribute


Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

      }

      // common case fast-path of first token not matching anything
      AttributeSource firstTok = nextTok();
      if (firstTok == null) return false;
      CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
      SlowSynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
      if (result == null) {
        copy(this, firstTok);
        return true;
      }

      // fast-path failed, clone ourselves if needed
      if (firstTok == this)
        firstTok = cloneAttributes();
      // OK, we matched a token, so find the longest match.

      matched = new LinkedList<AttributeSource>();

      result = match(result);

      if (result==null) {
        // no match, simply return the first token read.
        copy(this, firstTok);
        return true;
      }

      // reuse, or create new one each time?
      ArrayList<AttributeSource> generated = new ArrayList<AttributeSource>(result.synonyms.length + matched.size() + 1);

      //
      // there was a match... let's generate the new tokens, merging
      // in the matched tokens (position increments need adjusting)
      //
      AttributeSource lastTok = matched.isEmpty() ? firstTok : matched.getLast();
      boolean includeOrig = result.includeOrig();

      AttributeSource origTok = includeOrig ? firstTok : null;
      PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(PositionIncrementAttribute.class);
      int origPos = firstPosIncAtt.getPositionIncrement()// position of origTok in the original stream
      int repPos=0; // curr position in replacement token stream
      int pos=0// current position in merged token stream

      for (int i=0; i<result.synonyms.length; i++) {
        Token repTok = result.synonyms[i];
        AttributeSource newTok = firstTok.cloneAttributes();
        CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
        OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);

        OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);

        newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
        newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
        repPos += repTok.getPositionIncrement();
        if (i==0) repPos=origPos;  // make position of first token equal to original

        // if necessary, insert original tokens and adjust position increment
        while (origTok != null && origPos <= repPos) {
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

      if (tok != null) {
        // clone ourselves.
        if (tok == this)
          tok = cloneAttributes();
        // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        SlowSynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());

        if (subMap != null) {
          // recurse
          result = match(subMap);
        }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

  @Test public void testNameFilter() throws IOException {
    Reader in = new StringReader(input);
    Tokenizer tok = new SentenceTokenizer(in, detector);
    NameFilter nf = new NameFilter(tok, modelName, finder);

    CharTermAttribute cta;
    PositionIncrementAttribute pta;
    OffsetAttribute oa;
   
    int pass = 0;
   
    while (pass < 2) { // test reuse.
      int pos = 0;
      int lastStart = 0;
      int lastEnd   = 0;
     
      while (nf.incrementToken()) {
        cta = (CharTermAttribute) nf.getAttribute(CharTermAttribute.class);
        pta = (PositionIncrementAttribute) nf.getAttribute(PositionIncrementAttribute.class);
        oa  = (OffsetAttribute) nf.getAttribute(OffsetAttribute.class);
       
        System.err.println("'" + cta.toString() + "'");
        System.err.println(pta.toString());
        System.err.println(oa.toString());
        System.err.println("--- pass: " + pass);
       
        TestCase.assertEquals(tokenStrings[pos], cta.toString());
        TestCase.assertEquals(positionIncrements[pos], pta.getPositionIncrement());
       
        if (pta.getPositionIncrement() == 0) {
          TestCase.assertEquals(lastStart, oa.startOffset());
          TestCase.assertEquals(lastEnd, oa.endOffset());
        }
       
        if (!cta.toString().startsWith("NE_")) {
          TestCase.assertEquals(input.substring(oa.startOffset(), oa.endOffset()), cta.toString());
        }
       
        lastStart = oa.startOffset();
        lastEnd   = oa.endOffset();
       
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

   * @param analyzer The analyzer to use.
   */
  protected Set<String> getQueryTokenSet(String query, Analyzer analyzer) {
    final Set<String> tokens = new HashSet<String>();
    final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
    final CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
    try {
      tokenStream.reset();
      while (tokenStream.incrementToken()) {
        tokens.add(termAtt.toString());
      }
    } catch (IOException ioe) {
      throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
    }
    return tokens;
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

      String word = matcher.group(0);
      if (word.equals("AND") == false && word.equals("OR") == false) {
        try {
          stream = analyzer.reusableTokenStream("", new StringReader(word));
          // TODO: support custom attributes
          CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
          FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
          TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
          PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
          PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
          stream.reset();
          while (stream.incrementToken()) {
            Token token = new Token();
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
            token.setStartOffset(matcher.start());
            token.setEndOffset(matcher.end());
            token.setFlags(flagsAtt.getFlags());
            token.setType(typeAtt.type());
            token.setPayload(payloadAtt.getPayload());
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

  private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> result = new ArrayList<Token>();
    TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
    ts.reset();
    // TODO: support custom attributes
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
   
    while (ts.incrementToken()){
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      token.setType(typeAtt.type());
      token.setFlags(flagsAtt.getFlags());
      token.setPayload(payloadAtt.getPayload());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

      HashMap<String,ArrayIntList> terms = new HashMap<String,ArrayIntList>();
      int numTokens = 0;
      int numOverlapTokens = 0;
      int pos = -1;
     
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        String term = termAtt.toString();
        if (term.length() == 0) continue; // nothing to do
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

    args.put("modelDirectory", modelsDir.getAbsolutePath());
    factory.init(args);
   
    SentenceTokenizer tok = factory.create(new StringReader(inputString));
   
    CharTermAttribute cta;
    PositionIncrementAttribute pta;
    OffsetAttribute oa;

    int pass = 0;
   
    while (pass < 2) { // test reuse
      int pos = 0;
      int offset = 0;
     
      while (tok.incrementToken()) {
        cta = (CharTermAttribute) tok.getAttribute(CharTermAttribute.class);
        pta = (PositionIncrementAttribute) tok.getAttribute(PositionIncrementAttribute.class);
        oa  = (OffsetAttribute) tok.getAttribute(OffsetAttribute.class);
       
        System.err.println("'" + cta.toString() + "'");
        System.err.println(pta.toString());
        System.err.println(oa.toString());
        System.err.println("--- pass: " + pass);
       
        String expected = expectedStrings[pos];
        TestCase.assertEquals("Strings don't match", expected, cta.toString());
        TestCase.assertEquals("Positing increment is incorrect", 1, pta.getPositionIncrement());
        TestCase.assertEquals("Start offset is incorrect", offset, oa.startOffset());
        TestCase.assertEquals("End offset is incorrect",  offset + expected.length(), oa.endOffset());
       
        offset += expected.length() + 1; // space after end of sentence
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

      }

      if (!buffer.hasAttribute(CharTermAttribute.class)) {
        return new NoTokenFoundQueryNode();
      }
      final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

      if (numTokens == 0) {
        return new NoTokenFoundQueryNode();
      } else if (numTokens != 1) {
        // phrase query
        final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();

        for (int i = 0; i < numTokens; i++) {
          String term = null;

          try {
            final boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.toString();

          } catch (final IOException e) {
            // safe to ignore, because we know the number of tokens
          }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.CharTermAttribute

      }

      if (!buffer.hasAttribute(CharTermAttribute.class)) {
        return new NoTokenFoundQueryNode();
      }
      final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

      if (numTokens == 0) {
        if (nbTwigs != 0) { // Twig special case
          return new WildcardNodeQueryNode();
        }
        return new NoTokenFoundQueryNode();
      }
      else if (numTokens == 1) {
        String term = null;
        try {
          boolean hasNext;
          hasNext = buffer.incrementToken();
          assert hasNext == true;
          term = termAtt.toString();
        } catch (final IOException e) {
          // safe to ignore, because we know the number of tokens
        }
        fieldNode.setText(term);
        return fieldNode;
      }
      else {
        // no phrase query:
        final LinkedList<QueryNode> children = new LinkedList<QueryNode>();

        int position = -1;

        for (int i = 0; i < numTokens; i++) {
          String term = null;
          final int positionIncrement = 1;

          try {
            final boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.toString();

          } catch (final IOException e) {
            // safe to ignore, because we know the number of tokens
          }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.