Examples of TermToBytesRefAttribute


Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e, "disableCoord", false));
    bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0));
    try {
      TokenStream ts = analyzer.tokenStream(fieldName, text);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      Term term = null;
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        term = new Term(fieldName, BytesRef.deepCopyOf(bytes));
        bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
      }
      ts.end();
      ts.close();
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    String text = DOMUtils.getNonBlankTextOrFail(e);
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

    try {
      TokenStream ts = analyzer.tokenStream(fieldName, text);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        terms.add(BytesRef.deepCopyOf(bytes));
      }
      ts.end();
      ts.close();
    }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    String value = DOMUtils.getNonBlankTextOrFail(e);

    try {
      List<SpanQuery> clausesList = new ArrayList<SpanQuery>();
      TokenStream ts = analyzer.tokenStream(fieldName, value);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
        clausesList.add(stq);
      }
      ts.end();
      ts.close();
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

   *  automaton where arcs are bytes from each term. */
  public Automaton toAutomaton(TokenStream in) throws IOException {
    final Automaton a = new Automaton();
    boolean deterministic = true;

    final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
    final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);

    final BytesRef term = termBytesAtt.getBytesRef();

    in.reset();

    // Only temporarily holds states ahead of our current
    // position:

    final RollingBuffer<Position> positions = new Positions();

    int pos = -1;
    Position posData = null;
    int maxOffset = 0;
    while (in.incrementToken()) {
      int posInc = posIncAtt.getPositionIncrement();
      if (!preservePositionIncrements && posInc > 1) {
        posInc = 1;
      }
      assert pos > -1 || posInc > 0;

      if (posInc > 0) {

        // New node:
        pos += posInc;

        posData = positions.get(pos);
        assert posData.leaving == null;

        if (posData.arriving == null) {
          // No token ever arrived to this position
          if (pos == 0) {
            // OK: this is the first token
            posData.leaving = a.getInitialState();
          } else {
            // This means there's a hole (eg, StopFilter
            // does this):
            posData.leaving = new State();
            addHoles(a.getInitialState(), positions, pos);
          }
        } else {
          posData.leaving = new State();
          posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
          if (posInc > 1) {
            // A token spanned over a hole; add holes
            // "under" it:
            addHoles(a.getInitialState(), positions, pos);
          }
        }
        positions.freeBefore(pos);
      } else {
        // note: this isn't necessarily true. its just that we aren't surely det.
        // we could optimize this further (e.g. buffer and sort synonyms at a position)
        // but thats probably overkill. this is cheap and dirty
        deterministic = false;
      }

      final int endPos = pos + posLengthAtt.getPositionLength();

      termBytesAtt.fillBytesRef();
      final BytesRef term2 = changeToken(term);
      final Position endPosData = positions.get(endPos);
      if (endPosData.arriving == null) {
        endPosData.arriving = new State();
      }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

  protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
    List<BytesRef> bytesRefs = new ArrayList<BytesRef>();

    TokenStream tokenStream = analyzer.tokenStream(field, text);
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);

    BytesRef bytesRef = termAttribute.getBytesRef();

    tokenStream.reset();
   
    while (tokenStream.incrementToken()) {
      termAttribute.fillBytesRef();
      bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
    }

    tokenStream.end();
    tokenStream.close();
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      throws Exception {
    TokenStream ts1 = a1.tokenStream("bogus", text);
    TokenStream ts2 = a2.tokenStream("bogus", text);
    ts1.reset();
    ts2.reset();
    TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
    TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
    assertTrue(ts1.incrementToken());
    assertTrue(ts2.incrementToken());
    BytesRef bytes1 = termAtt1.getBytesRef();
    BytesRef bytes2 = termAtt2.getBytesRef();
    termAtt1.fillBytesRef();
    termAtt2.fillBytesRef();
    assertEquals(bytes1, bytes2);
    assertFalse(ts1.incrementToken());
    assertFalse(ts2.incrementToken());
    ts1.close();
    ts2.close();
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text");
      p.initCause(e);
      throw p;
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermToBytesRefAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    buffer.reset();

    if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
      termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
    }
    if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
      posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    boolean hasMoreTokens = false;
    if (termAtt != null) {
      try {
        hasMoreTokens = buffer.incrementToken();
        while (hasMoreTokens) {
          numTokens++;
          int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
          if (positionIncrement != 0) {
            positionCount += positionIncrement;
          } else {
            severalTokensAtSamePosition = true;
          }
          hasMoreTokens = buffer.incrementToken();
        }
      } catch (IOException e) {
        // ignore
      }
    }
    try {
      // rewind the buffer stream
      buffer.reset();

      // close original stream - all tokens buffered
      source.close();
    }
    catch (IOException e) {
      ParseException p = new ParseException("Cannot close TokenStream analyzing query text");
      p.initCause(e);
      throw p;
    }

    BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();

    if (numTokens == 0)
      return null;
    else if (numTokens == 1) {
      try {
        boolean hasNext = buffer.incrementToken();
        assert hasNext == true;
        termAtt.fillBytesRef();
      } catch (IOException e) {
        // safe to ignore, because we know the number of tokens
      }
      return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
    } else {
      if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
        if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
          // no phrase query:
         
          if (positionCount == 1) {
            // simple case: only one position, with synonyms
            BooleanQuery q = newBooleanQuery(true);
            for (int i = 0; i < numTokens; i++) {
              try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                termAtt.fillBytesRef();
              } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
              }
              Query currentQuery = newTermQuery(
                  new Term(field, BytesRef.deepCopyOf(bytes)));
              q.add(currentQuery, BooleanClause.Occur.SHOULD);
            }
            return q;
          } else {
            // multiple positions
            BooleanQuery q = newBooleanQuery(false);
            final BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
            Query currentQuery = null;
            for (int i = 0; i < numTokens; i++) {
              try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                termAtt.fillBytesRef();
              } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
              }
              if (posIncrAtt != null && posIncrAtt.getPositionIncrement() == 0) {
                if (!(currentQuery instanceof BooleanQuery)) {
                  Query t = currentQuery;
                  currentQuery = newBooleanQuery(true);
                  ((BooleanQuery)currentQuery).add(t, BooleanClause.Occur.SHOULD);
                }
                ((BooleanQuery)currentQuery).add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
              } else {
                if (currentQuery != null) {
                  q.add(currentQuery, occur);
                }
                currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
              }
            }
            q.add(currentQuery, occur);
            return q;
          }
        }
        else {
          // phrase query:
          MultiPhraseQuery mpq = newMultiPhraseQuery();
          mpq.setSlop(phraseSlop);
          List<Term> multiTerms = new ArrayList<Term>();
          int position = -1;
          for (int i = 0; i < numTokens; i++) {
            int positionIncrement = 1;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              termAtt.fillBytesRef();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }

            if (positionIncrement > 0 && multiTerms.size() > 0) {
              if (enablePositionIncrements) {
                mpq.add(multiTerms.toArray(new Term[0]),position);
              } else {
                mpq.add(multiTerms.toArray(new Term[0]));
              }
              multiTerms.clear();
            }
            position += positionIncrement;
            multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
          }
          if (enablePositionIncrements) {
            mpq.add(multiTerms.toArray(new Term[0]),position);
          } else {
            mpq.add(multiTerms.toArray(new Term[0]));
          }
          return mpq;
        }
      }
      else {
        PhraseQuery pq = newPhraseQuery();
        pq.setSlop(phraseSlop);
        int position = -1;

        for (int i = 0; i < numTokens; i++) {
          int positionIncrement = 1;

          try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            termAtt.fillBytesRef();
            if (posIncrAtt != null) {
              positionIncrement = posIncrAtt.getPositionIncrement();
            }
          } catch (IOException e) {
            // safe to ignore, because we know the number of tokens
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      source.reset();
    } catch (IOException e) {
      throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
    }
     
    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();

    try {
      if (!source.incrementToken())
        throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
      termAtt.fillBytesRef();
      if (source.incrementToken())
        throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
    } catch (IOException e) {
      throw new RuntimeException("error analyzing range part: " + part, e);
    }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      String s = _TestUtil.randomRealisticUnicodeString(random());
      if (other != null && s.equals(other)) {
        continue;
      }
      final TokenStream ts = a.tokenStream("foo", s);
      final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
      final BytesRef termBytes = termAtt.getBytesRef();
      ts.reset();

      int count = 0;
      boolean changed = false;

      while(ts.incrementToken()) {
        termAtt.fillBytesRef();
        if (count == 0 && !termBytes.utf8ToString().equals(s)) {
          // The value was changed during analysis.  Keep iterating so the
          // tokenStream is exhausted.
          changed = true;
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.