Examples of TermToBytesRefAttribute

org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute
must call termAtt.fillBytesRef() before doing something with the bytes. // this encodes the term value (internally it might be a char[], etc) into the bytes. int hashCode = termAtt.fillBytesRef(); if (isInteresting(bytes)) { // because the bytes are reused by the attribute (like CharTermAttribute's char[] buffer), // you should make a copy if you need persistent access to the bytes, otherwise they will // be rewritten across calls to incrementToken() doSomethingWith(new BytesRef(bytes)); } } ... @lucene.experimental This is a very expert API, please use{@link CharTermAttributeImpl} and its implementation of this methodfor UTF-8 terms.

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      throws Exception {
    TokenStream ts1 = a1.tokenStream("bogus", text);
    TokenStream ts2 = a2.tokenStream("bogus", text);
    ts1.reset();
    ts2.reset();
    TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
    TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
    assertTrue(ts1.incrementToken());
    assertTrue(ts2.incrementToken());
    BytesRef bytes1 = termAtt1.getBytesRef();
    BytesRef bytes2 = termAtt2.getBytesRef();
    termAtt1.fillBytesRef();
    termAtt2.fillBytesRef();
    assertEquals(bytes1, bytes2);
    assertFalse(ts1.incrementToken());
    assertFalse(ts2.incrementToken());
    ts1.close();
    ts2.close();

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    // and ensure they are the same as the ones we produced in serial fashion.


    for (int i = 0; i < numTestPoints; i++) {
      String term = TestUtil.randomSimpleString(random());
      try (TokenStream ts = analyzer.tokenStream("fake", term)) {
        TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        ts.reset();
        assertTrue(ts.incrementToken());
        termAtt.fillBytesRef();
        // ensure we make a copy of the actual bytes too
        map.put(term, BytesRef.deepCopyOf(bytes));
        assertFalse(ts.incrementToken());
        ts.end();
      }
    }
    
    Thread threads[] = new Thread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
              String term = mapping.getKey();
              BytesRef expected = mapping.getValue();
              try (TokenStream ts = analyzer.tokenStream("fake", term)) {
                TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
                BytesRef bytes = termAtt.getBytesRef();
                ts.reset();
                assertTrue(ts.incrementToken());
                termAtt.fillBytesRef();
                assertEquals(expected, bytes);
                assertFalse(ts.incrementToken());
                ts.end();
              }
            }

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute


      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName, 
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, -1, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
      
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    TokenStream source = null;
    try {
      source = analyzerIn.tokenStream(field, part);
      source.reset();
      
      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();


      if (!source.incrementToken())
        throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
      termAtt.fillBytesRef();
      if (source.incrementToken())
        throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
      source.end();
      return BytesRef.deepCopyOf(bytes);
    } catch (IOException e) {

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      throw new IllegalArgumentException("this suggester doesn't support contexts");
    }


    TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
    try {
      TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
      PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
      ts.reset();
      
      BytesRef[] lastTokens = new BytesRef[grams];
      //System.out.println("lookup: key='" + key + "'");
      
      // Run full analysis, but save only the
      // last 1gram, last 2gram, etc.:
      BytesRef tokenBytes = termBytesAtt.getBytesRef();
      int maxEndOffset = -1;
      boolean sawRealToken = false;
      while(ts.incrementToken()) {
        termBytesAtt.fillBytesRef();
        sawRealToken |= tokenBytes.length > 0;
        // TODO: this is somewhat iffy; today, ShingleFilter
        // sets posLen to the gram count; maybe we should make
        // a separate dedicated att for this?
        int gramCount = posLenAtt.getPositionLength();

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      String s = TestUtil.randomRealisticUnicodeString(random());
      if (other != null && s.equals(other)) {
        continue;
      }
      try (TokenStream ts = a.tokenStream("foo", s)) {
        final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
        final BytesRef termBytes = termAtt.getBytesRef();
        ts.reset();


        int count = 0;
        boolean changed = false;


        while(ts.incrementToken()) {
          termAtt.fillBytesRef();
          if (count == 0 && !termBytes.utf8ToString().equals(s)) {
            // The value was changed during analysis.  Keep iterating so the
            // tokenStream is exhausted.
            changed = true;
          }

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    // and ensure they are the same as the ones we produced in serial fashion.


    for (int i = 0; i < numTestPoints; i++) {
      String term = _TestUtil.randomSimpleString(random());
      TokenStream ts = analyzer.tokenStream("fake", term);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      assertTrue(ts.incrementToken());
      termAtt.fillBytesRef();
      // ensure we make a copy of the actual bytes too
      map.put(term, BytesRef.deepCopyOf(bytes));
    }
    
    Thread threads[] = new Thread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
              String term = mapping.getKey();
              BytesRef expected = mapping.getValue();
              TokenStream ts = analyzer.tokenStream("fake", term);
              TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
              BytesRef bytes = termAtt.getBytesRef();
              ts.reset();
              assertTrue(ts.incrementToken());
              termAtt.fillBytesRef();
              assertEquals(expected, bytes);
            }
          } catch (IOException e) {
            throw new RuntimeException(e);
          }

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      
      final TokenStream stream = field.tokenStream(analyzer);
      // reset the TokenStream to the first token
      stream.reset();


      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      while(stream.incrementToken()) {
        termAtt.fillBytesRef();
        tokenCount++;
      }
    }
    totalTokenCount += tokenCount;
    return tokenCount;

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute


      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName, 
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
      
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute


  protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
    List<BytesRef> bytesRefs = new ArrayList<BytesRef>();


    TokenStream tokenStream = analyzer.tokenStream(field, text);
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);


    BytesRef bytesRef = termAttribute.getBytesRef();


    tokenStream.reset();
    
    while (tokenStream.incrementToken()) {
      termAttribute.fillBytesRef();
      bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
    }


    tokenStream.end();
    tokenStream.close();

View Full Code Here

0 1 2 3 4 5

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.