Examples of TermToBytesRefAttribute

org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute
must call termAtt.fillBytesRef() before doing something with the bytes. // this encodes the term value (internally it might be a char[], etc) into the bytes. int hashCode = termAtt.fillBytesRef(); if (isInteresting(bytes)) { // because the bytes are reused by the attribute (like CharTermAttribute's char[] buffer), // you should make a copy if you need persistent access to the bytes, otherwise they will // be rewritten across calls to incrementToken() doSomethingWith(new BytesRef(bytes)); } } ... @lucene.experimental This is a very expert API, please use{@link CharTermAttributeImpl} and its implementation of this methodfor UTF-8 terms.

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    for (int i = 0; i < numTestPoints; i++) {
      String term = _TestUtil.randomSimpleString(random());
      IOException priorException = null;
      TokenStream ts = analyzer.tokenStream("fake", term);
      try {
        TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        ts.reset();
        assertTrue(ts.incrementToken());
        termAtt.fillBytesRef();
        // ensure we make a copy of the actual bytes too
        map.put(term, BytesRef.deepCopyOf(bytes));
        assertFalse(ts.incrementToken());
        ts.end();
      } catch (IOException e) {
        priorException = e;
      } finally {
        IOUtils.closeWhileHandlingException(priorException, ts);
      }
    }
    
    Thread threads[] = new Thread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
              String term = mapping.getKey();
              BytesRef expected = mapping.getValue();
              IOException priorException = null;
              TokenStream ts = analyzer.tokenStream("fake", term);
              try {
                TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
                BytesRef bytes = termAtt.getBytesRef();
                ts.reset();
                assertTrue(ts.incrementToken());
                termAtt.fillBytesRef();
                assertEquals(expected, bytes);
                assertFalse(ts.incrementToken());
                ts.end();
              } catch (IOException e) {
                priorException = e;

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      throws Exception {
    TokenStream ts1 = a1.tokenStream("bogus", text);
    TokenStream ts2 = a2.tokenStream("bogus", text);
    ts1.reset();
    ts2.reset();
    TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
    TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
    assertTrue(ts1.incrementToken());
    assertTrue(ts2.incrementToken());
    BytesRef bytes1 = termAtt1.getBytesRef();
    BytesRef bytes2 = termAtt2.getBytesRef();
    termAtt1.fillBytesRef();
    termAtt2.fillBytesRef();
    assertEquals(bytes1, bytes2);
    assertFalse(ts1.incrementToken());
    assertFalse(ts2.incrementToken());
    ts1.close();
    ts2.close();

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");


    TokenStream ts = null;
    try {
      ts = analyzer.tokenStream(fieldName, text);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        terms.add(BytesRef.deepCopyOf(bytes));
      }
      ts.end();
    }
    catch (IOException ioe) {

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    List<SpanQuery> clausesList = new ArrayList<SpanQuery>();


    TokenStream ts = null;
    try {
      ts = analyzer.tokenStream(fieldName, value);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
        clausesList.add(stq);
      }
      ts.end();
      SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute


  /** Retrieve suggestions. */
  public List<LookupResult> lookup(final CharSequence key, int num) throws IOException {
    TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
    try {
      TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
      PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
      ts.reset();
      
      BytesRef[] lastTokens = new BytesRef[grams];
      //System.out.println("lookup: key='" + key + "'");
      
      // Run full analysis, but save only the
      // last 1gram, last 2gram, etc.:
      BytesRef tokenBytes = termBytesAtt.getBytesRef();
      int maxEndOffset = -1;
      boolean sawRealToken = false;
      while(ts.incrementToken()) {
        termBytesAtt.fillBytesRef();
        sawRealToken |= tokenBytes.length > 0;
        // TODO: this is somewhat iffy; today, ShingleFilter
        // sets posLen to the gram count; maybe we should make
        // a separate dedicated att for this?
        int gramCount = posLenAtt.getPositionLength();

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute


      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName, 
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
      
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    TokenStream source = null;
    try {
      source = analyzerIn.tokenStream(field, part);
      source.reset();
      
      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();


      if (!source.incrementToken())
        throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
      termAtt.fillBytesRef();
      if (source.incrementToken())
        throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
      source.end();
      return BytesRef.deepCopyOf(bytes);
    } catch (IOException e) {

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

  protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
    List<BytesRef> bytesRefs = new ArrayList<BytesRef>();


    TokenStream tokenStream = analyzer.tokenStream(field, text);
    try {
      TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);


      BytesRef bytesRef = termAttribute.getBytesRef();


      tokenStream.reset();
    
      while (tokenStream.incrementToken()) {
        termAttribute.fillBytesRef();
        bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
      }


      tokenStream.end();
    } finally {

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

  static final int ivalue = 123456;


  public void testLongStream() throws Exception {
    final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
    // use getAttribute to test if attributes really exist, if not an IAE will be throwed
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
    final BytesRef bytes = bytesAtt.getBytesRef();
    stream.reset();
    assertEquals(64, numericAtt.getValueSize());
    for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Shift value wrong", shift, numericAtt.getShift());
      final int hash = bytesAtt.fillBytesRef();
      assertEquals("Hash incorrect", bytes.hashCode(), hash);
      assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
      assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
      assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
    }

View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

  }


  public void testIntStream() throws Exception {
    final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
    // use getAttribute to test if attributes really exist, if not an IAE will be throwed
    final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
    final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
    final BytesRef bytes = bytesAtt.getBytesRef();
    stream.reset();
    assertEquals(32, numericAtt.getValueSize());
    for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
      assertTrue("New token is available", stream.incrementToken());
      assertEquals("Shift value wrong", shift, numericAtt.getShift());
      final int hash = bytesAtt.fillBytesRef();
      assertEquals("Hash incorrect", bytes.hashCode(), hash);
      assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
      assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
      assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
    }

View Full Code Here

0 1 2 3 4 5

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.