Examples of TermToBytesRefAttribute


Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      throws Exception {
    TokenStream ts1 = a1.tokenStream("bogus", text);
    TokenStream ts2 = a2.tokenStream("bogus", text);
    ts1.reset();
    ts2.reset();
    TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
    TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
    assertTrue(ts1.incrementToken());
    assertTrue(ts2.incrementToken());
    BytesRef bytes1 = termAtt1.getBytesRef();
    BytesRef bytes2 = termAtt2.getBytesRef();
    termAtt1.fillBytesRef();
    termAtt2.fillBytesRef();
    assertEquals(bytes1, bytes2);
    assertFalse(ts1.incrementToken());
    assertFalse(ts2.incrementToken());
    ts1.close();
    ts2.close();
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    // and ensure they are the same as the ones we produced in serial fashion.

    for (int i = 0; i < numTestPoints; i++) {
      String term = TestUtil.randomSimpleString(random());
      try (TokenStream ts = analyzer.tokenStream("fake", term)) {
        TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        ts.reset();
        assertTrue(ts.incrementToken());
        termAtt.fillBytesRef();
        // ensure we make a copy of the actual bytes too
        map.put(term, BytesRef.deepCopyOf(bytes));
        assertFalse(ts.incrementToken());
        ts.end();
      }
    }
   
    Thread threads[] = new Thread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
              String term = mapping.getKey();
              BytesRef expected = mapping.getValue();
              try (TokenStream ts = analyzer.tokenStream("fake", term)) {
                TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
                BytesRef bytes = termAtt.getBytesRef();
                ts.reset();
                assertTrue(ts.incrementToken());
                termAtt.fillBytesRef();
                assertEquals(expected, bytes);
                assertFalse(ts.incrementToken());
                ts.end();
              }
            }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, -1, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    TokenStream source = null;
    try {
      source = analyzerIn.tokenStream(field, part);
      source.reset();
     
      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();

      if (!source.incrementToken())
        throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
      termAtt.fillBytesRef();
      if (source.incrementToken())
        throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
      source.end();
      return BytesRef.deepCopyOf(bytes);
    } catch (IOException e) {
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      throw new IllegalArgumentException("this suggester doesn't support contexts");
    }

    TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
    try {
      TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
      PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
      ts.reset();
     
      BytesRef[] lastTokens = new BytesRef[grams];
      //System.out.println("lookup: key='" + key + "'");
     
      // Run full analysis, but save only the
      // last 1gram, last 2gram, etc.:
      BytesRef tokenBytes = termBytesAtt.getBytesRef();
      int maxEndOffset = -1;
      boolean sawRealToken = false;
      while(ts.incrementToken()) {
        termBytesAtt.fillBytesRef();
        sawRealToken |= tokenBytes.length > 0;
        // TODO: this is somewhat iffy; today, ShingleFilter
        // sets posLen to the gram count; maybe we should make
        // a separate dedicated att for this?
        int gramCount = posLenAtt.getPositionLength();
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      String s = TestUtil.randomRealisticUnicodeString(random());
      if (other != null && s.equals(other)) {
        continue;
      }
      try (TokenStream ts = a.tokenStream("foo", s)) {
        final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
        final BytesRef termBytes = termAtt.getBytesRef();
        ts.reset();

        int count = 0;
        boolean changed = false;

        while(ts.incrementToken()) {
          termAtt.fillBytesRef();
          if (count == 0 && !termBytes.utf8ToString().equals(s)) {
            // The value was changed during analysis.  Keep iterating so the
            // tokenStream is exhausted.
            changed = true;
          }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

    // and ensure they are the same as the ones we produced in serial fashion.

    for (int i = 0; i < numTestPoints; i++) {
      String term = _TestUtil.randomSimpleString(random());
      TokenStream ts = analyzer.tokenStream("fake", term);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      assertTrue(ts.incrementToken());
      termAtt.fillBytesRef();
      // ensure we make a copy of the actual bytes too
      map.put(term, BytesRef.deepCopyOf(bytes));
    }
   
    Thread threads[] = new Thread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
              String term = mapping.getKey();
              BytesRef expected = mapping.getValue();
              TokenStream ts = analyzer.tokenStream("fake", term);
              TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
              BytesRef bytes = termAtt.getBytesRef();
              ts.reset();
              assertTrue(ts.incrementToken());
              termAtt.fillBytesRef();
              assertEquals(expected, bytes);
            }
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

     
      final TokenStream stream = field.tokenStream(analyzer);
      // reset the TokenStream to the first token
      stream.reset();

      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      while(stream.incrementToken()) {
        termAtt.fillBytesRef();
        tokenCount++;
      }
    }
    totalTokenCount += tokenCount;
    return tokenCount;
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS , null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
View Full Code Here

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute

  protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
    List<BytesRef> bytesRefs = new ArrayList<BytesRef>();

    TokenStream tokenStream = analyzer.tokenStream(field, text);
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);

    BytesRef bytesRef = termAttribute.getBytesRef();

    tokenStream.reset();
   
    while (tokenStream.incrementToken()) {
      termAttribute.fillBytesRef();
      bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
    }

    tokenStream.end();
    tokenStream.close();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.