Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Token.termLength()
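
Token.termLength() returns the number of valid characters in the buffer returned by Token.termBuffer(); the term text is therefore reconstructed as new String(termBuffer(), 0, termLength()). Every snippet below follows that pattern. As a minimal, self-contained sketch of the idiom (assuming the pre-2.9 Token-based TokenStream API; WhitespaceAnalyzer, the class name, and the field name are illustrative only):

    import java.io.StringReader;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceAnalyzer;

    public class TermLengthExample {
      // Tokenize the input and return each term as a String.
      public static List<String> terms(String text) throws Exception {
        TokenStream ts = new WhitespaceAnalyzer()
            .tokenStream("field", new StringReader(text));
        List<String> result = new ArrayList<String>();
        Token token = new Token();
        while ((token = ts.next(token)) != null) {
          // termLength() bounds the valid region of termBuffer()
          result.add(new String(token.termBuffer(), 0, token.termLength()));
        }
        ts.close();
        return result;
      }
    }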


    Token token;
    List<String> coll = new ArrayList<String>();
    while ((token = ts.next()) != null) {
      char[] termBuffer = token.termBuffer();
      int termLen = token.termLength();
      String val = new String(termBuffer, 0, termLen);
      coll.add(val);
    }
    return coll.toArray(new String[coll.size()]);
  }
View Full Code Here


                                Fieldable[] fields = aDoc.getFieldables(FieldNames.PROPERTIES);
                                Token t = new Token();
                                for (Fieldable field : fields) {
                                    // assume properties fields use SingleTokenStream
                                    t = field.tokenStreamValue().next(t);
                                    String value = new String(t.termBuffer(), 0, t.termLength());
                                    if (value.startsWith(namePrefix)) {
                                        // extract value
                                        value = value.substring(namePrefix.length());
                                        // create new named value
                                        Path p = getRelativePath(state, propState);
View Full Code Here


    if (t == null)
      return null;

    char[] buffer = t.termBuffer();
    final int bufferLength = t.termLength();
    final String type = t.type();

    if (type == APOSTROPHE_TYPE &&      // remove 's
        bufferLength >= 2 &&
        buffer[bufferLength-2] == '\'' &&
View Full Code Here

                        {
                           Fieldable field = fields[k];
                           // assume properties fields use
                           // SingleTokenStream
                           t = field.tokenStreamValue().next(t);
                           String value = new String(t.termBuffer(), 0, t.termLength());
                           if (value.startsWith(namePrefix))
                           {
                              // extract value
                              value = value.substring(namePrefix.length());
                              // create new named value
View Full Code Here

      writer.write('\t'); // in order to match the Hadoop standard TextInputFormat
      Token token = new Token();
      while ((token = ts.next(token)) != null) {
        char[] termBuffer = token.termBuffer();
        int termLen = token.termLength();
        writer.write(termBuffer, 0, termLen);
        writer.write(' ');
      }
    } finally {
      IOUtils.quietClose(reader);
View Full Code Here


    if(!catMatch.equals("Unknown")){
      document = StringEscapeUtils.unescapeHtml(
          CLOSE_TEXT_TAG_PATTERN
              .matcher(OPEN_TEXT_TAG_PATTERN.matcher(document).replaceFirst(""))
              .replaceAll(""));
      TokenStream stream = analyzer.tokenStream(catMatch, new StringReader(document));
      Token token = new Token();
      while((token = stream.next(token)) != null){
        contents.append(token.termBuffer(), 0, token.termLength()).append(' ');
      }
      output.collect(new Text(SPACE_NON_ALPHA_PATTERN.matcher(catMatch).replaceAll("_")), new Text(contents.toString()));
    }
  }
View Full Code Here

        Token t;
        StringBuilder sb = new StringBuilder();
        try {
            while ((t = psf.next()) != null) {
                sb.setLength(0);
                sb.append(t.termBuffer(), 0, t.termLength());
                //System.out.println(sb.toString());
                features.add(sb.toString());
            }
        } catch (IOException e) {
            // should never happen! We're reading a flippin' STRING!
View Full Code Here

        fields.add(field);
      } else {
        Token nextToken = tokenStream.next(new Token());
        while (nextToken != null) {
          String value = new String(nextToken.termBuffer(), 0,
              nextToken.termLength());
          field = new Field(fieldName, value, fieldStore, index);
          if (unique)
            field.setOmitTermFreqAndPositions(omitTF);
          fields.add(field);
          nextToken = tokenStream.next(nextToken);
View Full Code Here
