Examples of org.apache.lucene.analysis.ar.ArabicNormalizationFilter

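Most of the snippets below build the same kind of analysis chain: a tokenizer, usually a LowerCaseFilter, then ArabicNormalizationFilter (which normalizes Arabic orthography, for example removing diacritics and tatweel and folding alef variants), followed by a stemming or language-specific step. For orientation, here is a minimal self-contained sketch of that chain. It assumes the Lucene 3.5 APIs and package layout that the snippets use; the class name and the sample input are placeholders, not taken from any of the sources below.

import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class ArabicNormalizationSketch {
  public static void main(String[] args) throws Exception {
    // tokenize -> lowercase -> normalize Arabic orthography -> stem
    TokenStream ts = new StandardTokenizer(Version.LUCENE_35, new StringReader("الكتاب الجديد"));
    ts = new LowerCaseFilter(Version.LUCENE_35, ts);
    ts = new ArabicNormalizationFilter(ts);
    ts = new ArabicStemFilter(ts);

    // drain the stream and print each normalized, stemmed token
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString());
    }
    ts.end();
    ts.close();
  }
}
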

  // From a TokenFilterFactory-style class: wrap the incoming TokenStream
  // in an ArabicNormalizationFilter.
  @Override
  public ArabicNormalizationFilter create(TokenStream input) {
    return new ArabicNormalizationFilter(input);
  }

Examples of org.apache.lucene.analysis.ar.ArabicNormalizationFilter

   *         {@link PersianNormalizationFilter} and Persian Stop words
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new ArabicLetterTokenizer(reader);
    result = new LowerCaseFilter(result);
    result = new ArabicNormalizationFilter(result);
    /* additional persian-specific normalization */
    result = new PersianNormalizationFilter(result);
    /*
     * the order here is important: the stopword list is normalized with the
     * above!
     */

Examples of org.apache.lucene.analysis.ar.ArabicNormalizationFilter

    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new ArabicLetterTokenizer(reader);
      streams.result = new LowerCaseFilter(streams.source);
      streams.result = new ArabicNormalizationFilter(streams.result);
      /* additional persian-specific normalization */
      streams.result = new PersianNormalizationFilter(streams.result);
      /*
       * the order here is important: the stopword list is normalized with the
       * above!
       */

Examples of org.apache.lucene.analysis.ar.ArabicNormalizationFilter


  @Override
  public String stem(String token) {
    // tokenize the input, then normalize and stem each resulting token
    tokenizer = new StandardTokenizer(Version.LUCENE_35, new StringReader(token));
    TokenStream tokenStream = new ArabicStemFilter(new ArabicNormalizationFilter(tokenizer));
    CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
    tokenStream.clearAttributes();
    StringBuilder stemmed = new StringBuilder();
    try {
      while (tokenStream.incrementToken()) {

Examples of org.apache.lucene.analysis.ar.ArabicNormalizationFilter

    if (isStemming()) {
      // re-tokenize the accumulated text and run it through normalization and stemming
      tokenizer = new StandardTokenizer(Version.LUCENE_35, new StringReader(finalTokenized.toString().trim()));
      tokenStream = new ArabicStemFilter(new ArabicNormalizationFilter(tokenizer));
      CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
      tokenStream.clearAttributes();
      try {
        while (tokenStream.incrementToken()) {
          String curToken = termAtt.toString();

Examples of org.apache.lucene.analysis.ar.ArabicNormalizationFilter

    // tokenize, lowercase, optionally remove stopwords, normalize Arabic orthography, optionally stem
    tokenizer = new StandardTokenizer(Version.LUCENE_35, new StringReader(text));
    TokenStream tokenStream = new LowerCaseFilter(Version.LUCENE_35, tokenizer);
    if (isStopwordRemoval) {
      tokenStream = new StopFilter(Version.LUCENE_35, tokenStream, (CharArraySet) ArabicAnalyzer.getDefaultStopSet());
    }
    tokenStream = new ArabicNormalizationFilter(tokenStream);
    if (isStemming) {
      tokenStream = new ArabicStemFilter(tokenStream);
    }

    CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);

Examples of org.apache.lucene.analysis.ar.ArabicNormalizationFilter

  @Override
  public String stem(String token) {
    tokenizer = new StandardTokenizer(Version.LUCENE_35, new StringReader(token));
    TokenStream tokenStream = new LowerCaseFilter(Version.LUCENE_35, tokenizer);
    tokenStream = new ArabicNormalizationFilter(tokenStream);
    tokenStream = new ArabicStemFilter(tokenStream);

    CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
    tokenStream.clearAttributes();
    try {

Examples of org.apache.lucene.analysis.ar.ArabicNormalizationFilter

      source = new StandardTokenizer(matchVersion, reader);
    } else {
      source = new ArabicLetterTokenizer(matchVersion, reader);
    }
    TokenStream result = new LowerCaseFilter(matchVersion, source);
    result = new ArabicNormalizationFilter(result);
    /* additional persian-specific normalization */
    result = new PersianNormalizationFilter(result);
    /*
     * the order here is important: the stopword list is normalized with the
     * above!
     */
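Several of the excerpts above come from a Persian analysis chain that layers PersianNormalizationFilter and a stop filter on top of ArabicNormalizationFilter. The comment repeated in those excerpts matters: the stop set contains normalized forms, so StopFilter has to come after the normalization filters. Below is a self-contained sketch of that ordering, again assuming Lucene 3.5; the class name and sample input are placeholders, and PersianAnalyzer.getDefaultStopSet() mirrors the ArabicAnalyzer.getDefaultStopSet() call seen earlier rather than coming from these snippets.

import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class PersianChainSketch {
  public static void main(String[] args) throws Exception {
    // letter tokenizer -> lowercase -> Arabic normalization -> Persian normalization
    // -> stopword removal (last, because the stop set holds normalized forms)
    TokenStream ts = new ArabicLetterTokenizer(Version.LUCENE_35, new StringReader("کتاب های جدید"));
    ts = new LowerCaseFilter(Version.LUCENE_35, ts);
    ts = new ArabicNormalizationFilter(ts);
    ts = new PersianNormalizationFilter(ts);
    ts = new StopFilter(Version.LUCENE_35, ts, PersianAnalyzer.getDefaultStopSet());

    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString());
    }
    ts.end();
    ts.close();
  }
}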