Examples of org.apache.lucene.analysis.CharArraySet$CharArraySetIterator

org.apache.lucene.analysis.CharArraySet
The Iterator for this set. Strings are constructed on the fly, so use nextCharArray for more efficient access.

      boolean ignoreCase = false;
      String ignoreStr = args.get(KEEP_IGNORE_CASE);
      if ("true".equalsIgnoreCase(ignoreStr)) {
        ignoreCase = true;
      }
      keep = new CharArraySet(10, ignoreCase);
      while (st.hasMoreTokens()) {
        k = st.nextToken().trim();
        keep.add(k.toCharArray());
      }
    }

View Full Code Here

        boolean ignoreCase = settings.getAsBoolean("ignore_case", false);
        Set<String> rules = Analysis.getWordSet(env, settings, "keywords");
        if (rules == null) {
            throw new ElasticSearchIllegalArgumentException("keyword filter requires either `keywords` or `keywords_path` to be configured");
        }
        keywordLookup = new CharArraySet(Version.LUCENE_32, rules, ignoreCase);
    }

View Full Code Here

      try {
        File protectedWordFiles = new File(wordFiles);
        if (protectedWordFiles.exists()) {
          List<String> wlist = loader.getLines(wordFiles);
          //This cast is safe in Lucene
          protectedWords = new CharArraySet(wlist, false);//No need to go through StopFilter as before, since it just uses a List internally
        } else  {
          List<String> files = StrUtils.splitFileNames(wordFiles);
          for (String file : files) {
            List<String> wlist = loader.getLines(file.trim());
            if (protectedWords == null)
              protectedWords = new CharArraySet(wlist, false);
            else
              protectedWords.addAll(wlist);
          }
        }
      } catch (IOException e) {

View Full Code Here

      boolean ignoreCase) {
    super(input);
    if (commonWords instanceof CharArraySet) {
      this.commonWords = (CharArraySet) commonWords;
    } else {
      this.commonWords = new CharArraySet(commonWords.size(), ignoreCase);
      this.commonWords.addAll(commonWords);
    }
    init();
  }

View Full Code Here

   * @param ignoreCase If true, all words are lower cased first.
   * @return a Set containing the words
   */
  public static final CharArraySet makeCommonSet(String[] commonWords,
      boolean ignoreCase) {
    CharArraySet commonSet = new CharArraySet(commonWords.length, ignoreCase);
    commonSet.addAll(Arrays.asList(commonWords));
    return commonSet;
  }

View Full Code Here

      try {
        File protectedWordFiles = new File(wordFiles);
        if (protectedWordFiles.exists()) {
          List<String> wlist = loader.getLines(wordFiles);
          //This cast is safe in Lucene
          protectedWords = new CharArraySet(wlist, false);//No need to go through StopFilter as before, since it just uses a List internally
        } else  {
          List<String> files = StrUtils.splitFileNames(wordFiles);
          for (String file : files) {
            List<String> wlist = loader.getLines(file.trim());
            if (protectedWords == null)
              protectedWords = new CharArraySet(wlist, false);
            else
              protectedWords.addAll(wlist);
          }
        }
      } catch (IOException e) {

View Full Code Here

    if (stopWordFiles != null) {
      try {
        List<String> files = StrUtils.splitFileNames(stopWordFiles);
          if (stopWords == null && files.size() > 0){
            //default stopwords list has 35 or so words, but maybe don't make it that big to start
            stopWords = new CharArraySet(files.size() * 10, ignoreCase);
          }
          for (String file : files) {
            List<String> wlist = loader.getLines(file.trim());
            //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
            stopWords.addAll(StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), ignoreCase));

View Full Code Here

    ignoreCase = getBoolean("ignoreCase", false);
    if (wordFiles != null) {
      try {
        List<String> files = StrUtils.splitFileNames(wordFiles);
        if (words == null && files.size() > 0){
          words = new CharArraySet(files.size() * 10, ignoreCase);
        }
        for (String file : files) {
          List<String> wlist = loader.getLines(file.trim());
          //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
          words.addAll(StopFilter.makeStopSet((String[]) wlist.toArray(new String[0]), ignoreCase));

View Full Code Here

  /**
   * Set the keep word list.
   * NOTE: if ignoreCase==true, the words are expected to be lowercase
   */
  public void setWords(Set<String> words) {
    this.words = new CharArraySet(words, ignoreCase);
  }

View Full Code Here

    if (commonWordFiles != null) {
      try {
        List<String> files = StrUtils.splitFileNames(commonWordFiles);
          if (commonWords == null && files.size() > 0){
            //default stopwords list has 35 or so words, but maybe don't make it that big to start
            commonWords = new CharArraySet(files.size() * 10, ignoreCase);
          }
          for (String file : files) {
            List<String> wlist = loader.getLines(file.trim());
            //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call
            commonWords.addAll(CommonGramsFilter.makeCommonSet((String[])wlist.toArray(new String[0]), ignoreCase));

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.analysis.CharArraySet$CharArraySetIterator

com.github.le11.nls.lucene.TypeAwareStopFilter

com.github.le11.nls.solr.TypeAwareStopFilterFactory

de.jetwick.solrplugin.TWordDelimiterFilterFactory

edu.wiki.index.WikipediaAnalyzer

org.apache.lucene.analysis.ar.TestArabicAnalyzer

org.apache.lucene.analysis.ar.TestArabicStemFilter

org.apache.lucene.analysis.bg.TestBulgarianAnalyzer

org.apache.lucene.analysis.bg.TestBulgarianStemmer

org.apache.lucene.analysis.br.TestBrazilianStemmer

org.apache.lucene.analysis.ca.TestCatalanAnalyzer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.