Examples of ASCIIFoldingFilter

See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode The set of character conversions supported by this class is a superset of those supported by Lucene's {@link ISOLatin1AccentFilter} which strips accents from Latin1 characters. For example, 'à' will be replaced by 'a'.
  • org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter
  • C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
  • Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
  • Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
  • Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
  • Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
  • Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
  • IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
  • Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
  • Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
  • General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
  • Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
  • Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
  • Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
  • Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
  • Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
  • Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode For example, 'à' will be replaced by 'a'.

  • Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

      public void testInvalidOffsets() throws Exception {
        Analyzer analyzer = new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
            TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
            filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
            return new TokenStreamComponents(tokenizer, filters);
          }
        };
        assertAnalyzesTo(analyzer, "mosfellsbær",
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

      public void testInvalidOffsets() throws Exception {
        Analyzer analyzer = new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
            TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
            filters = new EdgeNGramTokenFilter(Version.LUCENE_43, filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
            return new TokenStreamComponents(tokenizer, filters);
          }
        };
        assertAnalyzesTo(analyzer, "mosfellsbær",
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

        protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
            final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
            src.setMaxTokenLength(maxTokenLength);
    //        src.setReplaceInvalidAcronym(replaceInvalidAcronym);
            TokenStream tok = new StandardFilter(matchVersion, src);
            tok = new ASCIIFoldingFilter(tok);
            tok = new LowerCaseFilter(matchVersion, tok);
            tok = new StopFilter(matchVersion, tok, stopwords);
            return new TokenStreamComponents(src, tok);
    //        {
    //            @Override
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

      public void testInvalidOffset() throws Exception {
        Analyzer analyzer = new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
            TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
            filters = new WordTokenFilter(filters);
            return new TokenStreamComponents(tokenizer, filters);
          }
        };
       
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

      public void testInvalidOffsets() throws Exception {
        Analyzer analyzer = new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
            TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
            filters = new NGramTokenFilter(filters, 2, 2);
            return new TokenStreamComponents(tokenizer, filters);
          }
        };
        assertAnalyzesTo(analyzer, "mosfellsbær",
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

      public void testInvalidOffsets() throws Exception {
        Analyzer analyzer = new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
            TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
            filters = new EdgeNGramTokenFilter(filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
            return new TokenStreamComponents(tokenizer, filters);
          }
        };
        assertAnalyzesTo(analyzer, "mosfellsbær",
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

         * @return the {@link TokenStreamComponents} for this analyzer.
         */
        @Override
        protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
            final Tokenizer source = new StandardTokenizer(Geonet.LUCENE_VERSION, reader);
            ASCIIFoldingFilter asciiFoldingFilter = new ASCIIFoldingFilter(new LowerCaseFilter(Geonet.LUCENE_VERSION,
                    new StandardFilter(Geonet.LUCENE_VERSION, source)));

            if (this.stopwords != null && !this.stopwords.isEmpty()) {
                return new TokenStreamComponents(source, new StopFilter(Geonet.LUCENE_VERSION, asciiFoldingFilter, this.stopwords)) {
                    @Override
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

                @Override
                protected Analyzer.TokenStreamComponents createComponents(String fieldName, Reader reader) {
                    StandardTokenizer source = new StandardTokenizer(Version.LUCENE_45, reader);

                    TokenStream filter = new ASCIIFoldingFilter(new LemmagenFilter(
                            new LowerCaseFilter(TEST_VERSION_CURRENT,
                                    new StandardFilter(TEST_VERSION_CURRENT, source)), "mlteast-sk", TEST_VERSION_CURRENT));
                    return new Analyzer.TokenStreamComponents(source, filter);
                }
            };
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

    public final class ISOLatin1Analyzer extends Analyzer {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        StandardTokenizer tokenizer = new StandardTokenizer( TestConstants.getTargetLuceneVersion(), reader );
        TokenStream filter = new ASCIIFoldingFilter( tokenizer );
        return new TokenStreamComponents( tokenizer, filter );
      }
    View Full Code Here

    Examples of org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter

        }
      }
     
      @Override
      public ASCIIFoldingFilter create(TokenStream input) {
        return new ASCIIFoldingFilter(input, preserveOriginal);
      }
    View Full Code Here
    TOP
    Copyright © 2018 www.massapi.com. All rights reserved.
    All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.