Package org.elasticsearch.index.analysis

Examples of org.elasticsearch.index.analysis.TokenFilterFactory


    @Test
    public void test() throws IOException {
        AnalysisService analysisService = createAnalysisService();


        TokenFilterFactory tokenFilter = analysisService.tokenFilter("decomp");
        MatcherAssert.assertThat(tokenFilter, Matchers.instanceOf(DecompoundTokenFilterFactory.class));

        String source = "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";

        String[] expected = {
            "Die",
            "Die",
            "Jahresfeier",
            "Jahr",
            "feier",
            "der",
            "der",
            "Rechtsanwaltskanzleien",
            "Recht",
            "anwalt",
            "kanzlei",
            "auf",
            "auf",
            "dem",
            "dem",
            "Donaudampfschiff",
            "Donau",
            "dampf",
            "schiff",
            "hat",
            "hat",
            "viel",
            "viel",
            "Ökosteuer",
            "Ökosteuer",
            "gekostet",
            "gekosten"
        };

        Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, new StringReader(source));

        assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);

    }
View Full Code Here


    @Test
    public void test() throws IOException {
        AnalysisService analysisService = createAnalysisService();

        TokenFilterFactory tokenFilter = analysisService.tokenFilter("umlaut");
        MatcherAssert.assertThat(tokenFilter, Matchers.instanceOf(GermanNormalizationFilterFactory.class));

        String source = "Ein schöner Tag in Köln im Café an der Straßenecke";

        String[] expected = {
            "Ein",
            "schoner",
            "Tag",
            "in",
            "Koln",
            "im",
            "Café",
            "an",
            "der",
            "Strassenecke"
        };

        Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, new StringReader(source));

        assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);

    }
View Full Code Here

                     .putArray("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
                     .build();

            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            {
                TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_default");
                String source = "the quick brown is a fox Or noT";
                String[] expected = new String[] { "the", "quick", "brown", "is", "a", "fox", "Or", "noT" };
                Tokenizer tokenizer = new WhitespaceTokenizer();
                tokenizer.setReader(new StringReader(source));
                assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
            }
        }

        {
            Settings settings = ImmutableSettings.settingsBuilder().put("index.analysis.filter.common_grams_default.type", "common_grams")
                     .put("index.analysis.filter.common_grams_default.query_mode", false)
                     .putArray("index.analysis.filter.common_grams_default.common_words", "chromosome", "protein")
                     .build();
            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            {
                TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_default");
                String source = "the quick brown is a fox Or noT";
                String[] expected = new String[] { "the", "quick", "brown", "is", "a", "fox", "Or", "noT" };
                Tokenizer tokenizer = new WhitespaceTokenizer();
                tokenizer.setReader(new StringReader(source));
                assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
            }
        }
    }
View Full Code Here

            Settings settings = ImmutableSettings.settingsBuilder().put("index.analysis.filter.common_grams_1.type", "common_grams")
                    .put("index.analysis.filter.common_grams_1.ignore_case", true)
                    .putArray("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
                    .build();
            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_1");
            String source = "the quick brown is a fox or noT";
            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "fox_or", "or", "or_noT", "noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
        {
            Settings settings = ImmutableSettings.settingsBuilder().put("index.analysis.filter.common_grams_2.type", "common_grams")
                    .put("index.analysis.filter.common_grams_2.ignore_case", false)
                    .putArray("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
                    .build();
            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_2");
            String source = "the quick brown is a fox or why noT";
            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "or", "why", "why_noT", "noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
        {
            Settings settings = ImmutableSettings.settingsBuilder().put("index.analysis.filter.common_grams_3.type", "common_grams")
                    .putArray("index.analysis.filter.common_grams_3.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
                    .build();
            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_3");
            String source = "the quick brown is a fox Or noT";
            String[] expected = new String[] { "the", "the_quick", "quick", "brown", "brown_is", "is", "is_a", "a", "a_fox", "fox", "Or", "noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
    }
View Full Code Here

                    .put("index.analysis.filter.common_grams_1.query_mode", true)
                    .putArray("index.analysis.filter.common_grams_1.common_words", "the", "Or", "Not", "a", "is", "an", "they", "are")
                    .put("index.analysis.filter.common_grams_1.ignore_case", true)
                    .build();
            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_1");
            String source = "the quick brown is a fox or noT";
            String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox_or", "or_noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
        {
            Settings settings = ImmutableSettings.settingsBuilder().put("index.analysis.filter.common_grams_2.type", "common_grams")
                    .put("index.analysis.filter.common_grams_2.query_mode", true)
                    .putArray("index.analysis.filter.common_grams_2.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
                    .put("index.analysis.filter.common_grams_2.ignore_case", false)
                    .build();
            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_2");
            String source = "the quick brown is a fox or why noT";
            String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
        {
            Settings settings = ImmutableSettings.settingsBuilder().put("index.analysis.filter.common_grams_3.type", "common_grams")
                    .put("index.analysis.filter.common_grams_3.query_mode", true)
                    .putArray("index.analysis.filter.common_grams_3.common_words", "the", "Or", "noT", "a", "is", "an", "they", "are")
                    .build();
            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_3");
            String source = "the quick brown is a fox or why noT";
            String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "or", "why_noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
        {
            Settings settings = ImmutableSettings.settingsBuilder().put("index.analysis.filter.common_grams_4.type", "common_grams")
                    .put("index.analysis.filter.common_grams_4.query_mode", true)
                    .putArray("index.analysis.filter.common_grams_4.common_words", "the", "or", "not", "a", "is", "an", "they", "are")
                    .build();
            AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
            TokenFilterFactory tokenFilter = analysisService.tokenFilter("common_grams_4");
            String source = "the quick brown is a fox Or noT";
            String[] expected = new String[] { "the_quick", "quick", "brown_is", "is_a", "a_fox", "fox", "Or", "noT" };
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(source));
            assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
        }
    }
View Full Code Here

TOP

Related Classes of org.elasticsearch.index.analysis.TokenFilterFactory

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.