Examples of ClassicAnalyzer

ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation, as specified by UAX#29.
  • org.projectforge.lucene.ClassicAnalyzer
    See LUCENE-1068 (https://issues.apache.org/jira/browse/LUCENE-1068). ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation, as specified by UAX#29.

  • Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

      }

      public void testDomainNames() throws Exception {
        // Current lucene should not show the bug
        ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT);

        // domain names
        assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
        //Notice the trailing .  See https://issues.apache.org/jira/browse/LUCENE-1068.
        // the following should be recognized as HOST:
        assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });

        // 2.3 should show the bug. But, alas, it's obsolete, we don't support it.
        // a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
        // assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });

        // 2.4 should not show the bug. But, alas, it's also obsolete,
        // so we check latest released (Robert's gonna break this on 4.0 soon :) )
        a2 = new ClassicAnalyzer(Version.LUCENE_3_1);
        assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
      }
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

                        "<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>",
                        "<ALPHANUM>", "<HOST>"});
      }

      public void testJava14BWCompatibility() throws Exception {
        ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_3_0);
        assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
      }
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

       * Make sure we skip wicked long terms.
      */
      public void testWickedLongTerm() throws IOException {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
          TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

        char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
        Arrays.fill(chars, 'x');
        Document doc = new Document();
        final String bigTerm = new String(chars);

        // This produces a too-long term:
        String contents = "abc xyz x" + bigTerm + " another term";
        doc.add(new TextField("content", contents, Field.Store.NO));
        writer.addDocument(doc);

        // Make sure we can add another normal document
        doc = new Document();
        doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
        writer.addDocument(doc);
        writer.close();

        IndexReader reader = IndexReader.open(dir);

        // Make sure all terms < max size were indexed
        assertEquals(2, reader.docFreq(new Term("content", "abc")));
        assertEquals(1, reader.docFreq(new Term("content", "bbb")));
        assertEquals(1, reader.docFreq(new Term("content", "term")));
        assertEquals(1, reader.docFreq(new Term("content", "another")));

        // Make sure position is still incremented when
        // massive term is skipped:
        DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
                                                                    MultiFields.getLiveDocs(reader),
                                                                    "content",
                                                                    new BytesRef("another"));
        assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(1, tps.freq());
        assertEquals(3, tps.nextPosition());

        // Make sure the doc that has the massive term is in
        // the index:
        assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());

        reader.close();

        // Make sure we can add a document with exactly the
        // maximum length term, and search on that term:
        doc = new Document();
        doc.add(new TextField("content", bigTerm, Field.Store.NO));
        ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
        sa.setMaxTokenLength(100000);
        writer  = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
        writer.addDocument(doc);
        writer.close();
        reader = IndexReader.open(dir);
        assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

        dir.close();
      }
     
      /** blast some random strings through the analyzer */
      public void testRandomStrings() throws Exception {
        checkRandomData(random(), new ClassicAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
      }
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

      }
     
      /** blast some random large strings through the analyzer */
      public void testRandomHugeStrings() throws Exception {
        Random random = random();
        checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192);
      }
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

    public class TestClassicAnalyzer extends BaseTokenStreamTestCase {

      private Analyzer  a = new ClassicAnalyzer(TEST_VERSION_CURRENT);

      public void testMaxTermLength() throws Exception {
        ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
        sa.setMaxTokenLength(5);
        assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
      }
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

        sa.setMaxTokenLength(5);
        assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
      }

      public void testMaxTermLength2() throws Exception {
        ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
        assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
        sa.setMaxTokenLength(5);
       
        assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
      }
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

        assertAnalyzesTo(a, "Excite@Home", new String[]{"excite@home"});
      }

      public void testLucene1140() throws Exception {
        try {
          ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
          assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
        } catch (NullPointerException e) {
          fail("Should not throw an NPE and it did");
        }
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

      }

      public void testDomainNames() throws Exception {
        // Current lucene should not show the bug
        ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT);

        // domain names
        assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
        //Notice the trailing .  See https://issues.apache.org/jira/browse/LUCENE-1068.
        // the following should be recognized as HOST:
        assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });

        // 2.3 should show the bug. But, alas, it's obsolete, we don't support it.
        // a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
        // assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });

        // 2.4 should not show the bug. But, alas, it's also obsolete,
        // so we check latest released (Robert's gonna break this on 4.0 soon :) )
        a2 = new ClassicAnalyzer(Version.LUCENE_31);
        assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
      }
    View Full Code Here

    Examples of org.apache.lucene.analysis.standard.ClassicAnalyzer

                        "<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>",
                        "<ALPHANUM>", "<HOST>"});
      }

      public void testJava14BWCompatibility() throws Exception {
        ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30);
        assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
      }
    View Full Code Here
    TOP
    Copyright © 2018 www.massapi.com. All rights reserved.
    All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.