Package org.tartarus.snowball.ext

Examples of org.tartarus.snowball.ext.EnglishStemmer


public class SnowballStemmerTest extends TamingTextTestJ4 {

  @Test
  public void test() throws NoSuchMethodException {
    //<start id="stemmer"/>
    EnglishStemmer english = new EnglishStemmer();

    String[] test = {"bank", "banks", "banking", "banker", "banked", "bankers"};//<co id="stemmer.co.test"/>
    String[] gold = {"bank", "bank", "bank", "banker", "bank", "banker"};//<co id="stemmer.co.gold"/>
    for (int i = 0; i < test.length; i++) {
      english.setCurrent(test[i]);//<co id="stemmer.co.set"/>
      english.stem();//<co id="stemmer.co.stem"/>
      System.out.println("English: " + english.getCurrent());
      assertTrue(english.getCurrent() + " is not equal to " + gold[i], english.getCurrent().equals(gold[i]) == true);
    }
    /*
<calloutlist>
<callout arearefs="stemmer.co.test"><para>Setup some tokens to be stemmed</para></callout>
<callout arearefs="stemmer.co.gold"><para>Define our expectations for results</para></callout>
View Full Code Here


import java.util.Collections;

public class EnglishPorterFilterFactoryTest extends BaseTokenTestCase {

  public void test() throws IOException {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    String[] gold = new String[test.length];
    for (int i = 0; i < test.length; i++) {
      stemmer.setCurrent(test[i]);
      stemmer.stem();
      gold[i] = stemmer.getCurrent();
    }

    EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory();
    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
View Full Code Here

    TokenStream stream = factory.create(tokenizer);
    assertTokenStreamContents(stream, gold);
  }

  public void testProtected() throws Exception {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    String[] gold = new String[test.length];
    for (int i = 0; i < test.length; i++) {
      if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) {
        stemmer.setCurrent(test[i]);
        stemmer.stem();
        gold[i] = stemmer.getCurrent();
      } else {
        gold[i] = test[i];
      }
    }
View Full Code Here

import java.util.Collections;

public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {

  public void test() throws IOException {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    String[] gold = new String[test.length];
    for (int i = 0; i < test.length; i++) {
      stemmer.setCurrent(test[i]);
      stemmer.stem();
      gold[i] = stemmer.getCurrent();
    }

    SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
    args.put("language", "English");
View Full Code Here

  /**
   * Tests the protected words mechanism of EnglishPorterFilterFactory
   */
  @Deprecated
  public void testProtectedOld() throws Exception {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    String[] gold = new String[test.length];
    for (int i = 0; i < test.length; i++) {
      if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) {
        stemmer.setCurrent(test[i]);
        stemmer.stem();
        gold[i] = stemmer.getCurrent();
      } else {
        gold[i] = test[i];
      }
    }

View Full Code Here

import java.util.Collections;

public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {

  public void test() throws IOException {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    StringBuilder gold = new StringBuilder();
    for (String aTest : test) {
      stemmer.setCurrent(aTest);
      stemmer.stem();
      gold.append(stemmer.getCurrent()).append(' ');
    }

    SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
    Map<String, String> args = new HashMap<String, String>();
    args.put("language", "English");
View Full Code Here

    String out = tsToString(factory.create(new IterTokenStream(test)));
    assertEquals(gold.toString().trim(), out);
  }

  public void testProtected() throws Exception {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    StringBuilder gold = new StringBuilder();
    for (int i = 0; i < test.length; i++) {
      if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) {
        stemmer.setCurrent(test[i]);
        stemmer.stem();
        gold.append(stemmer.getCurrent()).append(' ');
      } else {
        gold.append(test[i]).append(' ');
      }
    }
View Full Code Here

   */
  public static void main(String[] args) throws Exception {
    LuceneIndexerSearcher lis = new LuceneIndexerSearcher(false);
    IndexSearcher is = lis.getIndexSearcher();

    Stemmer stemmerTools = new Stemmer(new EnglishStemmer());
   
    //QueryParser qp = new Oscar3QueryParser("txt", new Oscar3Analyzer(), lis, false);
    //Query q = qp.parse("NaCl");
   
    String queryTerm = "lipid";
View Full Code Here

      double expected = scaleFactor * docFreq;
      double excess = df.getCount(s) - expected;
      score = excess / clusterSize;       
      if(score > threshold) scores.put(s, score);
    }
    Stemmer st = new Stemmer(new EnglishStemmer());
    Map<String,List<String>> stems = st.wordsToStems(df.getSet());
    for(String stem : stems.keySet()) {
      List<String> words = stems.get(stem);
      if(words.size() > 1) {
        BooleanQuery bq = new BooleanQuery(true);
View Full Code Here

      }
     

      clusterFiles.add(new File(ir.document(i).getField("filename").stringValue().replaceAll("markedup", "source")));
    }
    Stemmer st = new Stemmer(new EnglishStemmer());
    Map<String,List<String>> stems = st.wordsToStems(dfs.getSet());

    dfs.discardInfrequent(2);
    NGramTfDf ngtd = NGramTfDf.analyseFiles(clusterFiles);
    ngtd.calculateNGrams();
View Full Code Here

TOP

Related Classes of org.tartarus.snowball.ext.EnglishStemmer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.