Package com.tamingtext.util

Source Code of com.tamingtext.util.StringUtilTest

/*
* Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
*
*    Licensed under the Apache License, Version 2.0 (the "License");
*    you may not use this file except in compliance with the License.
*    You may obtain a copy of the License at
*
*        http://www.apache.org/licenses/LICENSE-2.0
*
*    Unless required by applicable law or agreed to in writing, software
*    distributed under the License is distributed on an "AS IS" BASIS,
*    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*    See the License for the specific language governing permissions and
*    limitations under the License.
* -------------------
* To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
* http://www.manning.com/ingersoll
*/

package com.tamingtext.util;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import com.tamingtext.TamingTextTestJ4;
import junit.framework.TestCase;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import org.junit.*;

public class StringUtilTest extends TamingTextTestJ4 {



  @Test
  public void testWhitespace() throws Exception {
    String[] gold = {"The", "Carolina", "Hurricanes", "won", "the", "2006", "Stanley", "Cup."};
    String[] result = StringUtil.tokenizeWhitespace("The Carolina Hurricanes won the 2006 Stanley Cup.");
    assertTrue("result Size: " + result.length + " is not: " + gold.length, result.length == gold.length);
    for (int i = 0; i < result.length; i++) {
      assertTrue(result[i] + " is not equal to " + gold[i], result[i].equals(gold[i]) == true);

    }

  }
  @Test
  public void testLuceneStandardTokenizer() throws Exception {
    String[] gold = {"I", "can't", "beleive", "that", "the", "Carolina", "Hurricanes", "won", "the", "2005", "2006", "Stanley", "Cup",};
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, new StringReader("I can't beleive that the Carolina Hurricanes won the 2005-2006 Stanley Cup."));
    List<String> result = new ArrayList<String>();
    while (tokenizer.incrementToken()) {
      result.add(((CharTermAttribute) tokenizer.getAttribute(CharTermAttribute.class)).toString());
    }
    assertTrue("result Size: " + result.size() + " is not: " + gold.length, result.size() == gold.length);
    int i = 0;
    for (String chunk : result) {
      assertTrue(chunk + " is not equal to " + gold[i], chunk.equals(gold[i]) == true);
      i++;
    }
  }
  @Test
  public void testVikings() throws Exception {
    String[] gold = {"Last", "week", "the", "National", "Football", "League", "crowned", "a", "new", "Super", "Bowl", "Champion",
            "Minnesota", "Vikings", "fans", "will", "take", "little", "solace", "in", "the", "fact", "that", "they",
            "lost", "to", "the", "eventual", "champion", "in", "the", "playoffs"};
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, new StringReader("Last week the National Football League crowned a new Super Bowl Champion." +
            "  Minnesota Vikings fans will take little solace in the fact that they" +
            " lost to the eventual champion in the playoffs."));
    List<String> result = new ArrayList<String>();
    while (tokenizer.incrementToken()) {
      result.add(((CharTermAttribute) tokenizer.getAttribute(CharTermAttribute.class)).toString());
    }
    assertTrue("result Size: " + result.size() + " is not: " + gold.length, result.size() == gold.length);
    int i = 0;
    for (String chunk : result) {
      System.out.println(chunk);
      assertTrue(chunk + " is not equal to " + gold[i], chunk.equals(gold[i]) == true);
      i++;
    }
  }


}
TOP

Related Classes of com.tamingtext.util.StringUtilTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.