Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.Word


    return yieldWords(new ArrayList<Word>());
  }

  public ArrayList<Word> yieldWords(ArrayList<Word> y) {
    if (isLeaf()) {
      y.add(new Word(label()));
    } else {
      for (Tree kid : children()) {
        kid.yieldWords(y);
      }
    }
View Full Code Here


        } else {
          y.add((X) lab);
        }

      } else {
        y.add((X) new Word(lab));
      }

    } else {
      Tree[] kids = children();
      for (Tree kid : kids) {
View Full Code Here

  public static void main(String[] args) throws IOException {
    Stemmer s = new Stemmer();
    if (args[0].equals("-file")) {
      Iterator<Word> it = PTBTokenizer.newPTBTokenizer(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
      while (it.hasNext()) {
        Word token = it.next();
        System.out.print(s.stem(token.word()));
        System.out.print(' ');
      }
    } else {
      for (String arg : args) {
        System.out.print(s.stem(arg));
View Full Code Here

  /**
   * Stems <code>w</code> and returns stemmed <code>Word</code>.
   */

  public Word stem(Word w) {
    return (new Word(stem(w.word())));
  }
View Full Code Here

      new WhitespaceTokenizer<Word>(new WordTokenFactory(), reader,
                                    eolIsSignificant);
    PrintWriter pw =
      new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"), true);
    while (tokenizer.hasNext()) {
      Word w = tokenizer.next();
      if (w.value().equals(WhitespaceLexer.NEWLINE)) {
        pw.println("***CR***");
      } else {
        pw.println(w);
      }
    }
View Full Code Here

    try {
      BufferedReader reader = new BufferedReader(new FileReader(list));

      while (reader.ready()) {
        wordSet.add(new Word(reader.readLine()));
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
      //e.printStackTrace(System.err);
      //addGenericWords();
View Full Code Here

   * Adds some extremely common words to the stoplist.
   */
  private void addGenericWords() {
    String[] genericWords = {"a", "an", "the", "and", "or", "but", "nor"};
    for (int i = 1; i < 7; i++) {
      wordSet.add(new Word(genericWords[i]));
    }
  }
View Full Code Here

  /**
   * Returns true if the word is in the stoplist.
   */
  public boolean contains(String word) {
    return wordSet.contains(new Word(word));
  }
View Full Code Here

*/
public class WordTokenFactory implements LexedTokenFactory<Word> {

  @Override
  public Word makeToken(String str, int begin, int length) {
    return new Word(str, begin, begin+length);
  }
View Full Code Here

  public Word next() throws IOException {
    String nx = lexer.next();
    if (nx == null) {
      return null;
    } else {
      return new Word(nx);
    }
  }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.ling.Word

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.