Examples of edu.stanford.nlp.ling.Word

edu.stanford.nlp.ling.Word
A Word object acts as a Label by containing a String. This class is in essence identical to a StringLabel, but it also uses the value to implement the HasWord interface. @author Christopher Manning @version 2000/12/20

  static boolean isProper(String posTag) {
    return posTag.equals("NNP") || posTag.equals("NNPS") || posTag.equals("NP");
  }


  public Word stem(Word w) {
    return new Word(stem(w.value()));
  }

View Full Code Here

    String postSentString = ChineseStringUtils.postProcessingAnswerCTB(postProcessedSent.toString(),false,false);
    printlnErr("Sighan2005 output: "+postSentString);
    String[] postSentArray = postSentString.split("\\s+");
    ArrayList<Word> postSent = new ArrayList<Word>();
    for(String w : postSentArray) {
      postSent.add(new Word(w));
    }
    return new ArrayList<HasWord>(postSent);
  }

View Full Code Here

        boolean isOneChar = (start+1 == end);
        boolean isInDict = words.contains(str);
        if (isInDict || isOneChar) {
          double cost = isInDict ? 1 : 100;
          DFSATransition<Word, Integer> trans =
            new DFSATransition<Word, Integer>(null, states.get(start), states.get(end), new Word(str), null, cost);
          //System.err.println("start="+start+" end="+end+" word="+str);
          states.get(start).addTransition(trans);
          ++edgesNb;
        }
      }

View Full Code Here

    // Extract min-cost path:
    int i=len;
    while (i>0) {
      DFSATransition<Word, Integer> tr = bptrs.get(i);
      DFSAState<Word, Integer> fromState = tr.getSource();
      Word word = tr.getInput();
      if (!word.word().equals(" "))
        segmentedWords.add(0, word);
      i = fromState.stateID();
    }
    if(DEBUG) {
      // Print lattice density ([1,+inf[) : if equal to 1, it means

View Full Code Here

    while (start < length) {
      int end = Math.min(length, start + maxLength);
      while (end > start + 1) {
        String nextWord = s.substring(start, end);
        if (words.contains(nextWord)) {
          segmentedWords.add(new Word(nextWord));
          break;
        }
        end--;
      }
      if (end == start + 1) {
        // character does not start any word in our dictionary
        segmentedWords.add(new Word(new String(new char[] {s.charAt(start)} )));
        start++;
      } else {
        start = end;
      }
    }

View Full Code Here

        String prevWord = newSent.get(newSent.size()-1).toString();
        String curWord = word.toString();
        String prevChar = prevWord.substring(prevWord.length()-1);
        String curChar = curWord.substring(0,1);
        if(!isChinese(prevChar) && !isChinese(curChar)) {
          Word mergedWord = new Word(prevWord+curWord);
          newSent.set(newSent.size()-1, mergedWord);
          //printlnErr("merged: "+mergedWord);
          //printlnErr("merged: "+mergedWord+" from: "+prevWord+" and: "+curWord);
          continue;
        }

View Full Code Here

    for (int start = 0, length = s.length(); start < length; ) {
      int end = Math.min(length, start + maxLength);
      while (end > start + 1) {
        String nextWord = s.substring(start, end);
        if (words.contains(nextWord)) {
          segmentedWords.add(new Word(nextWord));
          break;
        }
        end--;
      }
      if (end == start + 1) {
        // character does not start any word in our dictionary
        // handle non-BMP characters
        if (s.codePointAt(start) >= 0x10000) {
          segmentedWords.add(new Word(new String(s.substring(start, start + 2))));
          start += 2;
        } else {
          segmentedWords.add(new Word(new String(s.substring(start, start + 1))));
          start++;
        }
      } else {
        start = end;
      }

View Full Code Here

    Label headLabel = new CategoryWordTag(headWordStr, headWordStr, headTagStr);
    int numTags = tagIndex.size();


    // deal with span 1
    if (end - start == 1) {
      Tree leaf = tf.newLeaf(new Word(headWordStr));
      return tf.newTreeNode(headLabel, Collections.singletonList(leaf));
    }
    // find backtrace
    List<Tree> children = new ArrayList<Tree>();
    double bestScore = iScore(start, end, hWord, hTag);

View Full Code Here

      System.err.println("Warning: tree is leaf: " + t);
      t = tf.newTreeNode(tlp.startSymbol(), Collections.singletonList(t));
    }
    t.setLabel(new CategoryWordTag(tlp.startSymbol(), Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG));
    List<Tree> preTermChildList = new ArrayList<Tree>();
    Tree boundaryTerm = tf.newLeaf(new Word(Lexicon.BOUNDARY));//CategoryWordTag(Lexicon.BOUNDARY,Lexicon.BOUNDARY,""));
    preTermChildList.add(boundaryTerm);
    Tree boundaryPreTerm = tf.newTreeNode(new CategoryWordTag(Lexicon.BOUNDARY_TAG, Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG), preTermChildList);
    List<Tree> childList = t.getChildrenAsList();
    childList.add(boundaryPreTerm);
    t.setChildren(childList);

View Full Code Here


    private Tree transformTreeHelper(Tree t) {
      if (t != null) {
        String cat = t.label().value();
        if (t.isLeaf()) {
          Label label = new Word(cat); //new CategoryWordTag(cat,cat,"");
          t.setLabel(label);
        } else {
          Tree[] kids = t.children();
          for (Tree child : kids) {
            transformTreeHelper(child); // recursive call

View Full Code Here

0 1 2 3 4 5

TOP

Related Classes of edu.stanford.nlp.ling.Word

edu.stanford.nlp.ie.machinereading.domains.ace.reader.RobustTokenizer

edu.stanford.nlp.ie.machinereading.structure.ExtractionSentence

edu.stanford.nlp.ie.pascal.ISODateInstance

edu.stanford.nlp.parser.lexparser.ChineseCharacterBasedLexiconTraining

edu.stanford.nlp.parser.lexparser.demo.ParserDemo2

edu.stanford.nlp.parser.lexparser.ExhaustiveDependencyParser

edu.stanford.nlp.parser.lexparser.FactoredParser

edu.stanford.nlp.parser.lexparser.LexicalizedParser

edu.stanford.nlp.parser.lexparser.LexicalizedParserQuery

edu.stanford.nlp.parser.lexparser.MaxMatchSegmenter

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.