Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.CoreLabel.word()


        CoreLabel testToken = testTokens.get(i);
        //System.err.println("POS: " + testToken.get(CoreAnnotations.PartOfSpeechAnnotation.class));
        String goldNer = goldToken.get(CoreAnnotations.AnswerAnnotation.class);
        String testNer = testToken.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        //System.err.println("Ner tag for token " + i + " doc " + k +", GOLD: " + goldNer + ", TEST:" + testNer);
        assertEquals("Ner tag for token " + i + " (\"" + testToken.word() + "\") doc " + k, goldNer, testNer);
      }
      k++;
    }

  }
View Full Code Here


    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    StringBuilder sb = new StringBuilder();
    for (int i = extentTokenSpan.start(); i < extentTokenSpan.end(); i ++){
      CoreLabel token = tokens.get(i);
      if(i > extentTokenSpan.start()) sb.append(" ");
      sb.append(token.word());
    }
    return sb.toString();
  }
 
  public String getType() { return type; }
View Full Code Here

      List<CoreLabel> words = new ArrayList<CoreLabel>();
      StringBuilder textContent = new StringBuilder();
      for(int i = 0; i < tokens.size(); i ++){
        CoreLabel l = new CoreLabel();
        l.setWord(tokens.get(i).getLiteral());
        l.set(CoreAnnotations.ValueAnnotation.class, l.word());
        l.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, tokens.get(i).getByteStart());
        l.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, tokens.get(i).getByteEnd());
        words.add(l);
        if(i > 0) textContent.append(" ");
        textContent.append(tokens.get(i).getLiteral());
View Full Code Here

      List<CoreLabel> words = tokenizerFactory.getTokenizer(new StringReader(sentenceString)).tokenize();

      // FIXING TOKENIZATION PROBLEMS
      for (int i = 0; i < words.size(); i++) {
        CoreLabel w = words.get(i);
        if (i > 0 && w.word().equals("$")) {
          if(!words.get(i-1).word().endsWith("PRP") && !words.get(i-1).word().endsWith("WP"))
            continue;
          words.get(i-1).set(CoreAnnotations.TextAnnotation.class, words.get(i-1).word()+"$");
          words.remove(i);
          i--;
View Full Code Here

          if(!words.get(i-1).word().endsWith("PRP") && !words.get(i-1).word().endsWith("WP"))
            continue;
          words.get(i-1).set(CoreAnnotations.TextAnnotation.class, words.get(i-1).word()+"$");
          words.remove(i);
          i--;
        } else if (w.word().equals("\\/")) {
          if(words.get(i-1).word().equals("</COREF>"))
            continue;
          w.set(CoreAnnotations.TextAnnotation.class, words.get(i-1).word()+"\\/"+words.get(i+1).word());
          words.remove(i+1);
          words.remove(i-1);
View Full Code Here

    extentTokens.add(initCoreLabel("was"));
    final int ADDED_WORDS = 2;
    for (int i = ent.getExtentTokenStart(); i < ent.getExtentTokenEnd(); i++) {
      // Add everything except separated dashes! The separated dashes mess with the parser too badly.
      CoreLabel label = tokens.get(i);
      if ( ! "-".equals(label.word())) {
        extentTokens.add(tokens.get(i));
      } else {
        approximateness++;
      }
    }
View Full Code Here

    CoreLabel c = cInfo.get(loc);

    // "Wrapper" feature: identity of first and last two chars of the current word.
    // This helps detect ma+_+sh in dialect, as well as avoiding segmenting possessive
    // pronouns if the word starts with al-.
    if (c.word().length() > 3) {
      String start = c.word().substring(0, 2);
      String end = c.word().substring(c.word().length() - 2);
      if (c.index() == 2) {
        features.add(start + "_" + end + "-begin-wrap");
      }
View Full Code Here

    // "Wrapper" feature: identity of first and last two chars of the current word.
    // This helps detect ma+_+sh in dialect, as well as avoiding segmenting possessive
    // pronouns if the word starts with al-.
    if (c.word().length() > 3) {
      String start = c.word().substring(0, 2);
      String end = c.word().substring(c.word().length() - 2);
      if (c.index() == 2) {
        features.add(start + "_" + end + "-begin-wrap");
      }
      if (c.index() == c.word().length() - 1) {
View Full Code Here

    // "Wrapper" feature: identity of first and last two chars of the current word.
    // This helps detect ma+_+sh in dialect, as well as avoiding segmenting possessive
    // pronouns if the word starts with al-.
    if (c.word().length() > 3) {
      String start = c.word().substring(0, 2);
      String end = c.word().substring(c.word().length() - 2);
      if (c.index() == 2) {
        features.add(start + "_" + end + "-begin-wrap");
      }
      if (c.index() == c.word().length() - 1) {
        features.add(start + "_" + end + "-end-wrap");
View Full Code Here

    // "Wrapper" feature: identity of first and last two chars of the current word.
    // This helps detect ma+_+sh in dialect, as well as avoiding segmenting possessive
    // pronouns if the word starts with al-.
    if (c.word().length() > 3) {
      String start = c.word().substring(0, 2);
      String end = c.word().substring(c.word().length() - 2);
      if (c.index() == 2) {
        features.add(start + "_" + end + "-begin-wrap");
      }
      if (c.index() == c.word().length() - 1) {
        features.add(start + "_" + end + "-end-wrap");
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.