Package edu.stanford.nlp.ling

Examples of edu.stanford.nlp.ling.CoreLabel.word()


      String start = c.word().substring(0, 2);
      String end = c.word().substring(c.word().length() - 2);
      if (c.index() == 2) {
        features.add(start + "_" + end + "-begin-wrap");
      }
      if (c.index() == c.word().length() - 1) {
        features.add(start + "_" + end + "-end-wrap");
      }
    }
   
    return features;
View Full Code Here


        lastEnd = 0;
      }
       */
      if(i > headTokenSpan.start()) sb.append(" ");

      sb.append(token.word());

    }

    return sb.toString();
  }
View Full Code Here

    }

    List<String> annotations = new ArrayList<String>();

    CoreLabel lab = (CoreLabel) t.label();
    String word = lab.word();
    String tag = lab.tag();
    String cat = lab.value();
    String baseCat = treebankLanguagePack().basicCategory(cat);

     //Tree parent = t.parent(root);
View Full Code Here

    PTBTokenizer<CoreLabel> tokenizer =
      new PTBTokenizer<CoreLabel>(testReader, new CoreLabelTokenFactory(), "");
    List<String> testResults = new ArrayList<String>();
    while (tokenizer.hasNext()) {
      CoreLabel w = tokenizer.next();
      testResults.add(w.word());
    }

    assertEquals(goldResults.size(), testResults.size());
    for (int i = 0; i < testResults.size(); ++i) {
      assertEquals(goldResults.get(i), testResults.get(i));
View Full Code Here

              (sent % 2 == 0) ? "strictTreebank3": "");
      int i = 0;
      while (ptbTokenizer.hasNext()) {
        CoreLabel w = ptbTokenizer.next();
        try {
          assertEquals("PTBTokenizer problem", corpGold[sent % 2][i], w.word());
        } catch (ArrayIndexOutOfBoundsException aioobe) {
          // the assertion below outside the loop will fail
        }
        i++;
      }
View Full Code Here

    StringBuilder prepend = new StringBuilder();

    /* This changes SGML tags into whitespace -- it should maybe be moved elsewhere */
    while (ptb.hasNext()) {
      CoreLabel w = ptb.next();
      Matcher m = sgml.matcher(w.word());
      if (m.matches()) {
        prepend.append(w.before()).append(w.word());
        previous.appendAfter(w.word() + w.after());
      } else {
        if (prepend.length() > 0) {
View Full Code Here

    /* This changes SGML tags into whitespace -- it should maybe be moved elsewhere */
    while (ptb.hasNext()) {
      CoreLabel w = ptb.next();
      Matcher m = sgml.matcher(w.word());
      if (m.matches()) {
        prepend.append(w.before()).append(w.word());
        previous.appendAfter(w.word() + w.after());
      } else {
        if (prepend.length() > 0) {
          w.prependBefore(prepend.toString());
          prepend = new StringBuilder();
View Full Code Here

    while (ptb.hasNext()) {
      CoreLabel w = ptb.next();
      Matcher m = sgml.matcher(w.word());
      if (m.matches()) {
        prepend.append(w.before()).append(w.word());
        previous.appendAfter(w.word() + w.after());
      } else {
        if (prepend.length() > 0) {
          w.prependBefore(prepend.toString());
          prepend = new StringBuilder();
        }
View Full Code Here

    PTBTokenizer<CoreLabel> tokenizer = new PTBTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), options);
    boolean printing = parseInsideBegin == null; // start off printing, unless you're looking for a start entity
    boolean beginLine = true;
    while (tokenizer.hasNext()) {
      CoreLabel obj = tokenizer.next();
      String str = obj.word();

      if (parseInsideBegin != null && parseInsideBegin.matcher(str).matches()) {
        printing = true;
      } else if (parseInsideEnd != null && parseInsideEnd.matcher(str).matches()) {
        printing = false;
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.