Package opennlp.tools.util

Examples of opennlp.tools.util.Span


            }
            else {
              break;
            }
          }
          Parse npPos = new Parse(parse.getText(),new Span(start,end),"NP",1,tags[ti+1]);
          parse.insert(npPos);
        }
      }
    }
  }
View Full Code Here


          start = ci;
        }
      }
      else {
        if (charType != state || charType == CharacterEnum.OTHER && c != pc) {
          tokens.add(new Span(start, ci));
          start = ci;
        }
      }
      state = charType;
      pc = c;
    }
    if (charType != CharacterEnum.WHITESPACE) {
      tokens.add(new Span(start, sl));
    }
    return tokens.toArray(new Span[tokens.size()]);
  }
View Full Code Here

        String type = getType(rest);
        if (type == null) {
          System.err.println("null type for: " + rest);
        }
        String token = getToken(rest);
        stack.push(new Constituent(type, new Span(offset,offset)));
        if (token != null) {
          if (type.equals("-NONE-") && gl != null) {
            //System.err.println("stack.size="+stack.size());
            gl.labelGaps(stack);
          }
          else {
            cons.add(new Constituent(AbstractBottomUpParser.TOK_NODE,
                new Span(offset, offset + token.length())));
            text.append(token).append(" ");
            offset += token.length() + 1;
          }
        }
      }
      else if (c == ')') {
        Constituent con = stack.pop();
        int start = con.getSpan().getStart();
        if (start < offset) {
          cons.add(new Constituent(con.getLabel(), new Span(start, offset-1)));
        }
      }
    }
    String txt = text.toString();
    int tokenIndex = -1;
    Parse p = new Parse(txt, new Span(0, txt.length()), AbstractBottomUpParser.TOP_NODE, 1,0);
    for (int ci=0;ci < cons.size();ci++) {
      Constituent con = cons.get(ci);
      String type = con.getLabel();
      if (!type.equals(AbstractBottomUpParser.TOP_NODE)) {
        if (type == AbstractBottomUpParser.TOK_NODE) {
View Full Code Here

   * @param names
   * @param tokens
   */
  public static void addNames(String tag, Span[] names, Parse[] tokens) {
    for (int ni=0,nn=names.length;ni<nn;ni++) {
      Span nameTokenSpan = names[ni];
      Parse startToken = tokens[nameTokenSpan.getStart()];
      Parse endToken = tokens[nameTokenSpan.getEnd()];
      Parse commonParent = startToken.getCommonParent(endToken);
      //System.err.println("addNames: "+startToken+" .. "+endToken+" commonParent = "+commonParent);
      if (commonParent != null) {
        Span nameSpan = new Span(startToken.getSpan().getStart(),endToken.getSpan().getEnd());
        if (nameSpan.equals(commonParent.getSpan())) {
          commonParent.insert(new Parse(commonParent.getText(),nameSpan,tag,1.0,endToken.getHeadIndex()));
        }
        else {
          Parse[] kids = commonParent.getChildren();
          boolean crossingKids = false;
          for (int ki=0,kn=kids.length;ki<kn;ki++) {
            if (nameSpan.crosses(kids[ki].getSpan())){
              crossingKids = true;
            }
          }
          if (!crossingKids) {
            commonParent.insert(new Parse(commonParent.getText(),nameSpan,tag,1.0,endToken.getHeadIndex()));
          }
          else {
            if (commonParent.getType().equals("NP")) {
              Parse[] grandKids = kids[0].getChildren();
              if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length-1].getSpan())) {
                commonParent.insert(new Parse(commonParent.getText(),commonParent.getSpan(),tag,1.0,commonParent.getHeadIndex()));
              }
            }
          }
        }
View Full Code Here

      }
      if (sb.length() < length) {
        sb.append(" ");
      }
      sb.append(token);
      spans.add(new Span(length,length+token.length()));
      length+=token.length();
    }
    //System.out.println();
    try {
      line = in.readLine();
View Full Code Here

          Collections.addAll(names, nameFinder.find(whitespaceTokenizerLine));
        }
       
        // Simple way to drop intersecting spans, otherwise the
        // NameSample is invalid
        Span reducedNames[] = NameFinderME.dropOverlappingSpans(
            names.toArray(new Span[names.size()]));
       
        NameSample nameSample = new NameSample(whitespaceTokenizerLine,
            reducedNames, false);
       
View Full Code Here

        while (end > 0 && StringUtil.isWhitespace(s.charAt(end - 1)))
          end--;
       
        if ((end - start) > 0) {
          sentProbs.add(1d);
          return new Span[] {new Span(start, end)};
        }
        else
          return new Span[0];
    }
   
    // Now convert the sent indexes to spans
    boolean leftover = starts[starts.length - 1] != s.length();
    Span[] spans = new Span[leftover? starts.length + 1 : starts.length];
    for (int si=0;si<starts.length;si++) {
      int start,end;
      if (si==0) {
        start = 0;
       
        while (si < starts.length && StringUtil.isWhitespace(s.charAt(start)))
          start++;
      }
      else {
        start = starts[si-1];
      }
      end = starts[si];
      while (end > 0 && StringUtil.isWhitespace(s.charAt(end-1))) {
        end--;
      }
      spans[si]=new Span(start,end);
    }
   
    if (leftover) {
      spans[spans.length-1] = new Span(starts[starts.length-1],s.length());
      sentProbs.add(ONE);
    }
   
    return spans;
  }
View Full Code Here

          if (lastStartIndex == 0 && advanceNodeIndex == numNodes-1) { //check for top node to include end and begining punctuation
            //System.err.println("ParserME.advanceParses: reducing entire span: "+new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd())+" "+lastStartType+" "+java.util.Arrays.asList(children));
            newParse2.insert(new Parse(p.getText(), p.getSpan(), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType)));
          }
          else {
            newParse2.insert(new Parse(p.getText(), new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd()), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType)));
          }
          newParsesList.add(newParse2);
        }
      }
      if (cprobs[incompleteIndex] > q) { //make sure a shift is likely
View Full Code Here

      String sampleSentence = DictionaryDetokenizerTool.detokenize(sentenceTokens, operations);
     
      int beginIndex = documentBuilder.length();
      documentBuilder.append(sampleSentence);
     
      spans.add(new Span(beginIndex, documentBuilder.length()));
    }
   
    document = documentBuilder.toString();
    this.sentences = Collections.unmodifiableList(spans);
  }
View Full Code Here

    while ((sentence = samples.read()) != null && !sentence.equals("")) {

      int begin = sentencesString.length();
      sentencesString.append(sentence.trim());
      int end = sentencesString.length();
      sentenceSpans.add(new Span(begin, end));
      sentencesString.append(' ');
    }
   
    if (sentenceSpans.size() > 0) {
      return new SentenceSample(sentencesString.toString(), sentenceSpans.toArray(new Span[sentenceSpans.size()]));
View Full Code Here

TOP

Related Classes of opennlp.tools.util.Span

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.