Package opennlp.tools.parser

Examples of opennlp.tools.parser.Parse


     
      for (int i = 0; i < sentences.size(); i++) {
       
        String sentence[] = sentences.get(i);
       
        Parse incompleteParse = createIncompleteParse(sentence);
        Parse p = parser.parse(incompleteParse);
       
        // What to do when a parse cannot be found ?!
       
        enhancedParses.add(p);
      }
View Full Code Here


        sentenceNumber=0;
        document.clear();
        parses.clear();
      }
      else {
        Parse p = Parse.parseParse(line);
        parses.add(p);
        Mention[] extents = treebankLinker.getMentionFinder().getMentions(new DefaultParse(p,sentenceNumber));
        //construct new parses for mentions which don't have constituents.
        for (int ei=0,en=extents.length;ei<en;ei++) {
          //System.err.println("PennTreebankLiner.main: "+ei+" "+extents[ei]);

          if (extents[ei].getParse() == null) {
            //not sure how to get head index, but its not used at this point.
            Parse snp = new Parse(p.getText(),extents[ei].getSpan(),"NML",1.0,0);
            p.insert(snp);
            extents[ei].setParse(new DefaultParse(snp,sentenceNumber));
          }

        }
View Full Code Here

    parseMap = new HashMap<Parse, Integer>();
    for (int ei=0,en=entities.length;ei<en;ei++) {
      if (entities[ei].getNumMentions() > 1) {
        for (Iterator<MentionContext> mi = entities[ei].getMentions(); mi.hasNext();) {
          MentionContext mc = mi.next();
          Parse mentionParse = ((DefaultParse) mc.getParse()).getParse();
          parseMap.put(mentionParse,ei+1);
          //System.err.println("CorefParse: "+mc.getParse().hashCode()+" -> "+ (ei+1));
        }
      }
    }
View Full Code Here

    }
  }

  public void show() {
    for (int pi=0,pn=parses.size();pi<pn;pi++) {
      Parse p = parses.get(pi);
      show(p);
      System.out.println();
    }
  }
View Full Code Here

      //System.out.print(p.hashCode()+"-"+parseMap.containsKey(p));
      System.out.print(" ");
    }
    Parse[] children = p.getChildren();
    for (int pi=0,pn=children.length;pi<pn;pi++) {
      Parse c = children[pi];
      Span s = c.getSpan();
      if (start < s.getStart()) {
        System.out.print(p.getText().substring(start, s.getStart()));
      }
      show(c);
      start = s.getEnd();
View Full Code Here

   * @param root The root of the parse tree.
   * @return The right frontier of the specified parse tree.
   */
  public static List<Parse> getRightFrontier(Parse root,Set<String> punctSet) {
    List<Parse> rf = new LinkedList<Parse>();
    Parse top;
    if (root.getType() == AbstractBottomUpParser.TOP_NODE ||
        root.getType() == AbstractBottomUpParser.INC_NODE) {
      top = collapsePunctuation(root.getChildren(),punctSet)[0];
    }
    else {
      top = root;
    }
    while(!top.isPosTag()) {
      rf.add(0,top);
      Parse[] kids = top.getChildren();
      top = kids[kids.length-1];
    }
    return new ArrayList<Parse>(rf);
  }
View Full Code Here

  protected Parse[] advanceParses(Parse p, double probMass) {
    double q = 1 - probMass;
    /** The index of the node which will be labeled in this iteration of advancing the parse. */
    int advanceNodeIndex;
    /** The node which will be labeled in this iteration of advancing the parse. */
    Parse advanceNode=null;
    Parse[] originalChildren = p.getChildren();
    Parse[] children = collapsePunctuation(originalChildren,punctSet);
    int numNodes = children.length;
    if (numNodes == 0) {
      return null;
    }
    else if (numNodes == 1) {  //put sentence initial and final punct in top node
      if (children[0].isPosTag()) {
        return null;
      }
      else {
        p.expandTopNode(children[0]);
        return new Parse[] { p };
      }
    }
    //determines which node needs to adanced.
    for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++) {
      advanceNode = children[advanceNodeIndex];
      if (!isBuilt(advanceNode)) {
        break;
      }
    }
    int originalZeroIndex = mapParseIndex(0,children,originalChildren);
    int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren);
    List<Parse> newParsesList = new ArrayList<Parse>();
    //call build model
    buildModel.eval(buildContextGenerator.getContext(children, advanceNodeIndex), bprobs);
    double doneProb = bprobs[doneIndex];
    if (debugOn) System.out.println("adi="+advanceNodeIndex+" "+advanceNode.getType()+"."+advanceNode.getLabel()+" "+advanceNode+" choose build="+(1-doneProb)+" attach="+doneProb);
    if (1-doneProb > q) {
      double bprobSum = 0;
      while (bprobSum < probMass) {
        /** The largest unadvanced labeling. */
        int max = 0;
        for (int pi = 1; pi < bprobs.length; pi++) { //for each build outcome
          if (bprobs[pi] > bprobs[max]) {
            max = pi;
          }
        }
        if (bprobs[max] == 0) {
          break;
        }
        double bprob = bprobs[max];
        bprobs[max] = 0; //zero out so new max can be found
        bprobSum += bprob;
        String tag = buildModel.getOutcome(max);
        if (!tag.equals(DONE)) {
          Parse newParse1 = (Parse) p.clone();
          Parse newNode = new Parse(p.getText(),advanceNode.getSpan(),tag,bprob,advanceNode.getHead());
          newParse1.insert(newNode);
          newParse1.addProb(Math.log(bprob));
          newParsesList.add(newParse1);
          if (checkComplete) {
            cprobs = checkModel.eval(checkContextGenerator.getContext(newNode,children,advanceNodeIndex,false));
            if (debugOn) System.out.println("building "+tag+" "+bprob+" c="+cprobs[completeIndex]);
            if (cprobs[completeIndex] > probMass) { //just complete advances
              setComplete(newNode);
              newParse1.addProb(Math.log(cprobs[completeIndex]));
              if (debugOn) System.out.println("Only advancing complete node");
            }
            else if (1-cprobs[completeIndex] > probMass) { //just incomplete advances
              setIncomplete(newNode);
              newParse1.addProb(Math.log(1-cprobs[completeIndex]));
              if (debugOn) System.out.println("Only advancing incomplete node");
            }
            else { //both complete and incomplete advance
              if (debugOn) System.out.println("Advancing both complete and incomplete nodes");
              setComplete(newNode);
              newParse1.addProb(Math.log(cprobs[completeIndex]));

              Parse newParse2 = (Parse) p.clone();
              Parse newNode2 = new Parse(p.getText(),advanceNode.getSpan(),tag,bprob,advanceNode.getHead());
              newParse2.insert(newNode2);
              newParse2.addProb(Math.log(bprob));
              newParsesList.add(newParse2);
              newParse2.addProb(Math.log(1-cprobs[completeIndex]));
              setIncomplete(newNode2); //set incomplete for non-clone
            }
          }
          else {
            if (debugOn) System.out.println("building "+tag+" "+bprob);
          }
        }
      }
    }
    //advance attaches
    if (doneProb > q) {
      Parse newParse1 = (Parse) p.clone(); //clone parse
      //mark nodes as built
      if (checkComplete) {
        if (isComplete(advanceNode)) {
          newParse1.setChild(originalAdvanceIndex,Parser.BUILT+"."+Parser.COMPLETE); //replace constituent being labeled to create new derivation
        }
        else {
          newParse1.setChild(originalAdvanceIndex,Parser.BUILT+"."+Parser.INCOMPLETE); //replace constituent being labeled to create new derivation
        }
      }
      else {
        newParse1.setChild(originalAdvanceIndex,Parser.BUILT); //replace constituent being labeled to create new derivation
      }
      newParse1.addProb(Math.log(doneProb));
      if (advanceNodeIndex == 0) { //no attach if first node.
        newParsesList.add(newParse1);
      }
      else {
        List<Parse> rf = getRightFrontier(p,punctSet);
        for (int fi=0,fs=rf.size();fi<fs;fi++) {
          Parse fn = rf.get(fi);
          attachModel.eval(attachContextGenerator.getContext(children, advanceNodeIndex,rf,fi), aprobs);
          if (debugOn) {
            //List cs = java.util.Arrays.asList(attachContextGenerator.getContext(children, advanceNodeIndex,rf,fi,punctSet));
            System.out.println("Frontier node("+fi+"): "+fn.getType()+"."+fn.getLabel()+" "+fn+" <- "+advanceNode.getType()+" "+advanceNode+" d="+aprobs[daughterAttachIndex]+" s="+aprobs[sisterAttachIndex]+" ");
          }
          for (int ai=0;ai<attachments.length;ai++) {
            double prob = aprobs[attachments[ai]];
            //should we try an attach if p > threshold and
            // if !checkComplete then prevent daughter attaching to chunk
            // if checkComplete then prevent daughter attacing to complete node or
            //    sister attaching to an incomplete node
            if (prob > q && (
                (!checkComplete && (attachments[ai]!= daughterAttachIndex || !isComplete(fn)))
                ||
                (checkComplete && ((attachments[ai]== daughterAttachIndex && !isComplete(fn)) || (attachments[ai] == sisterAttachIndex && isComplete(fn)))))) {
              Parse newParse2 = newParse1.cloneRoot(fn,originalZeroIndex);
              Parse[] newKids = Parser.collapsePunctuation(newParse2.getChildren(),punctSet);
              //remove node from top level since were going to attach it (including punct)
              for (int ri=originalZeroIndex+1;ri<=originalAdvanceIndex;ri++) {
                //System.out.println(at"-removing "+(originalZeroIndex+1)+" "+newParse2.getChildren()[originalZeroIndex+1]);
                newParse2.remove(originalZeroIndex+1);
              }
              List<Parse> crf = getRightFrontier(newParse2,punctSet);
              Parse updatedNode;
              if (attachments[ai] == daughterAttachIndex) {//attach daughter
                updatedNode = crf.get(fi);
                updatedNode.add(advanceNode,headRules);
              }
              else { //attach sister
                Parse psite;
                if (fi+1 < crf.size()) {
                  psite = crf.get(fi+1);
                  updatedNode = psite.adjoin(advanceNode,headRules);
                }
                else {
                  psite = newParse2;
                  updatedNode = psite.adjoinRoot(advanceNode,headRules,originalZeroIndex);
                  newKids[0] = updatedNode;
                }
              }
              //update spans affected by attachment
              for (int ni=fi+1;ni<crf.size();ni++) {
                Parse node = crf.get(ni);
                node.updateSpan();
              }
              //if (debugOn) {System.out.print(ai+"-result: ");newParse2.show();System.out.println();}
              newParse2.addProb(Math.log(prob));
              newParsesList.add(newParse2);
              if (checkComplete) {
                cprobs = checkModel.eval(checkContextGenerator.getContext(updatedNode,newKids,advanceNodeIndex,true));
                if (cprobs[completeIndex] > probMass) {
                  setComplete(updatedNode);
                  newParse2.addProb(Math.log(cprobs[completeIndex]));
                  if (debugOn) System.out.println("Only advancing complete node");
                }
                else if (1-cprobs[completeIndex] > probMass) {
                  setIncomplete(updatedNode);
                  newParse2.addProb(Math.log(1-cprobs[completeIndex]));
                  if (debugOn) System.out.println("Only advancing incomplete node");
                }
                else {
                  setComplete(updatedNode);
                  Parse newParse3 = newParse2.cloneRoot(updatedNode,originalZeroIndex);
                  newParse3.addProb(Math.log(cprobs[completeIndex]));
                  newParsesList.add(newParse3);
                  setIncomplete(updatedNode);
                  newParse2.addProb(Math.log(1-cprobs[completeIndex]));
                  if (debugOn) System.out.println("Advancing both complete and incomplete nodes; c="+cprobs[completeIndex]);
                }
View Full Code Here

  @Override
  protected Parse[] advanceParses(final Parse p, double probMass) {
    double q = 1 - probMass;
    /** The closest previous node which has been labeled as a start node. */
    Parse lastStartNode = null;
    /** The index of the closest previous node which has been labeled as a start node. */
    int lastStartIndex = -1;
    /** The type of the closest previous node which has been labeled as a start node. */
    String lastStartType = null;
    /** The index of the node which will be labeled in this iteration of advancing the parse. */
    int advanceNodeIndex;
    /** The node which will be labeled in this iteration of advancing the parse. */
    Parse advanceNode=null;
    Parse[] originalChildren = p.getChildren();
    Parse[] children = collapsePunctuation(originalChildren,punctSet);
    int numNodes = children.length;
    if (numNodes == 0) {
      return null;
    }
    //determines which node needs to be labeled and prior labels.
    for (advanceNodeIndex = 0; advanceNodeIndex < numNodes; advanceNodeIndex++) {
      advanceNode = children[advanceNodeIndex];
      if (advanceNode.getLabel() == null) {
        break;
      }
      else if (startTypeMap.containsKey(advanceNode.getLabel())) {
        lastStartType = startTypeMap.get(advanceNode.getLabel());
        lastStartNode = advanceNode;
        lastStartIndex = advanceNodeIndex;
        //System.err.println("lastStart "+i+" "+lastStart.label+" "+lastStart.prob);
      }
    }
    int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren);
    List<Parse> newParsesList = new ArrayList<Parse>(buildModel.getNumOutcomes());
    //call build
    buildModel.eval(buildContextGenerator.getContext(children, advanceNodeIndex), bprobs);
    double bprobSum = 0;
    while (bprobSum < probMass) {
      // The largest unadvanced labeling.
      int max = 0;
      for (int pi = 1; pi < bprobs.length; pi++) { //for each build outcome
        if (bprobs[pi] > bprobs[max]) {
          max = pi;
        }
      }
      if (bprobs[max] == 0) {
        break;
      }
      double bprob = bprobs[max];
      bprobs[max] = 0; //zero out so new max can be found
      bprobSum += bprob;
      String tag = buildModel.getOutcome(max);
      //System.out.println("trying "+tag+" "+bprobSum+" lst="+lst);
      if (max == topStartIndex) { // can't have top until complete
        continue;
      }
      //System.err.println(i+" "+tag+" "+bprob);
      if (startTypeMap.containsKey(tag)) { //update last start
        lastStartIndex = advanceNodeIndex;
        lastStartNode = advanceNode;
        lastStartType = startTypeMap.get(tag);
      }
      else if (contTypeMap.containsKey(tag)) {
        if (lastStartNode == null || !lastStartType.equals(contTypeMap.get(tag))) {
          continue; //Cont must match previous start or continue
        }
      }
      Parse newParse1 = (Parse) p.clone(); //clone parse
      if (createDerivationString) newParse1.getDerivation().append(max).append("-");
      newParse1.setChild(originalAdvanceIndex,tag); //replace constituent being labeled to create new derivation
      newParse1.addProb(Math.log(bprob));
      //check
      //String[] context = checkContextGenerator.getContext(newParse1.getChildren(), lastStartType, lastStartIndex, advanceNodeIndex);
      checkModel.eval(checkContextGenerator.getContext(collapsePunctuation(newParse1.getChildren(),punctSet), lastStartType, lastStartIndex, advanceNodeIndex), cprobs);
      //System.out.println("check "+lastStartType+" "+cprobs[completeIndex]+" "+cprobs[incompleteIndex]+" "+tag+" "+java.util.Arrays.asList(context));
      Parse newParse2 = newParse1;
      if (cprobs[completeIndex] > q) { //make sure a reduce is likely
        newParse2 = (Parse) newParse1.clone();
        if (createDerivationString) newParse2.getDerivation().append(1).append(".");
        newParse2.addProb(Math.log(cprobs[completeIndex]));
        Parse[] cons = new Parse[advanceNodeIndex - lastStartIndex + 1];
        boolean flat = true;
        //first
        cons[0] = lastStartNode;
        flat &= cons[0].isPosTag();
        //last
        cons[advanceNodeIndex - lastStartIndex] = advanceNode;
        flat &= cons[advanceNodeIndex - lastStartIndex].isPosTag();
        //middle
        for (int ci = 1; ci < advanceNodeIndex - lastStartIndex; ci++) {
          cons[ci] = children[ci + lastStartIndex];
          flat &= cons[ci].isPosTag();
        }
        if (!flat) { //flat chunks are done by chunker
          if (lastStartIndex == 0 && advanceNodeIndex == numNodes-1) { //check for top node to include end and begining punctuation
            //System.err.println("ParserME.advanceParses: reducing entire span: "+new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd())+" "+lastStartType+" "+java.util.Arrays.asList(children));
            newParse2.insert(new Parse(p.getText(), p.getSpan(), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType)));
          }
          else {
            newParse2.insert(new Parse(p.getText(), new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd()), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType)));
          }
          newParsesList.add(newParse2);
        }
      }
      if (cprobs[incompleteIndex] > q) { //make sure a shift is likely
View Full Code Here

    //default
    features.add("default");
    //first constituent label
    features.add("fl="+constituents[0].getLabel());
    Parse pstart = constituents[start];
    Parse pend = constituents[end];
    checkcons(pstart, "begin", type, features);
    checkcons(pend, "last", type, features);
    StringBuffer production = new StringBuffer(20);
    StringBuffer punctProduction = new StringBuffer(20);
    production.append("p=").append(type).append("->");
    punctProduction.append("pp=").append(type).append("->");
    for (int pi = start; pi < end; pi++) {
      Parse p = constituents[pi];
      checkcons(p, pend, type, features);
      production.append(p.getType()).append(",");
      punctProduction.append(p.getType()).append(",");
      Collection<Parse> nextPunct = p.getNextPunctuationSet();
      if (nextPunct != null) {
        for (Iterator<Parse> pit=nextPunct.iterator();pit.hasNext();) {
          Parse punct = pit.next();
          punctProduction.append(punct.getType()).append(",");
        }
      }
    }
    production.append(pend.getType());
    punctProduction.append(pend.getType());
    features.add(production.toString());
    features.add(punctProduction.toString());
    Parse p_2 = null;
    Parse p_1 = null;
    Parse p1 = null;
    Parse p2 = null;
    Collection<Parse> p1s = constituents[end].getNextPunctuationSet();
    Collection<Parse> p2s = null;
    Collection<Parse> p_1s = constituents[start].getPreviousPunctuationSet();
    Collection<Parse> p_2s = null;
    if (start - 2 >= 0) {
View Full Code Here

  }

  private boolean containsPunct(Collection<Parse> puncts, String punct){
    if (puncts != null){
      for (Iterator<Parse> pi=puncts.iterator();pi.hasNext();) {
        Parse p = pi.next();
        if (p.getType().equals(punct)) {
          return true;
        }
      }
    }
    return false;
View Full Code Here

TOP

Related Classes of opennlp.tools.parser.Parse

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.