Package opennlp.ccg.parse

Examples of opennlp.ccg.parse.ParseException


      String ntId = derivElt.getAttributeValue("nt_id");
      String simpleCat = derivElt.getAttributeValue("stag");
      List childElts = derivElt.getChildren();
      int numChildren = childElts.size();
      if (numChildren == 0)
        throw new ParseException(header
            + ": no child elements for TreeNode for cat: " + cat);
      // if no cat element present, adjust list with an initial dummy node,
      // to avoid code changes in what follows
      Element elt0 = (Element) childElts.get(0);
      String elt0name = elt0.getName();
      if (elt0name.equals("Treenode") || elt0name.equals("Leafnode")) {
        childElts.add(0, new Element("dummy"));
        numChildren++;
      }
      if (numChildren != 2 && numChildren != 3)
        throw new ParseException(header
            + ": wrong number of child elements: " + numChildren
            + " for cat: " + cat);
      Element firstInputElt = (Element) childElts.get(1);
      SignHash firstSigns = followDerivR(firstInputElt);
      SignHash retval = new SignHash();
      // unary case
      if (numChildren == 2) {
        // apply rules
        for (Sign s : firstSigns.asSignSet()) {
          List<Sign> results = rules.applyUnaryRules(s);
          for (Sign rSign : results)
            retval.insert(rSign);
        }
        // caution/warn upon failure
        if (!containsCat(retval, simpleCat)) {
          boolean noResults = retval.isEmpty();
          String inCat = firstInputElt.getAttributeValue("cat");
          String msg = "Unable to derive: " + cat + " from: " + inCat;
          if (!noResults)
            ccgBankTaskTestbed.log("Caution for " + header + ": " + msg);
          if (ccgBankTaskTestbed.isDebugDerivations()) {
            ccgBankTaskTestbed.log(header + ": derivation stymied; inputs: ");
            for (Sign s : firstSigns.asSignSet()) {
              ccgBankTaskTestbed.log(s.toString());
            }
            if (!noResults) {
              ccgBankTaskTestbed.log("Outputs: ");
              for (Sign s : retval.asSignSet())
                ccgBankTaskTestbed.log(s.toString());
            }
          }
          if (noResults)
            throw new ParseException("Derivation blocked: " + msg);
        }
      }
      // binary case
      else if (numChildren == 3) {
        Element secondInputElt = (Element) childElts.get(2);
        SignHash secondSigns = followDerivR(secondInputElt);
        // apply rules
        for (Sign sign1 : firstSigns.asSignSet()) {
          for (Sign sign2 : secondSigns.asSignSet()) {
            List<Sign> results = rules.applyBinaryRules(sign1, sign2);
            for (Sign rSign : results)
              retval.insert(rSign);
          }
        }
        // if no results, propagate one input if the other is
        // internal punct
        if (retval.isEmpty()) {
          if (isPunct(secondInputElt))
            return firstSigns;
          else if (isPunct(firstInputElt))
            return secondSigns;
        }
        // caution/warn upon failure
        if (!containsCat(retval, simpleCat)) {
          boolean noResults = retval.isEmpty();
          String inCat1 = firstInputElt.getAttributeValue("cat");
          String inCat2 = secondInputElt.getAttributeValue("cat");
          String msg = "Unable to derive: " + cat + " from: " + inCat1 + " and: " + inCat2;
          if (!noResults)
            ccgBankTaskTestbed.log("Caution for " + header + ": " + msg);
          if (ccgBankTaskTestbed.isDebugDerivations()) {
            ccgBankTaskTestbed.log(header + ": derivation stymied; first inputs: ");
            for (Sign sign1 : firstSigns.asSignSet()) {
              ccgBankTaskTestbed.log(sign1.toString());
            }
            ccgBankTaskTestbed.log("Second inputs: ");
            for (Sign sign2 : secondSigns.asSignSet()) {
              ccgBankTaskTestbed.log(sign2.toString());
            }
            if (!noResults) {
              ccgBankTaskTestbed.log("Outputs: ");
              for (Sign s : retval.asSignSet())
                ccgBankTaskTestbed.log(s.toString());
            }
          }
          if (noResults)
            throw new ParseException("Derivation blocked: " + msg);
        }
      }

      // Store cat ids of tree nodes for printing to aux files
      if (treeInfoFlag) {

        for (Sign s : retval.asSignSet()) {

          Hashtable<String, String> idConvTally = new Hashtable<String, String>();
          Hashtable<String, Integer> freqTally = new Hashtable<String, Integer>();
          ArrayList<String> fullCat = new ArrayList<String>();
          String catId = "";

          Category treeCat = s.getCategory();
          // System.out.println(header+" "+ntId+" "+treeCat);
          recurseCat(treeCat, fullCat, idConvTally, freqTally);
          /*
           * System.out.println(freqTally);
           * System.out.println(fullCat); System.out.println('\n');
           */

          if (fullCat.size() > 1) {

            for (String x : fullCat) {

              String y[] = x.split("_");
              if (y.length == 1) {
                catId = catId + "," + y[0];
                continue;
              }

              int freq = freqTally.get(y[1]);
              freqTally.put(y[1], freq - 1);

              if (x.endsWith("_M") && freq <= 1)
                x = x.replaceFirst("_M", "");

              catId = catId + "," + x;
            }
            catId = catId.replaceFirst(",", "");
            treeInfo.add(header + " " + ntId + " " + catId);
          }
          /*
           * System.out.println(idConvTally);
           * System.out.println(fullCat); System.out.println('\n');
           */
        }

      }

      // done
      return retval;
    }
    // lex lookup
    // nb: not always insisting on right POS, b/c hashing strategy uses
    // surface words,
    // thus doesn't distinguish lex signs based solely on POS
    // nb: might make sense to warn on lex cats with missing semantics
    else if (eltName.equals("Leafnode")) {
      try {
        String lex = derivElt.getAttributeValue("lexeme");
        Word w = lexicon.tokenizer.parseToken(lex);
        str += w.getForm() + " ";
        String cat = derivElt.getAttributeValue("cat");
        String simpleCat = derivElt.getAttributeValue("stag");
        String rel = derivElt.getAttributeValue("rel");
        String indexRel = derivElt.getAttributeValue("indexRel");
        String semClass = "";
        semClass = derivElt.getAttributeValue("class");

        String roles = derivElt.getAttributeValue("argRoles");
        String pos = derivElt.getAttributeValue("pos");
        // nb: for now, need to ignore rel for non-VB pos
        if (!pos.startsWith("VB"))
          rel = null;
        // lex lookup with required supertag
        // NB: there's no guarantee of getting the right arg roles if the word-cat pair is observed
        lexicon.setSupertagger(supertaggerStandIn);
        supertaggerStandIn.setTag(simpleCat);
        SignHash lexSigns = lexicon.getSignsFromWord(w);

        if (semClass == null || semClass.length() == 0)
          semClass = "NoClass";

        // add lex signs, filtered by rel, reindexed
        // also check number with matching pos, match on no class
        int matchPOS = 0;
        boolean matchNoClass = false;
        for (Iterator<Sign> it = lexSigns.asSignSet().iterator(); it.hasNext();) {
          Sign s = it.next();

          Word wTemp = s.getWords().get(0);
          String morphClass = wTemp.getSemClass();
          if (morphClass == null || morphClass.length() == 0)
            morphClass = "NoClass";

          Category lexcat = s.getCategory();
          LF lexLF = lexcat.getLF();

          // allow any class if no sem class given
          if (!(semClass.equals("NoClass") || semClass.equals(morphClass))
              || !containsPred(lexLF, rel)
              || !containsRoles(lexLF, roles)
              || !containsRel(lexLF, indexRel, s)) {
            it.remove();
          }
          else {
            UnifyControl.reindex(lexcat);
            if (wTemp.getPOS().equals(pos)) {
              matchPOS++;
              if (semClass.equals("NoClass") && morphClass.equals("NoClass"))
                matchNoClass = true;
            }
          }
        }
        // filter by pos unless none match
        if (matchPOS > 0) {
          for (Iterator<Sign> it = lexSigns.asSignSet().iterator(); it.hasNext();) {
            Sign s = it.next();
            Word wTemp = s.getWords().get(0);
            if (!wTemp.getPOS().equals(pos)) {
              it.remove(); continue;
            }
            // filter by mismatched class if apropos
            if (matchNoClass) {
              String morphClass = wTemp.getSemClass();
              if (morphClass != null && morphClass.length() != 0)
                it.remove();
            }
          }
        }
        if (lexSigns.isEmpty())
          throw new LexException("No matching category " + cat + " for: " + w);
        return lexSigns;
      } catch (LexException exc) {
        // try continuing derivations without lex signs for punctuation,
        // otherwise throw parse exception
        if (isPunct(derivElt)) {
          if (ccgBankTaskTestbed.isDebugDerivations()) {
            ccgBankTaskTestbed.log(header + ": " + exc.toString());
          }
          return new SignHash();
        }
        throw new ParseException(exc.toString());
      } catch (RuntimeException exc) {
        // for other exceptions, throw parse exception
        throw new ParseException(exc.toString());
      }
    } else
      throw new RuntimeException(header + ": unrecognized element in derivation: " + eltName);
  }
View Full Code Here

TOP

Related Classes of opennlp.ccg.parse.ParseException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.