Package edu.stanford.nlp.trees

Source Code of edu.stanford.nlp.trees.CoordinationTransformer

package edu.stanford.nlp.trees;


import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.StringLabel;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon;
import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;

/**
* Coordination transformer transforms a PennTreebank tree containing a coordination in a flat structure
* in order to get the dependencies right
*
* @author Marie-Catherine de Marneffe
*/
public class CoordinationTransformer implements TreeTransformer {

  boolean VERBOSE = false;
  boolean notDone = true;
  TreeTransformer tn = new DependencyTreeTransformer(); //to get rid of unwanted nodes and tag
  TreeTransformer qp = new QPTreeTransformer();         //to restructure the QP constituents
  // default constructor
  public CoordinationTransformer() {

  }

  /**
   * Transforms t if it contains a coordination in a flat structure (CCtransform)
   * and transforms UCP (UCPtransform)
   *
   * @param t a tree to be transformed
   * @return t transformed
   */
  public Tree transformTree(Tree t) {
    tn.transformTree(t);
    Tree tt = UCPtransform(t);
    Tree ttt = CCtransform(tt);
    return qp.transformTree(ttt);
  }


  private static final TregexPattern[][] matchPatterns = {
    {
      // UCP (JJ ...) -> ADJP
      TregexPattern.safeCompile("UCP=ucp <, /^JJ|ADJP/", true),
      // UCP (DT JJ ...) -> ADJP
      TregexPattern.safeCompile("UCP=ucp <, (DT $+ /^JJ|ADJP/)", true)
    },
    {
      // UCP (N ...) -> NP
      TregexPattern.safeCompile("UCP=ucp <, /^N/", true),
      TregexPattern.safeCompile("UCP=ucp <, (DT $+ /^N/)", true)
    }
  };

  private static final TsurgeonPattern[] operations = {
    Tsurgeon.parseOperation("relabel ucp ADJP"),
    Tsurgeon.parseOperation("relabel ucp NP"),
  };

  /**
   * Transforms t if it contains an UCP, it will change the UCP tag
   * into the phrasal tag of the first word of the UCP
   * (UCP (JJ electronic) (, ,) (NN computer) (CC and) (NN building))
   * will become
   * (ADJP (JJ electronic) (, ,) (NN computer) (CC and) (NN building))
   *
   * @param t a tree to be transformed
   * @return t transformed
   */
  public static Tree UCPtransform(Tree t) {
    Tree firstChild = t.firstChild();
    if (firstChild != null) {
      List<Pair<TregexPattern,TsurgeonPattern>> ops = Generics.newArrayList();

      for (int i = 0; i < operations.length; i++) {
        for (TregexPattern pattern : matchPatterns[i]) {
          ops.add(Generics.newPair(pattern, operations[i]));
        }
      }

      return Tsurgeon.processPatternsOnTree(ops, t);
    } else {
      return t;
    }
  }


  /**
   * Transforms t if it contains a coordination in a flat structure
   *
   * @param t a tree to be transformed
   * @return t transformed
   */
  public Tree CCtransform(Tree t) {
    notDone = true;
    Tree toReturn = new LabeledScoredTreeNode();
    while (notDone) {
      Tree cc = findCCparent(t, t);
      if (cc != null) {
        toReturn = cc;
        t = cc;
      } else {
        notDone = false;
        toReturn = t;
      }
    }
    return toReturn;
  }

  private static String getHeadTag(Tree t) {
    if (t.value().startsWith("NN")) {
      return "NP";
    } else if (t.value().startsWith("JJ")) {
      return "ADJP";
    } else {
      return "NP";
    }
  }

  private Tree transformCC(Tree t, int ccIndex) {
    //System.out.println(t);
    //System.out.println(ccIndex);

    Tree[] ccSiblings = t.children();

    //check if other CC
    List<Integer> list = new ArrayList<Integer>();
    for (int i = ccIndex + 1; i < ccSiblings.length; i++) {
      if (ccSiblings[i].value().startsWith("CC")) {
        list.add(Integer.valueOf(i));
      }
    }

    // a CC b c ... -> (a CC b) c ...
    if (ccIndex == 1 && !(ccSiblings[ccIndex - 1].value().equals("NP") || ccSiblings[ccIndex - 1].value().equals("ADJP") || ccSiblings[ccIndex - 1].value().equals("NNS"))) {// && (ccSiblings.length == ccIndex + 3 || !list.isEmpty())) {  // something like "soya or maize oil"
      String leftHead = getHeadTag(ccSiblings[ccIndex - 1]);
      //create a new tree to be inserted as first child of t
      Tree left = new LabeledScoredTreeNode(new StringLabel(leftHead), null);
      for (int i = 0; i < ccIndex + 2; i++) {
        left.addChild(ccSiblings[i]);
      }

      if(VERBOSE) {
        System.out.println("print left tree");
        left.pennPrint();
        System.out.println();
      }

      // remove all the children of t before ccIndex+2
      for (int i = 0; i < ccIndex + 2; i++) {
        t.removeChild(0);
      }

      // if stuff after (like "soya or maize oil and vegetables")
      // we need to put the tree in another tree
      if (!list.isEmpty()) {
        boolean comma = false;
        int index = list.get(0);
        if (VERBOSE) {System.err.println("more CC index " +  index);}
        if (ccSiblings[index - 1].value().equals(",")) {//to handle the case of a coma ("soya and maize oil, and vegetables")
          index = index - 1;
          comma = true;
        }
        if (VERBOSE) {System.err.println("more CC index " +  index);}
        String head = getHeadTag(ccSiblings[index - 1]);
        Tree tree = new LabeledScoredTreeNode(new StringLabel(head), null);
        tree.addChild(0, left);

        int k = 1;
        for(int j = ccIndex+2; j<index; j++) {
          if (VERBOSE) ccSiblings[j].pennPrint();
          t.removeChild(0);
          tree.addChild(k, ccSiblings[j]);
          k++;
        }

        if (VERBOSE) {
          System.out.println("print t");
          t.pennPrint();

          System.out.println("print tree");
          tree.pennPrint();
          System.out.println();
        }
        t.addChild(0, tree);
      } else {
        t.addChild(0, left);
      }
    }
    // DT a CC b c -> DT (a CC b) c
    else if (ccIndex == 2 && ccSiblings[0].value().startsWith("DT") && !ccSiblings[ccIndex - 1].value().equals("NNS") && (ccSiblings.length == 5 || (!list.isEmpty() && list.get(0) == 5))) {
      String head = getHeadTag(ccSiblings[ccIndex - 1]);
      //create a new tree to be inserted as second child of t (after the determiner
      Tree child = new LabeledScoredTreeNode(new StringLabel(head), null);
      for (int i = 1; i < ccIndex + 2; i++) {
        child.addChild(ccSiblings[i]);
      }
      // remove all the children of t between the determiner and ccIndex+2
      //System.out.println("print left tree");
      //child.pennPrint();

      for (int i = 1; i < ccIndex + 2; i++) {
        t.removeChild(1);
      }

      t.addChild(1, child);
    }

    // ... a, b CC c ... -> ... (a, b CC c) ...
    else if (ccIndex > 2 && ccSiblings[ccIndex - 2].value().equals(",") && !ccSiblings[ccIndex - 1].value().equals("NNS")) {
      String head = getHeadTag(ccSiblings[ccIndex - 1]);
      Tree child = new LabeledScoredTreeNode(new StringLabel(head), null);
      for (int i = ccIndex - 3; i < ccIndex + 2; i++) {
        child.addChild(ccSiblings[i]);
      }
      int i = ccIndex - 4;
      while (i > 0 && ccSiblings[i].value().equals(",")) {
        child.addChild(0, ccSiblings[i]);    // add the comma
        child.addChild(0, ccSiblings[i - 1])// add the word before the comma
        i = i - 2;
      }

      if (i < 0) {
        i = -1;
      }

      // remove the old children
      for (int j = i + 1; j < ccIndex + 2; j++) {
        t.removeChild(i + 1);
      }
      // put the new tree
      t.addChild(i + 1, child);
    }

    // something like "the new phone book and tour guide" -> multiple heads
    // we want (NP the new phone book) (CC and) (NP tour guide)
    else {
      boolean commaLeft = false;
      boolean commaRight = false;
      boolean preconj = false;
      int indexBegin = 0;
      Tree conjT = new LabeledScoredTreeNode(new StringLabel("CC"), null);
      // create the left tree
      String leftHead = getHeadTag(ccSiblings[ccIndex - 1]);
      Tree left = new LabeledScoredTreeNode(new StringLabel(leftHead), null);

      // handle the case of a preconjunct (either, both, neither)
      Tree first = ccSiblings[0];
      String leaf = first.children()[0].value().toLowerCase();
      if(leaf.equals("either") || leaf.equals("neither") || leaf.equals("both")) {
        preconj = true;
        indexBegin = 1;
        conjT.addChild(first.children()[0]);
      }

      for (int i = indexBegin; i < ccIndex - 1; i++) {
        left.addChild(ccSiblings[i]);
      }
      // handle the case of a comma ("GM soya and maize, and food ingredients")
      if (ccSiblings[ccIndex - 1].value().equals(",")) {
        commaLeft = true;
      } else {
        left.addChild(ccSiblings[ccIndex - 1]);
      }

      // create the CC tree
      Tree cc = ccSiblings[ccIndex];

      // create the right tree
      int nextCC;
      if (list.isEmpty()) {
        nextCC = ccSiblings.length;
      } else {
        nextCC = list.get(0);
      }
      String rightHead = getHeadTag(ccSiblings[nextCC - 1]);
      Tree right = new LabeledScoredTreeNode(new StringLabel(rightHead), null);
      for (int i = ccIndex + 1; i < nextCC - 1; i++) {
        right.addChild(ccSiblings[i]);
      }
      // handle the case of a comma ("GM soya and maize, and food ingredients")
      if (ccSiblings[nextCC - 1].value().equals(",")) {
        commaRight = true;
      } else {
        right.addChild(ccSiblings[nextCC - 1]);
      }

      // put trees together in old t, first we remove the old nodes
      for (int i = 0; i < nextCC; i++) {
        t.removeChild(0);
      }
      if (!list.isEmpty()) { // need an extra level
        Tree tree = new LabeledScoredTreeNode(new StringLabel("NP"), null);
        if(preconj) {
          tree.addChild(conjT);
        }
        tree.addChild(left);
        if (commaLeft) {
          tree.addChild(ccSiblings[ccIndex - 1]);
        }
        tree.addChild(cc);
        tree.addChild(right);
        if (commaRight) {
          t.addChild(0, ccSiblings[nextCC - 1]);
        }
        t.addChild(0, tree);
      } else {
         if(preconj) {
          t.addChild(conjT);
        }
        t.addChild(left);
        if (commaLeft) {
          t.addChild(ccSiblings[ccIndex - 1]);
        }
        t.addChild(cc);
        t.addChild(right);
        if (commaRight) {
          t.addChild(ccSiblings[nextCC - 1]);
        }
      }
    }

    return t;
  }

  private static boolean notNP(List<Tree> children, int ccIndex) {
    boolean toReturn = true;
    for (int i = ccIndex; i < children.size(); i++) {
      if (children.get(i).value().startsWith("NP")) {
        toReturn = false;
        break;
      }
    }
    return toReturn;
  }

  /*
   * Given a tree t, if this tree contains a CC inside a NP followed by 2 nodes
   * (i.e. we have a flat structure that will not work for the dependencies),
   * it will return the NP containing the CC and the index of the CC
   *
   */
  private Tree findCCparent(Tree t, Tree root) {
    if (t.isPreTerminal()) {
      if (t.value().startsWith("CC")) {
        Tree parent = t.parent(root);
        if (parent.value().startsWith("NP")) {
          List<Tree> children = parent.getChildrenAsList();
          //System.out.println(children);
          int ccIndex = children.indexOf(t);
          if (children.size() > ccIndex + 2 && notNP(children, ccIndex) && ccIndex != 0) {
            Tree newChild = transformCC(parent, ccIndex);
            parent = newChild;
            return root;
          }
        }
      }
    } else {
      for (Tree child : t.getChildrenAsList()) {
        Tree cur = findCCparent(child, root);
        if (cur != null) {
          return cur;
        }
      }

    }
    return null;
  }


  public static void main(String[] args) {

    CoordinationTransformer transformer = new CoordinationTransformer();
    Treebank tb = new MemoryTreebank();
    Properties props = StringUtils.argsToProperties(args);
    String treeFileName = props.getProperty("treeFile");

    if (treeFileName != null) {
      try {
        TreeReader tr = new PennTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFileName))), new LabeledScoredTreeFactory());
        Tree t;
        while ((t = tr.readTree()) != null) {
          tb.add(t);
        }
      } catch (IOException e) {
        throw new RuntimeException("File problem: " + e);
      }

    }

    for (Tree t : tb) {
      System.out.println("Original tree");
      t.pennPrint();
      System.out.println();
      System.out.println("Tree transformed");
      Tree tree = transformer.transformTree(t);
      tree.pennPrint();
      System.out.println();
      System.out.println("----------------------------");
    }
  }

}
TOP

Related Classes of edu.stanford.nlp.trees.CoordinationTransformer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.