Package org.apache.ctakes.assertion.util

Source Code of org.apache.ctakes.assertion.util.AssertionTreeUtils

package org.apache.ctakes.assertion.util;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;

import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.utils.tree.SimpleTree;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.tcas.Annotation;

public class AssertionTreeUtils {

  public static SimpleTree extractAboveLeftConceptTree(JCas jcas, Annotation mention, SemanticClasses sems){
    SimpleTree tree = null;
    TopTreebankNode annotationTree = AnnotationTreeUtils.getAnnotationTree(jcas, mention);
    if(annotationTree != null){
      TopTreebankNode root = AnnotationTreeUtils.getTreeCopy(jcas, annotationTree);
      TreebankNode conceptNode = AnnotationTreeUtils.insertAnnotationNode(jcas, root, mention, "CONCEPT");
      // navigate up the tree to retrieve the first "S" above this node.
      TreebankNode node = conceptNode;
      while(node.getParent() != null && !node.getNodeType().startsWith("S")){
        node =  node.getParent();
      }

//      elevateListConcepts(jcas, node);
     
      // remove nodes to the right of the CONCEPT node
      AnnotationTreeUtils.removeRightOfAnnotation(jcas, node, conceptNode);
     
      tree = TreeExtractor.getSimpleClone(node);
    }else{
      tree = SimpleTree.fromString("(S noparse)");
    }

    TreeExtractor.lowercaseWords(tree);
    if(sems != null){
      replaceWordsWithSemanticClasses(tree, sems);
    }
   
    return tree;
  }
 
  public static void elevateListConcepts(JCas jcas, TreebankNode tree) {
    if(tree.getLeaf()) return;
   
    int conceptIndex = -1;
    for(int i = 0; i < tree.getChildren().size(); i++){
      if(tree.getChildren(i).getNodeType().equals("CONCEPT")){
        conceptIndex = i;
        break;
      }
    }
   
    if(conceptIndex == -1){
      // explore children
//      for(SimpleTree child : tree.children){
      for(int i = 0; i < tree.getChildren().size(); i++){
        elevateListConcepts(jcas, tree.getChildren(i));
      }
    }else{
      // check 3 conditions:
      // 1) First node under tree, with at least one node to the right, with category CC or ,
      // 2) last node under tree, with at least one node to the left, with category CC or ,
      // 3) node in the middle with node to the left category , and node to the right category CC or ,
      if(conceptIndex == 0 && tree.getChildren().size() > 1 && tree.getChildren(1).getNodeType().matches("CC|,")
          || conceptIndex == tree.getChildren().size()-1 && tree.getChildren().size() > 1 && tree.getChildren(conceptIndex-1).getNodeType().matches("CC|,")
          || conceptIndex > 0 && conceptIndex < tree.getChildren().size()-1 && tree.getChildren().size() > 2 && tree.getChildren(conceptIndex-1).getNodeType().equals(",") && tree.getChildren(conceptIndex+1).getNodeType().matches("CC|,")){
        // if we meet this simple condition we raise the CONCEPT node!
        // remove old concept node:
        TreebankNode entityRoot = tree.getChildren(conceptIndex).getChildren(0);
        tree.setChildren(conceptIndex, entityRoot);
        entityRoot.setParent(tree);
       
        // insert new concept node:
//        SimpleTree replacementNode = new SimpleTree(tree.cat);
        TreebankNode replacementNode = new TreebankNode(jcas);
        replacementNode.setNodeType(tree.getNodeType());
        replacementNode.setChildren(tree.getChildren());
        for(int i = 0; i < replacementNode.getChildren().size(); i++){
          replacementNode.getChildren(i).setParent(replacementNode);
        }
        replacementNode.setParent(tree);
       
        tree.setNodeType("CONCEPT");
//        tree.children = new ArrayList<SimpleTree>();
        FSArray children = new FSArray(jcas, 1);
        children.set(0, replacementNode);
        tree.setChildren(children);
//        tree.addChild(replacementNode);
      }
    }
  }

  public static SimpleTree extractAboveRightConceptTree(JCas jcas, Annotation mention, SemanticClasses sems){
    SimpleTree tree = null;
    TopTreebankNode annotationTree = AnnotationTreeUtils.getAnnotationTree(jcas, mention);
    if(annotationTree != null){
      TopTreebankNode root = AnnotationTreeUtils.getTreeCopy(jcas, annotationTree);
      TreebankNode conceptNode = AnnotationTreeUtils.insertAnnotationNode(jcas, root, mention, "CONCEPT");
      //            SimpleTree tree = null;
      //            tree = TreeExtractor.getSurroundingTreeWithAnnotation(node, "CONCEPT");
      // navigate up the tree to retrieve the first "S" above this node.
      TreebankNode node = conceptNode;
      while(node.getParent() != null && !node.getNodeType().startsWith("S")){
        node =  node.getParent();
      }

      // get the VP node (clause) or S that most closely dominates the concept, and remove everything after that
      // should smallen the tree while also permitting post-mention negation like "problem resolved" or "problem ruled out"
     
      // remove nodes to the right of the CONCEPT node
      AnnotationTreeUtils.removeLeftOfAnnotation(jcas, node, conceptNode);
   
      tree = TreeExtractor.getSimpleClone(node);
    }else{
      tree = SimpleTree.fromString("(S noparse)");
    }

    TreeExtractor.lowercaseWords(tree);
    if(sems != null){
      replaceWordsWithSemanticClasses(tree, sems);
    }
    return tree;
  }
 
  public static void replaceWordsWithSemanticClasses(SimpleTree tree, SemanticClasses sems){
    // recursion base case... actually apply semantic classes...
    if(tree.isLeaf()){
      for(Map.Entry<String,HashSet<String>> semClass : sems.entrySet()){
        if(semClass.getValue().contains(tree.cat)){
          tree.cat = "semclass_" + semClass.getKey();
        }
      }
    }else{
      // iterate over children
      for(SimpleTree child : tree.children){
        replaceWordsWithSemanticClasses(child, sems);
      }
    }
  }
 
  static HashMap<String,String> wordMap = new HashMap<String,String>();
    static Random random = new Random();
  public void randomizeWords(SimpleTree tree, boolean dep) {
    if(!tree.cat.equals("CONCEPT") && !tree.cat.equals("TOP") && (dep || tree.children.size() == 0)){
      if(wordMap.containsKey(tree.cat)){
        tree.cat = wordMap.get(tree.cat);
      }else{
        // generate new random word... (from http://stackoverflow.com/a/4952066)
        String oldWord = tree.cat;
        char[] word = new char[random.nextInt(8)+3]; // words of length 3 through 10. (1 and 2 letter words are boring.)
        for(int j = 0; j < word.length; j++)
        {
          word[j] = (char)('a' + random.nextInt(26));
        }
        tree.cat = new String(word);
        wordMap.put(oldWord, tree.cat);
      }
    }
    if(tree.children.size() > 0){
      for(SimpleTree child : tree.children){
        randomizeWords(child, dep);
      }
    }
  }

}
TOP

Related Classes of org.apache.ctakes.assertion.util.AssertionTreeUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.