Package org.apache.ctakes.temporal.ae.feature

Source Code of org.apache.ctakes.temporal.ae.feature.ListFeaturesExtractor

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.temporal.ae.feature;

import java.util.Collection;
import java.util.HashSet;
import java.util.List;

import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.classifier.Feature;
import org.uimafit.util.JCasUtil;

import com.google.common.collect.Lists;

public class ListFeaturesExtractor implements RelationFeaturesExtractor {

  /*
   * (non-Javadoc)
   * @see org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor#extract(org.apache.uima.jcas.JCas, org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation, org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation)
   * This feature extractor checks if each argument is part of a list, by looking
   * in a parse tree at sibling categories, for commas, coordinators given an NP parent.
   * Features include whether either arg is in a list and what list position (start, middle, end),
   * and whether the left sibling in the list is part of an existing relation, and if so, whether
   * that relation has the same other argument as the current proposed relation.
   */
  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
    List<Feature> feats = Lists.newArrayList();
   
    feats.addAll(getArgFeats(jCas, arg1, arg2, "Arg1"));
    feats.addAll(getArgFeats(jCas, arg2, arg2, "Arg2"));
    return feats;
  }

  private static Collection<? extends Feature> getArgFeats(JCas jCas, IdentifiedAnnotation primeArg,
      IdentifiedAnnotation secondArg, String prefix) {
    List<Feature> feats = Lists.newArrayList();
   
    List<TreebankNode> nodes = JCasUtil.selectCovered(TreebankNode.class, primeArg);
    if(nodes.size() > 0){
      TreebankNode node = nodes.get(0);
      if(node.getBegin() == primeArg.getBegin() && node.getEnd() == primeArg.getEnd()){
        HashSet<String> priorNPs = new HashSet<String>();
        // we have a node with the exact span as the argument
        // now check if it is an element of a list
        // first move NNs up to their constituent
        if(node.getNodeType().startsWith("NN")){
          node = node.getParent();
        }
        TreebankNode parent = node.getParent();
        if(parent == null) return feats;
        int childIndex = -1;
        for(int i = 0; i < parent.getChildren().size(); i++){
          if(parent.getChildren(i) == node){
            childIndex = i;
            break;
          }
          priorNPs.add(getKey(parent.getChildren(i)));
        }
          
        // cnoditions for this arg being an element of a list:
        // 1) is NP
        // 2) Parent is NP
        // 3) left neighbor is , or right neighbor is , or both neigbors are ,
        boolean lcComma=false, rcComma=false, lcAnd=false;
        if(node.getNodeType().equals("NP") && parent.getNodeType().equals("NP")){
          if(childIndex > 0 && parent.getChildren(childIndex-1).getNodeType().equals(",")){
            // left child is ","
            lcComma = true;
          }
          if(childIndex+1 < parent.getChildren().size() && parent.getChildren(childIndex+1).getNodeType().equals(",")){
            rcComma = true;
          }
          if(childIndex+1 == parent.getChildren().size() && childIndex > 0 && parent.getChildren(childIndex-1).getNodeType().equals("CC")){
            lcAnd = true;
          }
        }
        if(lcComma && rcComma){
          feats.add(new Feature(prefix + "_midlist", true));
        }else if(childIndex==0 && rcComma){
          feats.add(new Feature(prefix + "_startlist", true));
        }else if(lcAnd){
          feats.add(new Feature(prefix + "_endlist", true));
        }
       
        if(lcComma || rcComma || lcAnd){
          // somehow in a list
          // check to see if any element of the list is already part of a relation
          for(BinaryTextRelation otherRel : JCasUtil.select(jCas, BinaryTextRelation.class)){
            Annotation a1 = otherRel.getArg1().getArgument();
            Annotation a2 = otherRel.getArg2().getArgument();
            if(a1 instanceof TimeMention || a2 instanceof TimeMention) continue; // covered by another feature
            if(priorNPs.contains(getKey(a1))){
              // one of the left children is already in another relation!
              feats.add(new Feature(prefix + "_leftSiblingInRelation", true));
             
              // check if the other argument in that relation is the secondary arg
              if(secondArg.getBegin() == a2.getBegin() && secondArg.getEnd() == a2.getEnd()){
                // the other proposed arg of this relation is already in a relation with another element of this list!
                feats.add(new Feature(prefix + "_leftSiblingInRelationWithCurArg"));
              }
            }
           
            if(priorNPs.contains(getKey(a2))){
              feats.add(new Feature(prefix + "_leftSiblingInRelation", true));
             
              if(secondArg.getBegin() == a1.getBegin() && secondArg.getEnd() == a1.getEnd()){
                // the other proposed arg of this relation is already in a relation with another element of this list!
                feats.add(new Feature(prefix + "_leftSiblingInRelationWithCurArg"));
              }
            }
          }
        }
      }
     
     
    }
   
    return feats;
  }

  private static String getKey(Annotation annot){
    return annot.getBegin() + "-" + annot.getEnd();
  }
}
TOP

Related Classes of org.apache.ctakes.temporal.ae.feature.ListFeaturesExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.