Package org.apache.ctakes.assertion.attributes.subject

Source Code of org.apache.ctakes.assertion.attributes.subject.SubjectAttributeClassifier

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.assertion.attributes.subject;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.ctakes.assertion.eval.AssertionEvaluation;
import org.apache.ctakes.dependency.parser.util.DependencyPath;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Predicate;
import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.uimafit.util.JCasUtil;


/**
* @author stephenwu
*
*/
public class SubjectAttributeClassifier {

  public static final String DONOR_TOKEN = "donor_token";
  public static final String DONOR_SRLARG = "donor_srlarg";
  public static final String DONOR_DEPPATH = "donor_deppath";
  public static final String DONOR_DEPTOK = "donor_depsrl";
  public static final String DONOR_OR = "donor_or";
  public static final String FAMILY_TOKEN = "family_token";
  public static final String FAMILY_SRLARG = "family_srlarg";
  public static final String FAMILY_DEPPATH = "family_deppath";
  public static final String FAMILY_DEPTOK = "family_depsrl";
  public static final String FAMILY_OR = "family_or";
  public static final String OTHER_TOKEN = "other_token";
  public static final String OTHER_SRLARG = "other_srlarg";
  public static final String OTHER_DEPPATH = "other_deppath";
  public static final String OTHER_DEPTOK = "other_depsrl";
  public static final String OTHER_OR = "other_or";
    public static ArrayList<String> FeatureIndex = new ArrayList<String>();
    private static Logger logger = Logger.getLogger(SubjectAttributeClassifier.class);

    static{
            FeatureIndex.add(DONOR_TOKEN);
            FeatureIndex.add(DONOR_SRLARG);
            FeatureIndex.add(DONOR_DEPPATH);
            FeatureIndex.add(DONOR_DEPTOK);
            FeatureIndex.add(DONOR_OR);
            FeatureIndex.add(FAMILY_TOKEN);
            FeatureIndex.add(FAMILY_SRLARG);
            FeatureIndex.add(FAMILY_DEPPATH);
            FeatureIndex.add(FAMILY_DEPTOK);
            FeatureIndex.add(FAMILY_OR);
            FeatureIndex.add(OTHER_TOKEN);
            FeatureIndex.add(OTHER_SRLARG);
            FeatureIndex.add(OTHER_DEPPATH);
            FeatureIndex.add(OTHER_DEPTOK);
            FeatureIndex.add(OTHER_OR);
    }

  // currently goes from entityMention to Sentence to SemanticArgument
  public static String getSubject(JCas jCas, IdentifiedAnnotation mention) {
   
    // Extract the stuff into features
    HashMap<String, Boolean> vfeat = extract(jCas, mention);
   
    // Logic to identify cases, may be replaced by learned classification
    return classifyWithLogic(vfeat);
     
  }


  public static HashMap<String, Boolean> extract(JCas jCas,
      Annotation mention) {
    HashMap<String,Boolean> vfeat = new HashMap<String,Boolean>();
    for (String feat : FeatureIndex) {
      vfeat.put(feat, false);
    }
   
    // find the sentence that entityMention is in
    Sentence sEntity = null;
    Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
    for (Sentence s : sentences) {
      if ( s.getBegin()<=mention.getBegin() && s.getEnd()>=mention.getEnd()) {
        sEntity = s;
        break;
      }
    }
   
    // if there is no sentence, then all these features are null!
    if (sEntity==null) {
//      for ( String feat : FeatureIndex ) {
//        vfeat.put(feat, null);
//      }
//      return vfeat;
      return new HashMap<String,Boolean>();
    }
       
    // get any SRL arguments
    List<SemanticArgument> args = JCasUtil.selectCovered(jCas, SemanticArgument.class, sEntity);
    for (SemanticArgument arg : args) {
     
      // look in SRL arguments for a family or other subject
      if (arg.getLabel().matches("A[01]")) {
        if ( isDonorTerm(arg) ) {
          vfeat.put(DONOR_SRLARG, true);
        }
        if ( isFamilyTerm(arg) ) {
          vfeat.put(FAMILY_SRLARG, true);
        }
        if ( isOtherTerm(arg) ) {
          vfeat.put(OTHER_SRLARG, true);
        }
      }

    }

    // get any SRL predicates
    List<Predicate> preds = JCasUtil.selectCovered(jCas, Predicate.class, sEntity);

   
    // search dependency paths for stuff
    List<ConllDependencyNode> depnodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, mention);
    if (!depnodes.isEmpty()) {
      ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode(depnodes);
      for (ConllDependencyNode dn : DependencyUtility.getPathToTop(jCas, depnode)) {
        if ( isDonorTerm(dn) ) {
          vfeat.put(DONOR_DEPPATH, true);
        }
        if ( isFamilyTerm(dn) ) {
          vfeat.put(FAMILY_DEPPATH, true);
        }
        if ( isOtherTerm(dn) ) {
          vfeat.put(OTHER_DEPPATH, true);
        }
       
      }
    }

    // look for mentions of "donor" in the tokens
    List<BaseToken> toks = JCasUtil.selectCovered(jCas, BaseToken.class, sEntity);
    for (BaseToken tok : toks) {
     
      if ( isDonorTerm(tok) ) {
        vfeat.put(DONOR_TOKEN, true);
       
        // check if there are one-removed dependencies on the dependency path
        DependencyPath path = DependencyUtility.getPath(jCas, DependencyUtility.getNominalHeadNode(jCas,tok),
            DependencyUtility.getNominalHeadNode(jCas,mention));
        int commonInd = path.indexOf(path.getCommonNode());
        if (commonInd==1 || commonInd==path.size()-2) {
          vfeat.put(DONOR_DEPTOK, true);
        }
      }
      if ( isFamilyTerm(tok) ) {
        vfeat.put(FAMILY_TOKEN, true);

        // check if there are one-removed dependencies on the dependency path
        DependencyPath path = DependencyUtility.getPath(jCas, DependencyUtility.getNominalHeadNode(jCas,tok),
            DependencyUtility.getNominalHeadNode(jCas,mention));
       
        // 6/28/13 srh fixing for null pointer exception
        if (path != null) {
          int commonInd = path.indexOf(path.getCommonNode());
          if (commonInd==1 || commonInd==path.size()-2) {
            vfeat.put(FAMILY_DEPTOK, true);
          }
        }
      }
     
      if ( isOtherTerm(tok) ) {
        vfeat.put(OTHER_TOKEN, true);

        // check if there are one-removed dependencies on the dependency path
        DependencyPath path = DependencyUtility.getPath(jCas, DependencyUtility.getNominalHeadNode(jCas,tok),
            DependencyUtility.getNominalHeadNode(jCas,mention));
        int commonInd = path.indexOf(path.getCommonNode());
        if (commonInd==1 || commonInd==path.size()-2) {
          vfeat.put(OTHER_DEPTOK, true);
        }
      }
    }
    return vfeat;
  }
 
  public static String classifyWithLogic(HashMap<String, Boolean> vfeat) {
   
    if (vfeat==null) {
      // if missing values, use default subject value
      logger.warn("Subject attribute classifier missing feature values, defaulting to 'patient'");
      return CONST.ATTR_SUBJECT_PATIENT;
    }

    Boolean donor_summary = new Boolean(vfeat.get(DONOR_TOKEN) || vfeat.get(DONOR_DEPPATH) ||
        vfeat.get(DONOR_DEPTOK) || vfeat.get(DONOR_SRLARG));
    Boolean family_summary = new Boolean(                         vfeat.get(FAMILY_DEPPATH) ||
        vfeat.get(FAMILY_DEPTOK) || vfeat.get(FAMILY_SRLARG));
    Boolean other_summary = new Boolean(                          vfeat.get(OTHER_DEPPATH) ||
        vfeat.get(OTHER_DEPTOK) || vfeat.get(OTHER_SRLARG));
    vfeat.put(DONOR_OR, donor_summary);
    vfeat.put(FAMILY_OR, family_summary);
    vfeat.put(OTHER_OR, other_summary);

    if (vfeat.get(DONOR_OR) && vfeat.get(FAMILY_OR)) {
      return CONST.ATTR_SUBJECT_DONOR_FAMILY_MEMBER;
    } else if (vfeat.get(DONOR_OR) && !vfeat.get(FAMILY_OR)) {
      return CONST.ATTR_SUBJECT_DONOR_OTHER;
    } else if (!vfeat.get(DONOR_OR) && !vfeat.get(FAMILY_OR) && vfeat.get(OTHER_OR)) {
      return CONST.ATTR_SUBJECT_OTHER;
    } else if (!vfeat.get(DONOR_OR) && vfeat.get(FAMILY_OR)) {
      return (CONST.ATTR_SUBJECT_FAMILY_MEMBER);
    } else {
      return CONST.ATTR_SUBJECT_PATIENT;
    }

  }


  public static boolean isDonorTerm(Annotation arg) {
    return arg.getCoveredText().toLowerCase()
    .matches("(donor).*");
  }

 
  public static boolean isFamilyTerm(Annotation arg) {
    return arg.getCoveredText().toLowerCase()
    .matches("(father|dad|mother|mom|bro|sis|sib|cousin|aunt|uncle|grandm|grandp|grandf|" +
        "wife|spouse|husband|child|offspring|progeny|son|daughter|nephew|niece|kin|family).*");
  }


  public static boolean isOtherTerm(Annotation arg) {
    return arg.getCoveredText().toLowerCase()
    .matches(".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*");
  }


  // a main method for regex testing
  public static void main(String[] args) {
    String s = "steps";
    if (s.toLowerCase().matches(".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*")) {
      System.out.println("match");
    } else {
      System.out.println("no match");
    }
  }
}
TOP

Related Classes of org.apache.ctakes.assertion.attributes.subject.SubjectAttributeClassifier

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.