Package org.apache.ctakes.sideeffect.ae

Source Code of org.apache.ctakes.sideeffect.ae.SESentenceClassifierAnnotator

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.sideeffect.ae;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import libsvm.svm;
import libsvm.svm_model;
import libsvm.svm_node;

import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.JCas;

import org.apache.ctakes.sideeffect.type.PSESentenceFeature;
import org.apache.ctakes.sideeffect.type.SESentence;
import org.apache.ctakes.sideeffect.type.SideEffectAnnotation;

/**
* Classify side effect sentences and add them to SESentence
* @author Mayo Clinic
*
*/

public class SESentenceClassifierAnnotator extends JTextAnnotator_ImplBase {
  svm_model model; //trained libsvm model
  Map<String, String> feaMap; //key:nominal values, values: converted value
   
  public void initialize(AnnotatorContext annotCtx)
  throws AnnotatorInitializationException, AnnotatorConfigurationException
  {
    super.initialize(annotCtx);
         
    try {
      String pathOfTrainedModel = (String) getContext().getConfigParameterValue("PathOfModel");
          model = svm.svm_load_model(pathOfTrainedModel);
      }
    catch (Exception e) {
      e.printStackTrace();
    }
   
    //mapping nominal values to integer
    //why convert to integer? tried binary values but didn't performed well
    feaMap = new HashMap<String, String>();
    feaMap.put("nul", "0"); // both side effect keyword and location features
    feaMap.put("pre", "1"); // side effect keyword feature
    feaMap.put("bet", "1"); // location feature
    feaMap.put("bpd", "2"); // location feature
    feaMap.put("bdp", "3"); // location feature
    feaMap.put("bap", "4"); // location feature
    feaMap.put("bep", "5"); // location feature
    feaMap.put("afp", "6"); // location feature
    feaMap.put("any", "7"); // location feature
  }
 
  public void process(JCas jcas, ResultSpecification resSpec)
      throws AnnotatorProcessException {
    JFSIndexRepository indexes = jcas.getJFSIndexRepository();
    Iterator psfIter = indexes.getAnnotationIndex(PSESentenceFeature.type).iterator()
    Set<String> seSenSpans_ML = new HashSet<String>();
   
    //get side-effect sentence spans found by ML (livSVM)
    while(psfIter.hasNext()) {
      PSESentenceFeature psf = (PSESentenceFeature) psfIter.next();
     
      //classify SE sentences based on the trained libSVM model
        svm_node[] x = new svm_node[psf.getFeatures().size()];
      for(int j=0;j<psf.getFeatures().size();j++)
      {
        x[j] = new svm_node();
        x[j].index = j+1;
        String nomFea = psf.getFeatures(j);
        x[j].value = Double.parseDouble(feaMap.get(nomFea));
      }
     
      double clsLabel; //0:non-SE sentence , 1:SE sentence
      clsLabel = svm.svm_predict(model,x);
     
      //add side-effect sentence span to the Set
      if(clsLabel==1) {             
        String span = Integer.toString(psf.getPseSen().getBegin()) + "|"
                + Integer.toString(psf.getPseSen().getEnd());
        seSenSpans_ML.add(span);
     
    }
   
    //get side-effect sentence spans found by rules (SideEffectAnnotator)
    Set<String> seSenSpans_rule = new HashSet<String>();
    Iterator seIter = indexes.getAnnotationIndex(SideEffectAnnotation.type).iterator();
   
      while(seIter.hasNext()) {
        SideEffectAnnotation se = (SideEffectAnnotation) seIter.next();       
        String span = Integer.toString(se.getSentence().getBegin()) + "|"
                + Integer.toString(se.getSentence().getEnd());
        seSenSpans_rule.add(span);
      }
   
      //annotate SE sentences (add them to SESentence)
      Set<String> seSenSpans = new HashSet<String>(seSenSpans_ML);
      seSenSpans.addAll(seSenSpans_rule); //union of ML and rule

      for(String s : seSenSpans) {
        String[] stk = s.split("\\|");
        int begin = Integer.parseInt(stk[0]);
        int end = Integer.parseInt(stk[1]);
      SESentence ses = new SESentence(jcas);
      ses.setBegin(begin);
      ses.setEnd(end);
      ses.addToIndexes();
      }
  }
     
}
TOP

Related Classes of org.apache.ctakes.sideeffect.ae.SESentenceClassifierAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.