Source Code of org.apache.ctakes.sideeffect.ae.PSESentenceAnnotator

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.sideeffect.ae;


import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;


import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceInitializationException;


import org.apache.ctakes.core.util.FSUtil;
import org.apache.ctakes.sideeffect.util.SEUtil;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.sideeffect.type.PSESentence;


/**
 * Identify the sentence(s) that contains PSE (signs/symptoms or
 * disease/disorder) and drugs and add them to PSESentence. - If the sentence
 * containing PSE doesn't have drug, include the previous sentence if it
 * contains drug and it is in the same line (paragraph). - Disregard the
 * particular section(s) (eg, allergy section (20105)) This annotation will be
 * used to extract features for side-effect sentence classification.
 * 
 * @author Mayo Clinic
 * 
 */
public class PSESentenceAnnotator extends JCasAnnotator_ImplBase {
  public static final String PARAM_IGNORE_SECTIONS = "SectionsToIgnore";
  private Set setionsToIgnore;


  public void initialize(UimaContext aContext)
      throws ResourceInitializationException {
    super.initialize(aContext);


    // get sections to ignore
    String[] sections;
    try {
      sections = (String[]) getContext().getConfigParameterValue(
          PARAM_IGNORE_SECTIONS);
      setionsToIgnore = new HashSet();
      for (int i = 0; i < sections.length; i++)
        setionsToIgnore.add(sections[i]);
    } catch (Exception e) {
      throw new ResourceInitializationException(e);
    }


  }


  public void process(JCas jcas) throws AnalysisEngineProcessException {
    JFSIndexRepository indexes = jcas.getJFSIndexRepository();
    Iterator senIter = indexes.getAnnotationIndex(Sentence.type).iterator();


    while (senIter.hasNext()) {
      Sentence sen = (Sentence) senIter.next();
      boolean foundDrug = false;
      boolean foundPSE = false;


      if (setionsToIgnore.contains(sen.getSegmentId()))
        continue;


      // if drug is not found in the same sentence and the previous
      // sentence contains
      // drug and they are in the same line, then sentence will be
      // previous + current sentence
      Iterator neIter = FSUtil
          .getAnnotationsInSpanIterator(jcas,
              IdentifiedAnnotation.type, sen.getBegin(),
              sen.getEnd() + 1);
      while (neIter.hasNext()) {
        IdentifiedAnnotation n = (IdentifiedAnnotation) neIter.next();
        if (n.getTypeID() == 2 || n.getTypeID() == 3)
          foundPSE = true;


        if (n.getTypeID() == 1)
          foundDrug = true;
      }


      if (!foundPSE)
        continue;


      if (foundPSE && foundDrug) {
        PSESentence ps = new PSESentence(jcas);
        ps.setBegin(sen.getBegin());
        ps.setEnd(sen.getEnd());
        ps.addToIndexes();
      } else if (foundPSE && !foundDrug) {
        int num = sen.getSentenceNumber();
        num = (num > 0) ? num - 1 : num;
        int[] previousSenSpan = SEUtil
            .getSentenceSpanOfGivenSentenceNum(jcas, num);


        // only if they are in the same line
        if (SEUtil.isSpanInSameLine(jcas, previousSenSpan[0],
            sen.getEnd())) {
          neIter = FSUtil.getAnnotationsInSpanIterator(jcas,
              IdentifiedAnnotation.type, previousSenSpan[0],
              previousSenSpan[1] + 1);
          while (neIter.hasNext()) {
            IdentifiedAnnotation n = (IdentifiedAnnotation) neIter
                .next();
            if (n.getTypeID() == 1) {
              PSESentence ps = new PSESentence(jcas);
              ps.setBegin(previousSenSpan[0]);
              ps.setEnd(sen.getEnd());
              ps.addToIndexes();
              break;
            }
          }
        }
      }
    }
  }
}
Source Code of org.apache.ctakes.sideeffect.ae.PSESentenceAnnotator

Related Classes of org.apache.ctakes.sideeffect.ae.PSESentenceAnnotator