// Package org.apache.ctakes.assertion.medfacts
//
// Source code of org.apache.ctakes.assertion.medfacts.AssertionAnalysisEngine

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.assertion.medfacts;

import java.io.File;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.logging.Logger;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.jcas.tcas.Annotation_Type;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
//import org.jfree.util.Log;
import org.mitre.jcarafe.jarafe.JarafeMEDecoder;
import org.mitre.medfacts.i2b2.annotation.PartOfSpeechTagger;
import org.mitre.medfacts.i2b2.annotation.ScopeParser;
import org.mitre.medfacts.i2b2.api.ApiConcept;
import org.mitre.medfacts.i2b2.api.AssertionDecoderConfiguration;
import org.mitre.medfacts.i2b2.api.SingleDocumentProcessor;
import org.mitre.medfacts.i2b2.cli.BatchRunner;
import org.mitre.medfacts.i2b2.util.StringHandling;
import org.mitre.medfacts.zoner.CharacterOffsetToLineTokenConverter;
import org.mitre.medfacts.zoner.LineTokenToCharacterOffsetConverter;


import org.apache.ctakes.assertion.attributes.generic.GenericAttributeClassifier;
import org.apache.ctakes.assertion.attributes.subject.SubjectAttributeClassifier;
import org.apache.ctakes.assertion.medfacts.i2b2.api.CharacterOffsetToLineTokenConverterCtakesImpl;
import org.apache.ctakes.assertion.medfacts.i2b2.api.SingleDocumentProcessorCtakes;
import org.apache.ctakes.assertion.medfacts.types.Assertion;
import org.apache.ctakes.assertion.medfacts.types.Concept;
import org.apache.ctakes.assertion.medfacts.types.Concept_Type;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;

public class AssertionAnalysisEngine extends JCasAnnotator_ImplBase
{
  private static Logger logger = Logger.getLogger(AssertionAnalysisEngine.class.getName());
 
  AssertionDecoderConfiguration assertionDecoderConfiguration;

  public AssertionAnalysisEngine()
  {
  }
 
  @Override
  public void initialize(UimaContext uimaContext)
    throws ResourceInitializationException
  {
      super.initialize(uimaContext);
   
      // byte assertionModelContents[];
      String scopeModelFilePath;
      String cueModelFilePath;
      String posModelFilePath;
      File enabledFeaturesFile;

      File assertionModelFile = null;
      try
      {
        String assertionModelResourceKey = "assertionModelResource";
        String assertionModelFilePath = getContext().getResourceFilePath(
            assertionModelResourceKey);
        assertionModelFile = new File(assertionModelFilePath);
        // assertionModelContents = StringHandling
        // .readEntireContentsBinary(assertionModelFile);
        String scopeModelResourceKey = "scopeModelResource";
        scopeModelFilePath = getContext().getResourceFilePath(
            scopeModelResourceKey);
        String cueModelResourceKey = "cueModelResource";
        cueModelFilePath = getContext().getResourceFilePath(cueModelResourceKey);

        String posModelResourceKey = "posModelResource";
        posModelFilePath = getContext().getResourceFilePath(posModelResourceKey);

        String enabledFeaturesResourceKey = "enabledFeaturesResource";
        String enabledFeaturesFilePath = getContext().getResourceFilePath(
            enabledFeaturesResourceKey);
        enabledFeaturesFile = new File(enabledFeaturesFilePath);
      } catch (ResourceAccessException e)
      {
        String message = String.format("problem accessing resource");
        throw new RuntimeException(message, e);
      }

      AssertionDecoderConfiguration assertionDecoderConfiguration = new AssertionDecoderConfiguration();

      logger.info(String.format("scope model file: %s", scopeModelFilePath));
      logger.info(String.format("cue model file: %s", cueModelFilePath));
      ScopeParser scopeParser = new ScopeParser(scopeModelFilePath,
          cueModelFilePath);
      assertionDecoderConfiguration.setScopeParser(scopeParser);

      logger.info(String.format("pos model file: %s", posModelFilePath));
      PartOfSpeechTagger posTagger = new PartOfSpeechTagger(posModelFilePath);
      assertionDecoderConfiguration.setPosTagger(posTagger);

      Set<String> enabledFeatureIdSet = null;
      enabledFeatureIdSet = BatchRunner
          .loadEnabledFeaturesFromFile(enabledFeaturesFile);
      assertionDecoderConfiguration.setEnabledFeatureIdSet(enabledFeatureIdSet);

      JarafeMEDecoder assertionDecoder = null;
      assertionDecoder = new JarafeMEDecoder(assertionModelFile);
      assertionDecoderConfiguration.setAssertionDecoder(assertionDecoder);

      this.assertionDecoderConfiguration = assertionDecoderConfiguration;
  }

  @Override
  public void process(JCas jcas) throws AnalysisEngineProcessException
  {
    logger.info("(logging statement) AssertionAnalysisEngine.process() BEGIN");
    System.out.println("(stdout) AssertionAnalysisEngine.process() BEGIN");
    String contents = jcas.getDocumentText();

    // String tokenizedContents = tokenizeCasDocumentText(jcas);

    int conceptType = Concept.type;
    AnnotationIndex<Annotation> conceptAnnotationIndex = jcas
        .getAnnotationIndex(conceptType);

    ArrayList<ApiConcept> apiConceptList = new ArrayList<ApiConcept>();
    for (Annotation annotation : conceptAnnotationIndex)
    {
      Concept conceptAnnotation = (Concept) annotation;

      ApiConcept apiConcept = new ApiConcept();
      int begin = conceptAnnotation.getBegin();
      int end = conceptAnnotation.getEnd();
      String conceptText = contents.substring(begin, end);

      apiConcept.setBegin(begin);
      apiConcept.setEnd(end);
      apiConcept.setText(conceptText);
      apiConcept.setType(conceptAnnotation.getConceptType());
      apiConcept.setExternalId(conceptAnnotation.getAddress());

      apiConceptList.add(apiConcept);
    }

    // String conceptFilePath =
    // currentTextFile.getAbsolutePath().replaceFirst("\\.txt$", ".con");
    // File conceptFile = new File(conceptFilePath);
    // logger.info(String.format("    - using concept file \"%s\"...",
    // conceptFile.getName()));
    // String conceptFileContents =
    // StringHandling.readEntireContents(conceptFile);
    // //List<Concept> parseConceptFileContents(conceptFileContents);
    //
    // LineTokenToCharacterOffsetConverter converter =
    // new LineTokenToCharacterOffsetConverter(contents);
    //
    // List<ApiConcept> apiConceptList = parseConceptFile(conceptFile, contents,
    // converter);

    // LineTokenToCharacterOffsetConverter converter =
    // new LineTokenToCharacterOffsetConverter(contents);


    // SingleDocumentProcessor p = new SingleDocumentProcessor();
    SingleDocumentProcessorCtakes p = new SingleDocumentProcessorCtakes();
    p.setJcas(jcas);
    p.setAssertionDecoderConfiguration(assertionDecoderConfiguration);
    // p.setContents(tokenizedContents);
    p.setContents(contents);
    CharacterOffsetToLineTokenConverter converter = new CharacterOffsetToLineTokenConverterCtakesImpl(
        jcas);
    p.setConverter2(converter);
    for (ApiConcept apiConcept : apiConceptList)
    {
      //logger
      //    .info(String.format("dir loader concept: %s", apiConcept.toString()));
      p.addConcept(apiConcept);
    }

    logger
        .info("(logging statement) AssertionAnalysisEngine.process() BEFORE CALLING p.processSingleDocument()");

    p.processSingleDocument();

    logger
        .info("(logging statement) AssertionAnalysisEngine.process() AFTER CALLING p.processSingleDocument()");

    Map<Integer, String> assertionTypeMap = p.getAssertionTypeMap();
    //logger.info(String.format("    - done processing ..\"."));

    // Map<Integer, Annotation> annotationMap = generateAnnotationMap(jcas,
    // Concept.type);
    CasIndexer<Annotation> indexer = new CasIndexer<Annotation>(jcas, null);

    //logger.info("assertionTypeMap loop OUTSIDE BEFORE...");
    for (Entry<Integer, String> current : assertionTypeMap.entrySet())
    {
      //logger.info("    assertionTypeMap loop INSIDE BEGIN");
      String currentAssertionType = current.getValue();
      //logger.info(String.format("  currentAssertionType: %s",
      //    currentAssertionType));
      Integer currentIndex = current.getKey();
      ApiConcept originalConcept = apiConceptList.get(currentIndex);

      Concept associatedConcept = (Concept) indexer
          .lookupByAddress(originalConcept.getExternalId());
      int entityAddress = associatedConcept.getOriginalEntityExternalId();
      IdentifiedAnnotation annotation = (IdentifiedAnnotation) indexer
          .lookupByAddress(entityAddress);

      // possible values for currentAssertionType:
      // present
      // absent
      // associated_with_someone_else
      // conditional
      // hypothetical
      // possible

//      logger.info(String.format("removed entityMention (%s) from indexes",
//          entityMention.toString()));
//      entityMention.removeFromIndexes();
      mapI2B2AssertionValueToCtakes(currentAssertionType, annotation);
     
//      // Overwrite mastif's generic attribute with Mayo's generic attribute
//      Boolean generic = GenericAttributeClassifier.getGeneric(jcas,entityMention);
//      Boolean oldgeneric = entityMention.getGeneric();
//      entityMention.setGeneric(generic);
//      System.out.println("overwrote mastif's generic="+oldgeneric+" for "+entityMention.getCoveredText()+" with "+generic);
//
//      // Overwrite mastif's subject attribute with Mayo subject attribute.
//      // SHARP annotation guidelines have subject=NULL whenever generic=true
//      String subject = null;
//      String oldsubj = entityMention.getSubject();
//      if (entityMention.getGeneric()==false) {
//          subject = SubjectAttributeClassifier.getSubject(jcas,entityMention);
//      }
//      entityMention.setSubject(subject);
//      System.out.println("overwrote mastif's subject="+oldsubj+" for "+entityMention.getCoveredText()+" with "+subject);

//      entityMention.addToIndexes();
//      logger.info(String.format("added back entityMention (%s) to indexes",
//          entityMention.toString()));

      // Assertion assertion = new Assertion(jcas, originalConcept.getBegin(),
      // originalConcept.getEnd());
      // assertion.setAssertionType(currentAssertionType);
      // Concept associatedConcept = (Concept)
      // annotationMap.get(originalConcept.getExternalId());
      // assertion.setAssociatedConcept(associatedConcept);
      // assertion.addToIndexes();

      //logger.info("    assertionTypeMap loop INSIDE END");
    }
    //logger.info("assertionTypeMap loop OUTSIDE AFTER!!");
    System.out.println("(stdout) AssertionAnalysisEngine.process() END");
    logger.info("(logging statement) AssertionAnalysisEngine.process() END");
  }

  public static void mapI2B2AssertionValueToCtakes(String assertionType,
      IdentifiedAnnotation annotation) throws AnalysisEngineProcessException
  {
    if (assertionType == null)
    {
      String message = "current assertion type is null; this is a problem!!";
      System.err.println(message);
      logger.severe(message);
      // Exception runtimeException = new RuntimeException(message);
      // throw new AnalysisEngineProcessException(runtimeException);
   
      // ALL OBVIOUS ERROR VALUES!!
      annotation.setSubject("skipped");
      annotation.setPolarity(-2);
      annotation.setConfidence(-2.0f);
      annotation.setUncertainty(-2);
      annotation.setConditional(false);
      annotation.setGeneric(false);

    } else if (assertionType.equals("present"))
    // PRESENT (mastif value)
    {
      //debugAnnotationsInCas(jcas, entityMention, "=== BEFORE setting entity mention properties (PRESENT)... ===");
      // ALL DEFAULT VALUES!! (since this is present)
      annotation.setSubject(CONST.ATTR_SUBJECT_PATIENT);
      annotation.setPolarity(1);
      annotation.setConfidence(1.0f);
      annotation.setUncertainty(0);
      annotation.setConditional(false);
      annotation.setGeneric(false);

      //debugAnnotationsInCas(jcas, entityMention, "=== AFTER setting entity mention properties (PRESENT)... ===");
    } else if (assertionType.equals("absent"))
    // ABSENT (mastif value)
    {
      annotation.setSubject(CONST.ATTR_SUBJECT_PATIENT);
      annotation.setPolarity(-1); // NOT DEFAULT VALUE
      annotation.setConfidence(1.0f);
      annotation.setUncertainty(0);
      annotation.setConditional(false);
      annotation.setGeneric(false);

    } else if (assertionType.equals("associated_with_someone_else"))
    // ASSOCIATED WITH SOMEONE ELSE (mastif value)
    {
      annotation.setSubject("CONST.ATTR_SUBJECT_FAMILY_MEMBER"); // NOT DEFAULT VALUE
      annotation.setPolarity(1);
      annotation.setConfidence(1.0f);
      annotation.setUncertainty(0);
      annotation.setConditional(false);
      annotation.setGeneric(false);

    } else if (assertionType.equals("conditional"))
    // CONDITIONAL (mastif value)
    {
      // currently no mapping to sharp type...all sharp properties are defaults!
      annotation.setSubject(CONST.ATTR_SUBJECT_PATIENT);
      annotation.setPolarity(1);
      annotation.setConfidence(1.0f);
      annotation.setUncertainty(0);
      annotation.setConditional(false);
      annotation.setGeneric(false);

    } else if (assertionType.equals("hypothetical"))
    // HYPOTHETICAL (mastif value)
    {
      annotation.setSubject(CONST.ATTR_SUBJECT_PATIENT);
      annotation.setPolarity(1);
      annotation.setConfidence(1.0f);
      annotation.setUncertainty(0);
      annotation.setConditional(true); // NOT DEFAULT VALUE
      annotation.setGeneric(false);

    } else if (assertionType.equals("possible"))
    // POSSIBLE (mastif value)
    {
      annotation.setSubject(CONST.ATTR_SUBJECT_PATIENT);
      annotation.setPolarity(1);
      annotation.setConfidence(1.0f);
      annotation.setUncertainty(1); // NOT DEFAULT VALUE
      annotation.setConditional(false);
      annotation.setGeneric(false);
    } else
    {
      String message = String.format(
          "unexpected assertion value returned!! \"%s\"",
          assertionType);
      logger.severe(message);
      System.err.println(message);
      Exception runtimeException = new RuntimeException(message);
      throw new AnalysisEngineProcessException(runtimeException);
    }
  }

  public void debugAnnotationsInCas(JCas jcas, IdentifiedAnnotation annotation,
      String label)
  {
    CasIndexer<IdentifiedAnnotation> i = new CasIndexer<IdentifiedAnnotation>(jcas, annotation.getType());
   
    StringBuilder b = new StringBuilder();
    b.append(String.format("<<<<<%n### TARGET ###%nclass: %s%naddress: %d%nvalue: %s%n### END TARGET ###%n>>>>>%n%n", annotation.getClass().getName(), annotation.getAddress(), annotation.toString()));
   
    String debugOutput = i.convertToDebugOutput(label, annotation);
   
    b.append(debugOutput);
   
    logger.info(b.toString());
   
  }

  public Map<Integer, Annotation> generateAnnotationMap(JCas jcas)
  {
    return generateAnnotationMap(jcas, null);
  }

  public Map<Integer, Annotation> generateAnnotationMap(JCas jcas,
      Integer typeId)
  {
    Map<Integer, Annotation> annotationMap = new HashMap<Integer, Annotation>();

    AnnotationIndex<Annotation> index = null;
    if (typeId == null)
    {
      index = jcas.getAnnotationIndex();
    } else
    {
      index = jcas.getAnnotationIndex(typeId);
    }
    FSIterator<Annotation> iterator = index.iterator();
    while (iterator.hasNext())
    {
      Annotation current = iterator.next();
      int address = current.getAddress();
      annotationMap.put(address, current);
    }

    return annotationMap;
  }

  // public String tokenizeCasDocumentText(JCas jcas)
  // {
  // ArrayList<ArrayList<String>> arrayOfLines = construct2DTokenArray(jcas);
  //
  // String spaceSeparatedTokensInput = convert2DTokenArrayToText(arrayOfLines);
  //
  // return spaceSeparatedTokensInput;
  // }
  //
  // public ArrayList<ArrayList<String>> construct2DTokenArray(JCas jcas)
  // {
  // int sentenceType = Sentence.type;
  // AnnotationIndex<Annotation> sentenceAnnotationIndex =
  // jcas.getAnnotationIndex(sentenceType);
  // ArrayList<ArrayList<String>> arrayOfLines = new
  // ArrayList<ArrayList<String>>();
  //
  // //ArrayList<ApiConcept> apiConceptList = new ArrayList<ApiConcept>();
  // for (Annotation annotation : sentenceAnnotationIndex)
  // {
  // Sentence sentence = (Sentence)annotation;
  // int sentenceBegin = sentence.getBegin();
  // int sentenceEnd = sentence.getEnd();
  //
  // AnnotationIndex<Annotation> tokenAnnotationIndex =
  // jcas.getAnnotationIndex(BaseToken.type);
  // ArrayList<String> arrayOfTokens = new ArrayList<String>();
  // for (Annotation baseTokenAnnotationUntyped : tokenAnnotationIndex)
  // {
  // // ignore tokens that are outside of the sentence.
  // // there has to be a better way to do this with Constraints, but this
  // // should work for now...
  // if (baseTokenAnnotationUntyped.getBegin() < sentenceBegin ||
  // baseTokenAnnotationUntyped.getEnd() > sentenceEnd)
  // {
  // continue;
  // }
  // BaseToken baseToken = (BaseToken)baseTokenAnnotationUntyped;
  // if (baseToken instanceof WordToken ||
  // baseToken instanceof PunctuationToken)
  // {
  // String currentTokenText = baseToken.getCoveredText();
  // arrayOfTokens.add(currentTokenText);
  // }
  // }
  // arrayOfLines.add(arrayOfTokens);
  //
  // }
  // return arrayOfLines;
  // }
  //
  public String convert2DTokenArrayToText(
      ArrayList<ArrayList<String>> arrayOfLines)
  {
    final String DELIM = " ";
    StringWriter writer = new StringWriter();
    PrintWriter printer = new PrintWriter(writer);

    boolean isFirstLine = true;
    for (ArrayList<String> line : arrayOfLines)
    {
      if (!isFirstLine)
      {
        printer.println();
      }

      boolean isFirstTokenOnLine = true;
      for (String currentToken : line)
      {
        if (!isFirstTokenOnLine)
        {
          printer.print(DELIM);
        }
        printer.print(currentToken);
        isFirstTokenOnLine = false;
      }

      isFirstLine = false;
    }

    printer.close();

    String output = writer.toString();
    return output;
  }

}
// TOP
//
// Related Classes of org.apache.ctakes.assertion.medfacts.AssertionAnalysisEngine
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.