Package org.apache.ctakes.drugner.cc

Source Code of org.apache.ctakes.drugner.cc.ConsumeNamedEntityRecordModel

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.drugner.cc;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;

import org.apache.ctakes.core.util.FSUtil;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.refsem.Date;
import org.apache.ctakes.typesystem.type.refsem.MedicationStrength;
import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.drugner.type.ChunkAnnotation;
import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
import org.apache.ctakes.typesystem.type.util.Pair;
import org.apache.ctakes.typesystem.type.util.Pairs;
//import org.apache.ctakes.drugner.type.DrugMentionAnnotation;
import org.apache.ctakes.drugner.type.SubSectionAnnotation;

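/**
* UIMA CAS consumer that prepares CAS content for extraction into DB2 by
* writing each medication event mention as a CSV record to a file under the
* configured output directory.
*
* @author
*/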
public class ConsumeNamedEntityRecordModel extends CasConsumer_ImplBase {
  // Attribute name under which storeAnnotationVersion records the annotation
  // version. Never assigned in the visible code, so it stays null unless a
  // subclass or code outside this file sets it.
  private String iv_annotVerPropKey;


  // NOTE(review): not referenced anywhere in the visible code.
  protected ArrayList al = new ArrayList();
  // Output directory, built in initialize() from the 'OutputDirectory' parameter.
  File iv_outputDirectory;

  // keepTrackOfDupBegin / keepTrackOfDupEnd: span of the most recently stored
  // mention; assignNamedEntityFeats compares against it to skip a repeated span.
  // milliWeek / milliDay: milliseconds in seven days and in one day.
  // NOTE(review): the two milli* values are not referenced in the visible code.
  int keepTrackOfDupBegin = 0, keepTrackOfDupEnd = 0, milliWeek = 604800000,
      milliDay = 86400000;


  public void initialize() throws ResourceInitializationException {
    fileForIO = (String) (String) getConfigParameterValue("OutputDirectory");
    iv_outputDirectory = new File(fileForIO);
    iv_useCurrentMedsSectionOnly = (Boolean) getConfigParameterValue("useMedicationSectionOnly");
    pathToDrugInclusions = (String) getConfigParameterValue("filterGroupDrugs");
    iv_fileForInput = (String) getConfigParameterValue("locationForClinicRcdInput");
    String holdListMedSections  = (String) getConfigParameterValue("listMedicationSection");

    StringTokenizer nextMedSection = new StringTokenizer(holdListMedSections, "|");
    while(nextMedSection.hasMoreTokens()) {
       iv_medicalSections.add(nextMedSection.nextToken());
      
    }
    if (!iv_outputDirectory.exists() || !iv_outputDirectory.isDirectory()) {
      if (!iv_outputDirectory.exists())
        // try{
        (new File(fileForIO)).mkdir();
      else
        throw new ResourceInitializationException(
            new Exception(
                "Parameter setting 'OutputDirectory' does not point to an existing directory."));
    }

  }
  /**
   * Stores annotation version as a property JCas object.
   *
   * @param jcas
   */
  private void storeAnnotationVersion(JCas jcas) {
     FSIterator<TOP> itr = jcas.getJFSIndexRepository().getAllIndexedFS(Pairs.type);
    if (itr == null || !itr.hasNext())
      return;

    Pairs props = (Pairs) itr.next();

    // create a new property array that is one item bigger
    FSArray propArr = props.getPairs();
    FSArray newPropArr = new FSArray(jcas, propArr.size() + 1);
    for (int i = 0; i < propArr.size(); i++) {
      newPropArr.set(i, propArr.get(i));
    }

    Pair annotVerProp = new Pair(jcas);       
    annotVerProp.setAttribute(iv_annotVerPropKey);
    annotVerProp.setValue(String.valueOf(iv_annotVer));

    // add annotation version prop as last item in array
    newPropArr.set(newPropArr.size() - 1, annotVerProp);
    props.setPairs(newPropArr);
  }
  public void processCas(CAS cas) throws ResourceProcessException {
    vRevDate = "";
    vNoteDate = "";
    vClinicalNumber = "";
    gotValidDate = false;
    gotDup = false;
    clinicNumber = "";

//    TODO: Move to Common Type System
//    generateTokenNormForms(cas);
    assignNamedEntityFeats(cas);

    //storeAnnotationVersion(cas);
  }

  /**
   * Stores annotation version as a property JCas object.
   *
   * @param jcas
   */
//  TODO: Move to Common Type System
//  private void storeAnnotationVersion(CAS cas)
//      throws ResourceProcessException {
//    try {
//      JCas jcas = cas.getJCas();
//      Iterator itr = jcas.getJFSIndexRepository().getAnnotationIndex(
//          Properties.type).iterator();
//      if (itr == null || !itr.hasNext())
//        return;
//
//      Properties props = (Properties) itr.next();
//
//      // create a new property array that is one item bigger
//      FSArray propArr = props.getPropArr();
//      FSArray newPropArr = new FSArray(jcas, propArr.size() + 1);
//      for (int i = 0; i < propArr.size(); i++) {
//        newPropArr.set(i, propArr.get(i));
//      }
//
//      Property annotVerProp = new Property(jcas);
//      annotVerProp.setKey(iv_annotVerPropKey);
//      annotVerProp.setValue(String.valueOf(iv_annotVer));
//
//      // add annotation version prop as last item in array
//      newPropArr.set(newPropArr.size() - 1, annotVerProp);
//      props.setPropArr(newPropArr);
//    } catch (Exception e) {
//      throw new ResourceProcessException(e);
//    }
//  }

  /**
   * Generates normalized form for each token annotation.
   */
//  TODO: Move to Common Type System 
//  private void generateTokenNormForms(CAS cas)
//      throws ResourceProcessException {
//    try {
//        JCas jcas  = cas.getJCas().getView("plaintext");
//      JFSIndexRepository indexes = jcas.getJFSIndexRepository();
//      Iterator propertiesItr = indexes
//          .getAnnotationIndex(Properties.type).iterator();
//
//      while (propertiesItr.hasNext()) {
//        Properties props = (Properties) propertiesItr.next();
//        FSArray fsArr = props.getPropArr();
//        for (int i = 0; i < fsArr.size(); i++)
//        {
//          if (fsArr.get(i) != null)
//          {
//              Property fs = (Property) fsArr.get(i);
//             
//              if (fs.getKey().compareTo("REV_DATE") == 0)
//              {
//            gotValidDate = true;
//            vRevDate = fs.getValue();
//              }
//              else if (fs.getKey().compareTo("NOTE_DATE") == 0)
//              {
//            vNoteDate = fs.getValue();
//              }
//              else if (fs.getKey().compareTo("CLINICAL_NUMBER") == 0)
//              {
//            if (fs.getValue().length() < 8)
//                vClinicalNumber = "0"+fs.getValue();
//            else
//                vClinicalNumber = fs.getValue();
//              }
//          }
//        }
//      }
//      Map abbrMap = new HashMap();
//
//      Iterator docItr = indexes.getAnnotationIndex(DocumentID.type).iterator();
//     
//      while (docItr.hasNext())
//      {
//          DocumentID doc = (DocumentID) docItr.next();
//          if (gotValidDate)
//        clinicNumber = vClinicalNumber;
//          abbrMap.put(new Integer(doc.getBegin()), doc);
//      }
//
//
//      Iterator btaItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
//      while (btaItr.hasNext())
//      {
//          BaseToken bta = (BaseToken) btaItr.next();
//          String normForm = null;
//          bta.setNormalizedForm(normForm);
//      }
//    } catch (Exception e) {
//        e.printStackTrace();
//      throw new ResourceProcessException(e);
//    }
//  }

  /**
   * Assigns typeID and segmentID values to Drug NamedEntities
   */
  private void assignNamedEntityFeats(CAS cas)
      throws ResourceProcessException {
    try {
     
          JCas jcas = cas.getCurrentView().getJCas();
        //JCas jcas  = cas.getJCas().getView("plaintext");
                 
            //System.err.println("Document Id: "+DocumentIDAnnotationUtil.getDocumentID(jcas));
           
      boolean gotMeds = false;

      int trackMedOccur = 0;

      String medInfo = "";
      //int keepTrackOfUID = 0;
     
      JFSIndexRepository indexes = jcas.getJFSIndexRepository();
   
      Set segmentSet = new HashSet();
      Iterator segmentItr = indexes.getAnnotationIndex(Segment.type).iterator();
      while (segmentItr.hasNext())
      {
          Segment s = (Segment)segmentItr.next();
          segmentSet.add(s);
          //System.err.println("Segment :"+ s.getCoveredText());
      }
     
            Iterator nerItr = indexes.getAnnotationIndex(MedicationEventMention.type).iterator();
      while (nerItr.hasNext())
      {
        MedicationEventMention neAnnot = (MedicationEventMention) nerItr.next();
//System.err.println("DrugNE :"+neAnnot.getCoveredText());       
        gotDup = false;

        // assign segment ID
        Iterator segItr = segmentSet.iterator();
        while (segItr.hasNext())
        {
          Segment seg = (Segment) segItr.next();
          if ((keepTrackOfDupBegin == neAnnot.getBegin()) && (keepTrackOfDupEnd == neAnnot.getEnd()))
          {
            gotDup = true;
          }

          if ((neAnnot.getBegin() >= seg.getBegin())
              && (neAnnot.getEnd() <= seg.getEnd()) && !gotDup) {

            // found segment for this NE
            String segmentID = seg.getId();
         
            if (iv_medicalSections.contains(segmentID)
                || !iv_useCurrentMedsSectionOnly.booleanValue()) {

              if (!gotDup) {
                keepTrackOfDupBegin = neAnnot.getBegin();
                keepTrackOfDupEnd = neAnnot.getEnd();



                Date localDate = neAnnot.getStartDate();//.getStartDate();
                String chunk = null;

                boolean foundChunk = false;
                Iterator findChunk = indexes
                .getAnnotationIndex(
                    ChunkAnnotation.type)
                    .iterator();

                try {
                  while (findChunk.hasNext() && !foundChunk)
                  {
                      ChunkAnnotation ca = (ChunkAnnotation) findChunk.next();
                    if (neAnnot.getBegin() >= ca.getBegin()
                        && neAnnot.getEnd() <= ca
                        .getEnd()) {
                      chunk = ca.getCoveredText()
                      .replace('\n', ' ')
                      .replace(',', ';');
                      foundChunk = true;
                    }
                  }
                } catch (Exception e) {
                  // TODO Auto-generated catch block
                  e.printStackTrace();
                }
                String containedInSubSection = segmentID;
                Iterator subSectionItr = indexes.getAnnotationIndex(
                    SubSectionAnnotation.type).iterator();
                while (subSectionItr.hasNext())
                {
                    SubSectionAnnotation ssAnnot = (SubSectionAnnotation) subSectionItr.next();
                    if (ssAnnot.getSubSectionBodyBegin() <= neAnnot.getBegin() && ssAnnot.getSubSectionBodyEnd() >= neAnnot.getEnd())
                    {
                  Iterator textSpanInSs = FSUtil.getAnnotationsIteratorInSpan(jcas, WordToken.type, ssAnnot.getSubSectionHeaderBegin(), ssAnnot.getSubSectionHeaderEnd());
                  String subSectionHeaderName = "";
                   
                  while (textSpanInSs.hasNext())
                  {
                      WordToken wta = (WordToken) textSpanInSs.next();
                      subSectionHeaderName = subSectionHeaderName + " " + wta.getCoveredText();
                  }
                  containedInSubSection = containedInSubSection+"|"+subSectionHeaderName+"|"+ssAnnot.getStatus();
                    }
                }
                gotMeds = true;
                trackMedOccur++;

                Calendar calendar = Calendar.getInstance();

                SimpleDateFormat format = new SimpleDateFormat("MM'/'dd'/'yyyy");

                if(vRevDate != null && vRevDate.length() > 0)
                    calendar.setTimeInMillis(new Long(vRevDate).longValue());
                else if(vNoteDate != null && vNoteDate.length() > 0)
                    calendar.setTimeInMillis(new Long(vNoteDate).longValue());
               
                String globalDate = format.format(calendar.getTime());
//                if (localDate == null
//                    || localDate.length() < 1) {
//                  localDate = globalDate;
//                }
  //              Iterator neItr = FSUtil.getAnnotationsIteratorInSpan(jcas, IdentifiedAnnotation.type, neAnnot.getBegin(), neAnnot.getEnd()+1);
                String neCui = "n/a";
                String status = "n/a";
                String rxNormCui = "n/a";

                FSArray ocArr = neAnnot.getOntologyConceptArr();
                if (ocArr != null)
                {
                  for (int i = 0; i < ocArr.size(); i++)
                  {
                    OntologyConcept oc = (OntologyConcept) ocArr.get(i);
                    neCui = oc.getCode();
                    rxNormCui = oc.getOui();
                  }
                }

                MedicationStrength strengthTerm = neAnnot.getMedicationStrength();//getStrength();
                String strengthTermString = "null";
                if (strengthTerm != null)
                  strengthTermString = strengthTerm.getNumber()+ " " +strengthTerm.getUnit();
                String medicationDosageString = "null";
                if (neAnnot.getMedicationDosage() != null && neAnnot.getMedicationDosage().getValue() != null)
                  medicationDosageString = neAnnot.getMedicationDosage().getValue();
                String medicationFrequencyNumber = "null";
                if (neAnnot.getMedicationFrequency() != null && neAnnot.getMedicationFrequency().getNumber() != null)
                  medicationFrequencyNumber = neAnnot.getMedicationFrequency().getNumber()+" "+neAnnot.getMedicationFrequency().getUnit();
                String duration = "null";
                if (neAnnot.getMedicationDuration() != null && neAnnot.getMedicationDuration().getValue() != null)
                  duration = neAnnot.getMedicationDuration().getValue();

                String route = "null";
                if (neAnnot.getMedicationRoute() != null && neAnnot.getMedicationRoute().getValue() != null)
                  route = neAnnot.getMedicationRoute().getValue();
                String form = "null";
                if (neAnnot.getMedicationForm() != null && neAnnot.getMedicationForm().getValue() != null)
                  form = neAnnot.getMedicationForm().getValue();
                String changeStatus = "null";
                if (neAnnot.getMedicationStatusChange() != null && neAnnot.getMedicationStatusChange().getValue() != null )
                  changeStatus = neAnnot.getMedicationStatusChange().getValue();
                medInfo = clinicNumber + "," +neAnnot.getCoveredText()  + "," + rxNormCui
                + ",\"" + neAnnot.getStartDate() + "\","
                + globalDate + "," + medicationDosageString + "," +strengthTermString + ","
                + medicationFrequencyNumber + "," +  duration + "," + route + ","
                +  form + "," + status + ","
                + changeStatus + "," +neAnnot.getConfidence() + "," +containedInSubSection
                + "," +docLinkId+"_"+docRevision+","+chunk; 
                store(fileForIO, medInfo);
              }
            }
          }

        }
      }

    } catch (NullPointerException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    catch (Exception e) {
      throw new ResourceProcessException(e);
    }
  }

  /**
   * Loads text from a file. Specialized to load array idAndDate and return it
   * too
   *
   * @param filename
   * @return
   * @throws FileNotFoundException
   * @throws IOException
   */
  public static List load(String filename) throws FileNotFoundException,
      IOException {

    String[][] idDate = null;
    List listIDandDates = new ArrayList();
    File f = new File(filename);
    BufferedReader br = new BufferedReader(new FileReader(f));
    br.readLine();// dummy line to go beyond columm headers
    br.readLine();
    String line = br.readLine();

    int index = 0;

    while ((line != null) && (line != "") && (line.length() > 0)) {
      int firstComma = line.indexOf(',');
      int lastComma = line.lastIndexOf(',');
      String id = line.substring(0, firstComma);
      String date = line.substring(lastComma + 1, line.indexOf("0:00"));
      idDate = new String[][] { { id }, { date } };
      listIDandDates.add(index, idDate);
      index++;
      line = br.readLine();
    }
    br.close();

    return listIDandDates;
  }

  /**
   * Loads text from a file. Specialized to load array idAndDate and return it
   * too
   *
   * @param filename
   * @return
   * @throws FileNotFoundException
   * @throws IOException
   */
  public void store(String filename, String lineToStore)
      throws FileNotFoundException, IOException {
    int howMany = 132;
    boolean skipDate = false;
    boolean preExists = true;
    if (filename.endsWith(System.getProperty("file.separator")))
      filename = filename
          + lineToStore.substring(0, lineToStore.indexOf(','))
          + ".csv";
    else
      filename = filename + "/"
          + lineToStore.substring(0, lineToStore.indexOf(','))
          + ".csv";

    File f = new File(filename);
    if (!f.exists()){
      f.createNewFile();
      preExists = false;
    }
    BufferedReader br = new BufferedReader(new FileReader(f));
    br.readLine();// dummy line to go beyond columm headers
    br.close();

    ByteArrayOutputStream bout = new ByteArrayOutputStream(howMany * 4);

    DataOutputStream dout = new DataOutputStream(bout);
    FileOutputStream fos = new FileOutputStream(filename, true);
    if (!preExists){
      dout.writeBytes(drugHeaders);
    }
    if (!skipDate)
      dout.writeBytes(lineToStore + '\n');

    try {
      if (!skipDate) {
        bout.writeTo(fos);
        fos.flush();
      }
    } finally {
      fos.close();
    }

  }

  protected String parseStengthValue(Object strength) {

    String text = (String) strength;
    String strengthText = "";
    boolean containsNums = false;
    boolean doneHere = false;
    int textSize = text.length();
    int pos = 0;
    Integer posInt = null;
    String strengthString = "";
    while (!doneHere && (textSize > pos) && (textSize > 1)) {
      try {
        strengthString = text.substring(pos, pos + 1);
        /*
         * if (numString.compareTo(".") == 0) { subText =
         * text.substring(pos + 1, textSize); pos++; }
         */
        Integer posNum = posInt.decode(strengthString);
        int checkInt = posNum.intValue();

        if ((checkInt >= 0) && (checkInt <= 9)) {
          containsNums = true;

        } else {

          strengthText = strengthText + strengthString;
        }
        pos++;

      }

      catch (NullPointerException npe) {
        return null;
      } catch (NumberFormatException nfe) {
        if (!containsNums)
          doneHere = true;
        else {
          pos++;
          strengthText = strengthText + strengthString;
        }
      }
    }
    return strengthText;

  }

  protected int parseIntValue(Object strength) {

    String text = (String) strength;
    String strengthNumText = "";
    String subText = "";
    boolean containsNums = false;
    boolean doneHere = false;
    int textSize = text.length();
    int pos = 0;
    Integer posInt = null;
    while (!doneHere && (textSize > pos) && (textSize > 1)) {
      try {
        String numString = text.substring(pos, pos + 1);

        Integer posNum = posInt.decode(numString);
        int checkInt = posNum.intValue();

        if ((checkInt >= 0) && (checkInt <= 9)) {
          containsNums = true;
          subText = text.substring(pos + 1, textSize);
          pos++;
          strengthNumText = strengthNumText + numString;

        } else
          return 0;
      }

      catch (NullPointerException npe) {
        return 0;
      } catch (NumberFormatException nfe) {
        if (!containsNums)
          return 0;
        else
          doneHere = true;

      }
    }
    if (strengthNumText != "")
      return new Integer(strengthNumText).intValue();
    else
      return 0;

  }
  // Annotation version written by storeAnnotationVersion. Never assigned in
  // the visible code, so it keeps the int default of 0.
  private int iv_annotVer;
  // Reset to false by processCas; only commented-out code ever set it to true.
  private boolean gotValidDate = false;
  // Scratch flag used by assignNamedEntityFeats while checking for a repeated span.
  private boolean gotDup = false;
  // Revision date and note date of the current document. assignNamedEntityFeats
  // parses them as epoch milliseconds. processCas resets both to "" and no
  // active code sets them afterwards.
  private String vRevDate = null;
  private String vNoteDate = null;
  // Reset to "" by processCas; only commented-out code populated it.
  private String vClinicalNumber = null;
  // First column of every record; store() uses it as the output file name.
  private String clinicNumber = null;
  // Written as "<docLinkId>_<docRevision>" in each record; never assigned in
  // the visible code, so both stay empty.
  private String docLinkId = "";
  private String docRevision = "";
  // Value of the 'locationForClinicRcdInput' parameter, read in initialize().
  // NOTE(review): not used anywhere else in the visible code.
  private String iv_fileForInput = null;
  // Value of the 'filterGroupDrugs' parameter, read in initialize().
  // NOTE(review): not used anywhere else in the visible code.
  private String pathToDrugInclusions;
  // Segment ids parsed from the '|'-separated 'listMedicationSection' parameter.
  private Set<String> iv_medicalSections = new HashSet();
  // Header row that store() writes when it creates a new file.
  // NOTE(review): this lists 'frequency,frequency_unit' and has no column for
  // the trailing chunk text, whereas the record built in
  // assignNamedEntityFeats has a single frequency field and ends with the
  // chunk - confirm which side is intended.
  private String drugHeaders = "clinicNumber,drug_mention_text,rxnorm_cui,local_date,note_date,dosage,strength,frequency,frequency_unit,duration,route,form,status,change_status,certainty,section|subsection|status,documentId_revision\n";
  // When true, only mentions inside a segment listed in iv_medicalSections are
  // written; overwritten in initialize() from 'useMedicationSectionOnly'.
  private Boolean iv_useCurrentMedsSectionOnly = new Boolean("true");
  // Name of the configuration parameter that holds the output directory.
  public static final String PARAM_OUTPUTDIR = "OutputDirectory";

  // Output location passed to store(). This default is replaced in
  // initialize() by the 'OutputDirectory' parameter value.
  private String fileForIO = new String(
      "R:\\Dept\\projects\\Text\\DrugProfile\\data\\psychiatry\\goldStandard\\work.csv");


}
TOP

Related Classes of org.apache.ctakes.drugner.cc.ConsumeNamedEntityRecordModel

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.