Package org.apache.ctakes.drugner.fsm.machines.util

Source Code of org.apache.ctakes.drugner.fsm.machines.util.SubSectionIndicatorFSM

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.drugner.fsm.machines.util;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.ctakes.core.fsm.condition.PunctuationValueCondition;
import org.apache.ctakes.core.fsm.condition.TextSetCondition;
import org.apache.ctakes.core.fsm.condition.TextValueCondition;
import org.apache.ctakes.core.fsm.machine.FSM;
import org.apache.ctakes.core.fsm.state.NamedState;
import org.apache.ctakes.core.fsm.state.NonTerminalEndState;
import org.apache.ctakes.core.fsm.token.BaseToken;
import org.apache.ctakes.drugner.fsm.output.util.SubSectionIndicator;

import net.openai.util.fsm.AnyCondition;
import net.openai.util.fsm.Condition;
import net.openai.util.fsm.Machine;
import net.openai.util.fsm.State;

/**
* Uses one or more finite state machines to detect sub-sections in the given input of
* tokens.
*
* @author Mayo Clinic
*/
public class SubSectionIndicatorFSM implements FSM {

  // contains the finite state machines
  private Set iv_machineSet = new HashSet();

  private Set iv_probableSubBeginSet = new HashSet();

  private Set iv_probableSubNextSet = new HashSet();
 
  private Set iv_confirmedSubBeginSet = new HashSet();

  private Set iv_confirmedSubNextSet = new HashSet();
 
  private Set iv_historySubBeginSet = new HashSet();

  private Set iv_historySubNextSet = new HashSet();
  private Set iv_historySubMidSet = new HashSet();
 
  private Set iv_middleWordSet = new HashSet();
 
  private Set iv_probableSubEndSet = new HashSet();

  private Machine iv_subSectionIDProbableMachine = new Machine();

  private Machine iv_subSectionIDHistoryMachine = new Machine();
 
  private Machine iv_subSectionIDConfirmMachine = new Machine();

  /**
   *
   * Constructor
   *
   */
  public SubSectionIndicatorFSM() {

   
    iv_probableSubBeginSet.add("taper");
    iv_probableSubBeginSet.add("tapered");
    iv_probableSubBeginSet.add("changed");
    iv_probableSubBeginSet.add("altered");
    iv_probableSubBeginSet.add("alter");
    iv_probableSubBeginSet.add("patient");
   
    iv_probableSubNextSet.add("medication");
    iv_probableSubNextSet.add("medications");
    iv_probableSubNextSet.add("take");

    iv_probableSubEndSet.add("discontinued");
    iv_probableSubEndSet.add("discontinue");
   
    iv_historySubBeginSet.add("discontinued");
    iv_historySubBeginSet.add("discontinue");
    iv_historySubBeginSet.add("past");
    iv_historySubBeginSet.add("recently");
    iv_historySubBeginSet.add("medication");
    iv_historySubBeginSet.add("radiology");
    iv_historySubBeginSet.add("food");
 
    iv_historySubNextSet.add("medication");
    iv_historySubNextSet.add("medications");
    iv_historySubNextSet.add("hold");

   
    iv_historySubMidSet.add("psychotropic");
    iv_historySubMidSet.add("discontinued");
    iv_historySubMidSet.add("on");
   
    iv_confirmedSubBeginSet.add("new");
    iv_confirmedSubBeginSet.add("dose");
    iv_confirmedSubBeginSet.add("reinstituted");
    iv_confirmedSubBeginSet.add("continue");
    iv_confirmedSubBeginSet.add("prn");
    iv_confirmedSubBeginSet.add("continued");
   
    iv_confirmedSubNextSet.add("adjustment");
    iv_confirmedSubNextSet.add("dose");
    iv_confirmedSubNextSet.add("dosage");
    iv_confirmedSubNextSet.add("dosages");
    iv_confirmedSubNextSet.add("adjustments");
    iv_confirmedSubNextSet.add("medication");
    iv_confirmedSubNextSet.add("medications");
   
    iv_middleWordSet.add("and");
    iv_middleWordSet.add("may");
    iv_subSectionIDProbableMachine = getProbableSubSectionMachine();
    iv_subSectionIDHistoryMachine = getHistorySubSectionMachine();
    iv_subSectionIDConfirmMachine = getConfirmSubSectionMachine();
    iv_machineSet.add(iv_subSectionIDProbableMachine);
    iv_machineSet.add(iv_subSectionIDHistoryMachine);
    iv_machineSet.add(iv_subSectionIDConfirmMachine);

  }

  private Machine getHistorySubSectionMachine() {
    State startState = new NamedState("START");
    State endState = new NamedState("END");
    State medState = new NamedState("MEDHIST");
    State midWordState = new NamedState("MIDDLE");

    State ntEndState = new NonTerminalEndState("NON TERMINAL END");
    endState.setEndStateFlag(true);
    ntEndState.setEndStateFlag(true);

    Machine m = new Machine(startState);

    Condition subFirstBegin = new TextSetCondition(iv_historySubBeginSet,
        false);
    Condition subFirstMid = new TextSetCondition(iv_historySubMidSet,
        false);
    Condition subFirstNext = new TextSetCondition(iv_historySubNextSet,
        false);

    startState.addTransition(subFirstBegin, medState);
      startState.addTransition(new AnyCondition(), startState);
     
    medState.addTransition(subFirstNext, endState);
    medState.addTransition(subFirstMid, midWordState);
    medState.addTransition(new PunctuationValueCondition(':'), endState);
    //medState.addTransition(new PunctuationValueCondition('/'), ntEndState);
    medState.addTransition(new AnyCondition(), startState);
   
    midWordState.addTransition(subFirstNext, endState);
    midWordState.addTransition(new PunctuationValueCondition(':'), endState);
    midWordState.addTransition(new AnyCondition(), startState);
   
    ntEndState.addTransition(new AnyCondition(),  startState);
    endState.addTransition(new AnyCondition(), startState);
    return m;
  }

  /**
   * Executes the finite state machines.
   *
   * @param tokens
   * @return Set of DateToken objects.
   * @throws Exception
   */
  public Set execute(List tokens) throws Exception {
    Set outSet = new HashSet();

    // maps a fsm to a token start index
    // key = fsm , value = token start index
    Map tokenStartMap = new HashMap();

    for (int i = 0; i < tokens.size(); i++) {
      BaseToken token = (BaseToken) tokens.get(i);

      Iterator machineItr = iv_machineSet.iterator();
      while (machineItr.hasNext()) {
        Machine fsm = (Machine) machineItr.next();

        fsm.input(token);

        State currentState = fsm.getCurrentState();
        if (currentState.getStartStateFlag()) {
          tokenStartMap.put(fsm, new Integer(i));
        }
        if (currentState.getEndStateFlag()) {
          Object o = tokenStartMap.get(fsm);
          int tokenStartIndex;
          if (o == null) {
            // By default, all machines start with
            // token zero.
            tokenStartIndex = 0;
          } else {
            tokenStartIndex = ((Integer) o).intValue();
            // skip ahead over single token we don't want
            tokenStartIndex++;
          }
          BaseToken endToken = null;
          if (currentState instanceof NonTerminalEndState) {
            endToken = (BaseToken) tokens.get(i - 1);
          } else {
            endToken = token;
          }

          BaseToken startToken = (BaseToken) tokens
              .get(tokenStartIndex);
          SubSectionIndicator subs = null;
          if (fsm.equals(iv_subSectionIDProbableMachine)) {
            subs = new SubSectionIndicator(startToken
                .getStartOffset(), endToken.getEndOffset(),
                SubSectionIndicator.PROBABLE_STATUS);
          } else if (fsm.equals(iv_subSectionIDHistoryMachine)) {
            subs = new SubSectionIndicator(startToken
                .getStartOffset(), endToken.getEndOffset(),
                SubSectionIndicator.HISTORY_STATUS);
          } else if (fsm.equals(iv_subSectionIDConfirmMachine)) {
            subs = new SubSectionIndicator(startToken
                .getStartOffset(), endToken.getEndOffset(),
                SubSectionIndicator.CONFIRMED_STATUS);
          } else
            subs = new SubSectionIndicator(startToken
                .getStartOffset(), endToken.getEndOffset(),
                SubSectionIndicator.FAMILY_HISTORY_STATUS);
          outSet.add(subs);

          fsm.reset();
        }
      }
    }

    // cleanup
    tokenStartMap.clear();

    // reset machines
    Iterator itr = iv_machineSet.iterator();
    while (itr.hasNext()) {
      Machine fsm = (Machine) itr.next();
      fsm.reset();
    }
 
    return outSet;
  }


  private Machine getProbableSubSectionMachine() {
    State startState = new NamedState("START");
    State endState = new NamedState("END");
    State medState = new NamedState("PROBHIST");
 
    State endWordState = new NamedState("ENDWORD");

    State ntEndState = new NonTerminalEndState("NON TERMINAL END");
    endState.setEndStateFlag(true);
    ntEndState.setEndStateFlag(true);

    Machine m = new Machine(startState);

    Condition subFirstBegin = new TextSetCondition(iv_probableSubBeginSet,
        false);
    Condition subFirstNext = new TextSetCondition(iv_probableSubNextSet,
        false);

    startState.addTransition(subFirstBegin, medState);
 
    startState.addTransition(new AnyCondition(), startState);
   
     
    medState.addTransition(subFirstNext, endState);
    medState.addTransition(new TextSetCondition(iv_middleWordSet, false), endWordState);
    medState.addTransition(new AnyCondition(), startState);
   
    endWordState.addTransition(new TextSetCondition(iv_probableSubEndSet, false), endState);
    endWordState.addTransition(new AnyCondition(), startState);
   
    endState.addTransition(new AnyCondition(), startState);
    return m;
  }

  private Machine getConfirmSubSectionMachine() {
    State startState = new NamedState("START");
    State endState = new NamedState("END");
    State medState = new NamedState("MEDHIST");
    State firstDotState = new NamedState("FIRSTDOT");
    State rState = new NamedState("RSTATE");
    State secondDotState = new NamedState("SECONDDOT");
    State nState = new NamedState("NSTATE");
    State thirdDotState = new NamedState("THIRDDOT");
 
 
    State ntEndState = new NonTerminalEndState("NON TERMINAL END");
    endState.setEndStateFlag(true);
    ntEndState.setEndStateFlag(true);
 
    Machine m = new Machine(startState);
 
    Condition subFirstBegin = new TextSetCondition(iv_confirmedSubBeginSet,
        false);
    Condition subFirstNext = new TextSetCondition(iv_confirmedSubNextSet,
        false);
 
    startState.addTransition(subFirstBegin, medState);
      startState.addTransition(new TextValueCondition("p", false), firstDotState);
      startState.addTransition(new AnyCondition(), startState);
     
      firstDotState.addTransition(new PunctuationValueCondition('.'), rState);
      firstDotState.addTransition(new AnyCondition(), startState);
     
      rState.addTransition(new TextValueCondition("r", false), secondDotState);
      rState.addTransition(new AnyCondition(), startState);
     
      secondDotState.addTransition(new PunctuationValueCondition('.'), nState);
      secondDotState.addTransition(new AnyCondition(), startState);
     
      nState.addTransition(new TextValueCondition("n", false), thirdDotState);
      nState.addTransition(new AnyCondition(), startState);
     
      thirdDotState.addTransition(new PunctuationValueCondition('.'), medState);
      thirdDotState.addTransition(new AnyCondition(), startState);
     
    medState.addTransition(subFirstNext, endState);
    medState.addTransition(new AnyCondition(), startState);
 
    endState.addTransition(new AnyCondition(), startState);
    return m;
  }

}
TOP

Related Classes of org.apache.ctakes.drugner.fsm.machines.util.SubSectionIndicatorFSM

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.