Package org.apache.ctakes.drugner.fsm.machines.elements

Source Code of org.apache.ctakes.drugner.fsm.machines.elements.RangeStrengthFSM

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.drugner.fsm.machines.elements;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.ctakes.core.fsm.condition.DecimalCondition;
import org.apache.ctakes.core.fsm.condition.NumberCondition;
import org.apache.ctakes.core.fsm.condition.PunctuationValueCondition;
import org.apache.ctakes.core.fsm.condition.RangeCondition;
import org.apache.ctakes.core.fsm.condition.WordSetCondition;
import org.apache.ctakes.core.fsm.state.NamedState;
import org.apache.ctakes.core.fsm.token.BaseToken;
import org.apache.ctakes.drugner.fsm.elements.conditions.FractionStrengthCondition;
import org.apache.ctakes.drugner.fsm.output.util.RangeStrengthToken;

import net.openai.util.fsm.AnyCondition;
import net.openai.util.fsm.Condition;
import net.openai.util.fsm.Machine;
import net.openai.util.fsm.State;

/**
* Uses one or more finite state machines to detect range strength tokens in the given
* input of tokens.
*
* @author Mayo Clinic
*/
public class RangeStrengthFSM
{
  // text fractions
  Set iv_textNumberSet = new HashSet();
  // range text
  Set iv_rangeSet = new HashSet();

  Set iv_hyphenatedSet = new HashSet();
  // contains the finite state machines
  private Set iv_machineSet = new HashSet();

  /**
   * Constructor
   */
  public RangeStrengthFSM()
  {
    iv_textNumberSet.add("one");
    iv_textNumberSet.add("two");
    iv_textNumberSet.add("three");
    iv_textNumberSet.add("four");
    iv_textNumberSet.add("five");
    iv_textNumberSet.add("six");
    iv_textNumberSet.add("seven");
    iv_textNumberSet.add("eight");
    iv_textNumberSet.add("nine");
    iv_textNumberSet.add("ten");
    iv_rangeSet.add("to");
    iv_rangeSet.add("through");
    iv_rangeSet.add("thru");
   
    iv_hyphenatedSet.add("one-two");
    iv_hyphenatedSet.add("one-three");
    iv_hyphenatedSet.add("one-four");
    iv_hyphenatedSet.add("one-five");
    iv_hyphenatedSet.add("one-six");
    iv_hyphenatedSet.add("one-seven");
    iv_hyphenatedSet.add("one-eight");
    iv_hyphenatedSet.add("one-nine");
    iv_hyphenatedSet.add("one-ten");
   

    iv_hyphenatedSet.add("two-three");
    iv_hyphenatedSet.add("two-four");
    iv_hyphenatedSet.add("two-five");
    iv_hyphenatedSet.add("two-six");
    iv_hyphenatedSet.add("two-seven");
    iv_hyphenatedSet.add("two-eight");
    iv_hyphenatedSet.add("two-nine");
    iv_hyphenatedSet.add("two-ten")
   

    iv_hyphenatedSet.add("three-four");
    iv_hyphenatedSet.add("three-five");
    iv_hyphenatedSet.add("three-six");
    iv_hyphenatedSet.add("three-seven");
    iv_hyphenatedSet.add("three-eight");
    iv_hyphenatedSet.add("three-nine");
    iv_hyphenatedSet.add("three-ten");
   
    iv_hyphenatedSet.add("four-five");
    iv_hyphenatedSet.add("four-six");
    iv_hyphenatedSet.add("four-seven");
    iv_hyphenatedSet.add("four-eight");
    iv_hyphenatedSet.add("four-nine");
    iv_hyphenatedSet.add("four-ten");
   
    iv_hyphenatedSet.add("five-six");
    iv_hyphenatedSet.add("five-seven");
    iv_hyphenatedSet.add("five-eight");
    iv_hyphenatedSet.add("five-nine");
    iv_hyphenatedSet.add("five-ten");
   
    iv_hyphenatedSet.add("six-ten");
    iv_hyphenatedSet.add("six-seven");
    iv_hyphenatedSet.add("six-eight");
    iv_hyphenatedSet.add("six-nine");
   
    iv_hyphenatedSet.add("seven-eight");
    iv_hyphenatedSet.add("seven-nine");
    iv_hyphenatedSet.add("seven-ten");
   
    iv_hyphenatedSet.add("eight-nine");
    iv_hyphenatedSet.add("eight-ten");
   
    iv_hyphenatedSet.add("nine-ten");
   
    iv_hyphenatedSet.add("one-to-two");
    iv_hyphenatedSet.add("one-to-three");
    iv_hyphenatedSet.add("one-to-four");
    iv_hyphenatedSet.add("one-to-five");
    iv_hyphenatedSet.add("one-to-six");
    iv_hyphenatedSet.add("one-to-seven");
    iv_hyphenatedSet.add("one-to-eight");
    iv_hyphenatedSet.add("one-to-nine");
    iv_hyphenatedSet.add("one-to-ten");
   

    iv_hyphenatedSet.add("two-to-three");
    iv_hyphenatedSet.add("two-to-four");
    iv_hyphenatedSet.add("two-to-five");
    iv_hyphenatedSet.add("two-to-six");
    iv_hyphenatedSet.add("two-to-seven");
    iv_hyphenatedSet.add("two-to-eight");
    iv_hyphenatedSet.add("two-to-nine");
    iv_hyphenatedSet.add("two-to-ten")
   

    iv_hyphenatedSet.add("three-to-four");
    iv_hyphenatedSet.add("three-to-five");
    iv_hyphenatedSet.add("three-to-six");
    iv_hyphenatedSet.add("three-to-seven");
    iv_hyphenatedSet.add("three-to-eight");
    iv_hyphenatedSet.add("three-to-nine");
    iv_hyphenatedSet.add("three-to-ten");
   
    iv_hyphenatedSet.add("four-to-five");
    iv_hyphenatedSet.add("four-to-six");
    iv_hyphenatedSet.add("four-to-seven");
    iv_hyphenatedSet.add("four-to-eight");
    iv_hyphenatedSet.add("four-to-nine");
    iv_hyphenatedSet.add("four-to-ten");
   
    iv_hyphenatedSet.add("five-to-six");
    iv_hyphenatedSet.add("five-to-seven");
    iv_hyphenatedSet.add("five-to-eight");
    iv_hyphenatedSet.add("five-to-nine");
    iv_hyphenatedSet.add("five-to-ten");
   
    iv_hyphenatedSet.add("six-to-ten");
    iv_hyphenatedSet.add("six-to-seven");
    iv_hyphenatedSet.add("six-to-eight");
    iv_hyphenatedSet.add("six-to-nine");
   
    iv_hyphenatedSet.add("seven-to-eight");
    iv_hyphenatedSet.add("seven-to-nine");
    iv_hyphenatedSet.add("seven-to-ten");
   
    iv_hyphenatedSet.add("eight-to-nine");
    iv_hyphenatedSet.add("eight-to-ten");
   
    iv_hyphenatedSet.add("nine-to-ten");
   
    iv_machineSet.add(getDashMachine());
    iv_machineSet.add(getDotDashMachine());
    iv_machineSet.add(getDashDashMachine());

  }

  /**
   * Gets a finite state machine that detects the following:
   * <ol>
   *     <li>25.4-30.4</li>
   *     <li>32.1-three</li>
   * </ol>
   * @return
   */
  private Machine getDashDashMachine()
  {
    State startState = new NamedState("START");
    State endState = new NamedState("END");
    endState.setEndStateFlag(true);
 
    Machine m = new Machine(startState);
   
    State leftNumTextState = new NamedState("LEFT_NUM_TEXT");
    State rightNumTextState = new NamedState("RIGHT_NUM_TEXT");
    State middleDash = new NamedState("MIDDASH");
    State dashAnotherState = new NamedState("DASH_2");
    State dash2State = new NamedState("DASH2");
   
    Condition rightIntCondition = new NumberCondition();
   
    Condition rightNumTextCondition =
      new WordSetCondition(iv_textNumberSet, false);
 
   
    startState.addTransition(new WordSetCondition(iv_textNumberSet, false), leftNumTextState);
    startState.addTransition(new AnyCondition(), startState);
 
    leftNumTextState.addTransition(new PunctuationValueCondition('-'), dash2State);
    leftNumTextState.addTransition(new WordSetCondition(iv_rangeSet, false), rightNumTextState);
    leftNumTextState.addTransition(new AnyCondition(), startState);
 
     
      dash2State.addTransition(rightIntCondition, endState);
    dash2State.addTransition(rightNumTextCondition, endState);
    dash2State.addTransition(new WordSetCondition(iv_rangeSet, false), middleDash);
    dash2State.addTransition(new AnyCondition(), startState);
   
    middleDash.addTransition(new PunctuationValueCondition('-'), dashAnotherState);
    middleDash.addTransition(new AnyCondition(), startState);
   
    rightNumTextState.addTransition(new NumberCondition(), endState);
    rightNumTextState.addTransition(new WordSetCondition(iv_textNumberSet, false), endState);
    rightNumTextState.addTransition(new AnyCondition(), startState);
   
    dashAnotherState.addTransition(new WordSetCondition(iv_textNumberSet, false), endState);
    dashAnotherState.addTransition(new NumberCondition(), endState);
   
    dashAnotherState.addTransition(new AnyCondition(), startState);
 
    endState.addTransition(new AnyCondition(), startState);
 
    return m;
  }

  /**
   * Gets a finite state machine that detects the following:
   * <ol>
   *     <li>25.4-30.4</li>
   *     <li>32.1-three</li>
   * </ol>
   * @return
   */
  private Machine getDashMachine()
  {
      State startState = new NamedState("START");
      State endState = new NamedState("END");
      endState.setEndStateFlag(true);
 
      Machine m = new Machine(startState);
      State leftNumIntegerState = new NamedState("LEFT_NUM_INTEGER");
      State leftNumTextState = new NamedState("LEFT_NUM_TEXT");
      State dashState = new NamedState("DASH1");
      State dash1State = new NamedState("DASH_1");
   
      Condition leftIntCondition = new NumberCondition();
      Condition rightIntCondition = new NumberCondition();
 
      startState.addTransition(leftIntCondition, leftNumIntegerState);
      startState.addTransition(new WordSetCondition(iv_hyphenatedSet, false), endState);
      startState.addTransition(new WordSetCondition(iv_textNumberSet, false), leftNumTextState);
      startState.addTransition(new AnyCondition(), startState);
 
      leftNumIntegerState.addTransition(new PunctuationValueCondition('-'), dashState);
      leftNumIntegerState.addTransition(new AnyCondition(), startState);
 
      leftNumTextState.addTransition(new PunctuationValueCondition('-'), dash1State);
      leftNumTextState.addTransition(new AnyCondition(), startState);
 
      dashState.addTransition(rightIntCondition, endState);
      dashState.addTransition(new WordSetCondition(iv_textNumberSet, false), endState);
 
      dashState.addTransition(new AnyCondition(), startState);
   
      dash1State.addTransition(rightIntCondition, endState);
      dash1State.addTransition(new WordSetCondition(iv_textNumberSet, false), endState);
 
      dash1State.addTransition(new AnyCondition(), startState);
 
      endState.addTransition(new AnyCondition(), startState);
 
      return m;
  }

  /**
   * Gets a finite state machine that detects the following:
   * <ol>
   *     <li>250-300</li>
   *     <li>I-IV</li>
   *     <li>two-three</li>
   *     <li>two-to-three</li>
   * </ol>
   * @return
   */
  private Machine getDotDashMachine()
 
  {
    State startState = new NamedState("START");
    State endState = new NamedState("END");
    endState.setEndStateFlag(true);
 
    Machine m = new Machine(startState);
    State leftNumIntegerState = new NamedState("LEFT_N2_INT");
    State decPartNumState = new NamedState("DECIMAL_NUM");

    State dashState = new NamedState("DASH");
    State dotState = new NamedState("DOT");

    Condition rangeCondition = new RangeCondition();
    Condition leftIntCondition = new NumberCondition();
    Condition decIntCondition = new NumberCondition();
    Condition rightIntCondition = new NumberCondition();

    Condition rightDecimalCondition = new DecimalCondition();
    Condition dashCondition = new PunctuationValueCondition('-');
   
    Condition dotCondition = new PunctuationValueCondition('.');

    Condition rightDoseagesCondition = new FractionStrengthCondition();
 
    startState.addTransition(leftIntCondition, leftNumIntegerState);
    startState.addTransition(rangeCondition, leftNumIntegerState);

 
    startState.addTransition(new AnyCondition(), startState);
   
     
    leftNumIntegerState.addTransition(dotCondition, dotState);
    leftNumIntegerState.addTransition(new AnyCondition(), startState);
 
    dotState.addTransition(decIntCondition, decPartNumState);
    dotState.addTransition(new AnyCondition(), startState);
   
    decPartNumState.addTransition(dashCondition, dashState);
    decPartNumState.addTransition(new AnyCondition(), startState);

 
    dashState.addTransition(rightIntCondition, endState);
    dashState.addTransition(rightDecimalCondition, endState);
    dashState.addTransition(rightDoseagesCondition, endState);
    
    dashState.addTransition(new AnyCondition(), startState);
 
 
    endState.addTransition(new AnyCondition(), startState);
 
    return m;
  }

  /**
   * Executes the finite state machines.
   * @param tokens
   * @return Set of RangeToken objects.
   * @throws Exception
   */
  public Set execute(List tokens, Set overrideSet) throws Exception
  {
    Set rangeSet = new HashSet();

    // maps a fsm to a token start index
    // key = fsm , value = token start index
    Map tokenStartMap = new HashMap();

    Iterator overrideTokenItr = overrideSet.iterator();
    // key = start offset, value = override BaseToken object
    Map overrideTokenMap = new HashMap();
    while (overrideTokenItr.hasNext())
    {
      BaseToken t = (BaseToken)overrideTokenItr.next();
      Integer key = new Integer(t.getStartOffset());
      overrideTokenMap.put(key, t);
    }

    boolean overrideOn = false;
    int overrideEndOffset = -1;
    for (int i = 0; i < tokens.size(); i++)
    {
      BaseToken token = (BaseToken) tokens.get(i);

      Integer key = new Integer(token.getStartOffset());           
     
      if (overrideOn)
      {
        if (token.getStartOffset() >= overrideEndOffset)
        {
          overrideOn = false;
          overrideEndOffset = -1;           
        }
        else
        {
          // step to next iteration of for loop
          continue;
        }
      }     
      else
      {
        if (overrideTokenMap.containsKey(key))
        {
          // override one or more tokens until the override
          // token is complete
          token = (BaseToken)overrideTokenMap.get(key);
          overrideOn = true;
          overrideEndOffset = token.getEndOffset();
        }
      }

      Iterator machineItr = iv_machineSet.iterator();
      while (machineItr.hasNext())
      {
        Machine fsm = (Machine) machineItr.next();

        fsm.input(token);

        State currentState = fsm.getCurrentState();
        if (currentState.getStartStateFlag())
        {
          tokenStartMap.put(fsm, new Integer(i));
        }
        if (currentState.getEndStateFlag())
        {
          Object o = tokenStartMap.get(fsm);
          int tokenStartIndex;
          if (o == null)
          {
            // By default, all machines start with
            // token zero.
            tokenStartIndex = 0;
          }
          else
          {
            tokenStartIndex = ((Integer) o).intValue();
            // skip ahead over single token we don't want
            tokenStartIndex++;           
          }
          BaseToken startToken =
            (BaseToken) tokens.get(tokenStartIndex);
          BaseToken endToken = token;
          RangeStrengthToken segmentToken =
            new RangeStrengthToken(
              startToken.getStartOffset(),
              endToken.getEndOffset());
          rangeSet.add(segmentToken);
          fsm.reset();
        }
      }
    }

    // cleanup
    tokenStartMap.clear();

    // reset machines
    Iterator itr = iv_machineSet.iterator();
    while (itr.hasNext())
    {
      Machine fsm = (Machine) itr.next();
      fsm.reset();
    }

    return rangeSet;
  }

  /**
   * Executes the finite state machines.
   * @param tokens
   * @return Set of FractionToken objects.
   * @throws Exception
   */
  public Set execute(List tokens) throws Exception {
    Set fractionSet = new HashSet();
 
    // maps a fsm to a token start index
    // key = fsm , value = token start index
    Map tokenStartMap = new HashMap();
 
    for (int i = 0; i < tokens.size(); i++) {
      BaseToken token = (BaseToken) tokens.get(i);
 
      Iterator machineItr = iv_machineSet.iterator();
      while (machineItr.hasNext()) {
        Machine fsm = (Machine) machineItr.next();
 
        fsm.input(token);
 
        State currentState = fsm.getCurrentState();
        if (currentState.getStartStateFlag()) {
          tokenStartMap.put(fsm, new Integer(i));
        }
        if (currentState.getEndStateFlag()) {
          Object o = tokenStartMap.get(fsm);
          int tokenStartIndex;
          if (o == null) {
            // By default, all machines start with
            // token zero.
            tokenStartIndex = 0;
          } else {
            tokenStartIndex = ((Integer) o).intValue();
            // skip ahead over single token we don't want
            tokenStartIndex++;
          }
          BaseToken startToken = (BaseToken) tokens
              .get(tokenStartIndex);
          BaseToken endToken = token;
          RangeStrengthToken fractionToken = new RangeStrengthToken(startToken
              .getStartOffset(), endToken.getEndOffset());
          fractionSet.add(fractionToken);
          fsm.reset();
        }
      }
    }
 
    // cleanup
    tokenStartMap.clear();
 
    // reset machines
    Iterator itr = iv_machineSet.iterator();
    while (itr.hasNext()) {
      Machine fsm = (Machine) itr.next();
      fsm.reset();
    }
 
    return fractionSet;
  }
}
TOP

Related Classes of org.apache.ctakes.drugner.fsm.machines.elements.RangeStrengthFSM

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.