Source Code of org.apache.ctakes.assertion.medfacts.i2b2.api.CharacterOffsetToLineTokenConverterCtakesImpl

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.assertion.medfacts.i2b2.api;


import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.log4j.Logger;


import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.mitre.medfacts.i2b2.api.ApiConcept;
import org.mitre.medfacts.zoner.CharacterOffsetToLineTokenConverter;
import org.mitre.medfacts.zoner.LineAndTokenPosition;


import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;


public class CharacterOffsetToLineTokenConverterCtakesImpl implements CharacterOffsetToLineTokenConverter
{
  protected Logger logger = Logger.getLogger(CharacterOffsetToLineTokenConverterCtakesImpl.class.getName());
  protected JCas jcas;
  
  protected TreeMap<Integer, Sentence> beginTreeMap;
  protected TreeSet<Integer> tokenBeginEndTreeSet;
  
  public CharacterOffsetToLineTokenConverterCtakesImpl()
  {
    
  }
  
  public CharacterOffsetToLineTokenConverterCtakesImpl(JCas jcas)
  {
    this.jcas = jcas;
    buildSentenceBoundaryMap();
    buildTokenBoundaryMap();
  }
  
  public void buildSentenceBoundaryMap()
  {
    beginTreeMap = new TreeMap<Integer, Sentence>();
    
    AnnotationIndex<Annotation> annotationIndex = jcas.getAnnotationIndex(Sentence.type);
    for (Annotation current : annotationIndex)
    {
      Sentence currentSentence = (Sentence)current;
      
      int begin = currentSentence.getBegin();
      beginTreeMap.put(begin, currentSentence);
    }
  }
  
  public void buildTokenBoundaryMap()
  {
    tokenBeginEndTreeSet = new TreeSet<Integer>();
    
    AnnotationIndex<Annotation> annotationIndex = jcas.getAnnotationIndex(BaseToken.type);
    for (Annotation current : annotationIndex)
    {
      BaseToken bt = (BaseToken)current;
      int begin = bt.getBegin();
      int end = bt.getEnd();
      
      tokenBeginEndTreeSet.add(begin);
      tokenBeginEndTreeSet.add(end);
    }
  }
  
  public Sentence findPreviousOrCurrentSentence(int characterOffset)
  {
    Integer floorKey = beginTreeMap.floorKey(characterOffset);
    if (floorKey == null)
    {
      return null;
    }
    Sentence floorEntry = beginTreeMap.get(floorKey);
    
    return floorEntry;
  }
  
  public LineAndTokenPosition convert(int characterOffset)
  {
    return convertCharacterOffsetToLineToken(characterOffset);
  }
  
  public int adjustOffsetToBestMatch(int original)
  {
    logger.debug("inside adjustOffsetToBestMatch");
    Integer newValue = tokenBeginEndTreeSet.floor(original);
    
    if (newValue == null)
    {
      logger.debug("no previous token begin or end found. using begin of first token.");
      newValue = tokenBeginEndTreeSet.first();
    } else
    {
      if (original == newValue)
        logger.debug("value not adjusted: " + original);
      else
        logger.debug("found previous token boundary. original: " + original + "; new value: " + newValue);
    }
    
    if (newValue == null)
    {
      logger.info("no previous and no first token found!!");
    }
    
    logger.debug("end adjustOffsetToBestMatch");
    
    return newValue;
  }
  
  public LineAndTokenPosition convertCharacterOffsetToLineToken(int characterOffset)
  {
    logger.debug("entering CharacterOffsetToLineTokenConverterCtakesImpl.convertCharacterOffsetToLineToken() with a characterOffset of: " + characterOffset);
    
    logger.debug("before adjusting input character offset...");
    characterOffset = adjustOffsetToBestMatch(characterOffset);
    logger.debug("after adjusting input character offset.");
    int baseTokenTypeId = BaseToken.type;
    
    ConstraintConstructorFindContainedBy constraintConstructorFindContainedBy = new ConstraintConstructorFindContainedBy(jcas);
    ConstraintConstructorFindContainedWithin constraintConstructorFindContainedWithin = new ConstraintConstructorFindContainedWithin(jcas);
    
    Type sentenceType = jcas.getTypeSystem().getType(Sentence.class.getName());
    Type baseTokenType = jcas.getTypeSystem().getType(BaseToken.class.getName());


//    FSIterator<Annotation> filteredIterator =
//        constraintConstructorFindContainedBy.createFilteredIterator(
//          characterOffset, characterOffset, sentenceType);
//
//    if (!filteredIterator.hasNext())
//    {
//      throw new RuntimeException("Surrounding sentence annotation not found[" + characterOffset + "]!!");
//    }
//    Annotation sentenceAnnotation = filteredIterator.next();
//    Sentence sentence = (Sentence)sentenceAnnotation;
    
    logger.debug("finding current or previous sentence for character offset " + characterOffset);
    Sentence sentence = findPreviousOrCurrentSentence(characterOffset);
    if (sentence == null)
    {
      logger.info("current or previous sentence IS NULL!");
    } else
    {
      logger.debug("current or previous sentence -- id: " + sentence.getAddress() +
          "; begin: " + sentence.getBegin() + 
          "; end: " + sentence.getEnd());
    }
    
    int lineNumber = sentence.getSentenceNumber() + 1;
    
    
    FSIterator<Annotation> tokensInSentenceIterator =
        jcas.getAnnotationIndex(baseTokenTypeId).subiterator(sentence);
    
    if (!tokensInSentenceIterator.hasNext())
    {
      throw new RuntimeException("First token in sentence not found!!");
    }
    Annotation firstTokenAnnotation = tokensInSentenceIterator.next();
    BaseToken firstToken = (BaseToken)firstTokenAnnotation;
    int firstTokenInSentenceNumber = firstToken.getTokenNumber();
    
    
    FSIterator<Annotation> beginTokenInSentenceIterator =
        constraintConstructorFindContainedBy.createFilteredIterator(
          characterOffset, characterOffset, baseTokenType);
    
    if (!beginTokenInSentenceIterator.hasNext())
    {
      throw new RuntimeException("First token in sentence not found!! (character offset request = " + characterOffset);
    }
    Annotation beginTokenAnnotation = beginTokenInSentenceIterator.next();
    BaseToken beginToken = (BaseToken)beginTokenAnnotation;
    int beginTokenNumber = beginToken.getTokenNumber();
    int beginTokenWordNumber = beginTokenNumber - firstTokenInSentenceNumber;
    
    LineAndTokenPosition b = new LineAndTokenPosition();
    b.setLine(lineNumber);
    b.setTokenOffset(beginTokenWordNumber);


    return b;
  }


  public List<LineAndTokenPosition> calculateBeginAndEndOfConcept
    (ApiConcept problem)
  {
    return calculateBeginAndEndOfConcept(problem.getBegin(), problem.getEnd());
  }
  
  public List<LineAndTokenPosition> calculateBeginAndEndOfConcept(
      int problemBegin, int problemEnd)
  {
    //int externalId = problem.getExternalId();
    //int sentenceTypeId = Sentence.type;
    int baseTokenTypeId = BaseToken.type;
    //jcas.getAnnotationIndex(sentenceTypeId);
    
    ConstraintConstructorFindContainedBy constraintConstructorFindContainedBy = new ConstraintConstructorFindContainedBy(jcas);
    ConstraintConstructorFindContainedWithin constraintConstructorFindContainedWithin = new ConstraintConstructorFindContainedWithin(jcas);
    
    //AnnotationIndex<Annotation> sentenceAnnotationIndex = jcas.getAnnotationIndex(sentenceTypeId);
    Type sentenceType = jcas.getTypeSystem().getType(Sentence.class.getName());
    Type baseTokenType = jcas.getTypeSystem().getType(BaseToken.class.getName());
    ///
    FSIterator<Annotation> filteredIterator =
        constraintConstructorFindContainedBy.createFilteredIterator(
          problemBegin, problemEnd, sentenceType);
    ///
    if (!filteredIterator.hasNext())
    {
      throw new RuntimeException("Surrounding sentence annotation not found!!");
    }
    Annotation sentenceAnnotation = filteredIterator.next();
    Sentence sentence = (Sentence)sentenceAnnotation;
    int lineNumber = sentence.getSentenceNumber() + 1;
    
    
    FSIterator<Annotation> tokensInSentenceIterator =
        jcas.getAnnotationIndex(baseTokenTypeId).subiterator(sentence);
    
    if (!tokensInSentenceIterator.hasNext())
    {
      throw new RuntimeException("First token in sentence not found!!");
    }
    Annotation firstTokenAnnotation = tokensInSentenceIterator.next();
    BaseToken firstToken = (BaseToken)firstTokenAnnotation;
    int firstTokenInSentenceNumber = firstToken.getTokenNumber();
    
    
    FSIterator<Annotation> beginTokenInSentenceIterator =
        constraintConstructorFindContainedWithin.createFilteredIterator(
          problemBegin, problemEnd, baseTokenType);
    
    if (!beginTokenInSentenceIterator.hasNext())
    {
      throw new RuntimeException("First token in sentence not found!!");
    }
    Annotation beginTokenAnnotation = beginTokenInSentenceIterator.next();
    BaseToken beginToken = (BaseToken)beginTokenAnnotation;
    int beginTokenNumber = beginToken.getTokenNumber();
    int beginTokenWordNumber = beginTokenNumber - firstTokenInSentenceNumber;
    
    
    beginTokenInSentenceIterator.moveToLast();
    if (!beginTokenInSentenceIterator.hasNext())
    {
      throw new RuntimeException("First token in sentence not found!!");
    }
    Annotation endTokenAnnotation = beginTokenInSentenceIterator.next();
    BaseToken endToken = (BaseToken)endTokenAnnotation;
    int endTokenNumber = endToken.getTokenNumber();
    int endTokenWordNumber = endTokenNumber - firstTokenInSentenceNumber;
    


    ArrayList<LineAndTokenPosition> list = new ArrayList<LineAndTokenPosition>();
    LineAndTokenPosition b = new LineAndTokenPosition();
    b.setLine(lineNumber);
    b.setTokenOffset(beginTokenWordNumber);
    list.add(b);
    LineAndTokenPosition e = new LineAndTokenPosition();
    e.setLine(lineNumber);
    e.setTokenOffset(endTokenWordNumber);
    System.out.println("Adding lineTokenEnding " + lineNumber + " offset = " + endTokenWordNumber);
    list.add(e);
    return list;
  }


}
Source Code of org.apache.ctakes.assertion.medfacts.i2b2.api.CharacterOffsetToLineTokenConverterCtakesImpl

Related Classes of org.apache.ctakes.assertion.medfacts.i2b2.api.CharacterOffsetToLineTokenConverterCtakesImpl