// Source code of org.apache.ctakes.coreference.util.AnaphoricityAttributeCalculator

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.coreference.util;


import java.util.ArrayList;
import java.util.List;


import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;


import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.coreference.type.Markable;


public class AnaphoricityAttributeCalculator extends AttributeCalculator {


  private Markable m;
  String s;
  String pos;
  


  public AnaphoricityAttributeCalculator (JCas jcas, Markable m) {
    super(jcas);
    this.m = m;
    s = m.getCoveredText();
    pos = getPOS();
  }


  private String getPOS(){
    try{
      TreebankNode node = MarkableTreeUtils.markableNode(jcas, m.getBegin(), m.getEnd());
//      TerminalTreebankNode wordNode = (TerminalTreebankNode) node.getRoot().getTerminals().get(node.getHeadIndex());
      TerminalTreebankNode wordNode = MarkableTreeUtils.getHead(node);
      return wordNode.getNodeType();
    }catch(Exception e){


      return null;
    }
  }
  
  public String calcmDefinite () {
    return isDefinite(s) ? "Y" : "N";
  }


  public String calcmDemonstrative () {
    return isDemonstrative(s) ? "Y" : "N";
  }


  public String calcmNumSing () {  
    // get the largest constituent covering this markable:
    if(pos == null) return "N";
    if(pos.equals("NN") || pos.equals("NNP")){
      return "Y";
    }
    return "N";
  }


  public String calcmNumPlu () {
    if(pos == null) return "N";
    if(pos.equals("NNS") || pos.equals("NNPS")){
      return "Y";
    }else{
      return "N";
    }
  }


  public String calcmNumUnk () {
//    ArrayList<BaseToken> l = containedTokens(m.getBegin(), m.getEnd());
//    for (int i = l.size()-1; i>=0; i--) {
//      String pos = l.get(i).getPartOfSpeech();
    if(pos == null) return "N";
      if (pos.startsWith("NN"))
        return "N";
//    }
    return "Y";
  }


  // FIX**ME - use result of constituency parser to get rightmost N* of NP head.
  // FIXED - not used, so the fixes above work fine.
  public String calcmNumber () {
    // use the underlying NE, instead of the expanded markable to find the number
    //ArrayList<BaseToken> l = containedTokens(m.getContent().getBegin(), m.getContent().getEnd());
    ArrayList<BaseToken> l = containedTokens(m.getBegin(), m.getEnd());
    for (BaseToken t : l) {
      String pos = t.getPartOfSpeech();
      if (pos.equals("NN") || pos.equals("NNP"))
        return "S";
      else if (pos.equals("NNS") || pos.equals("NNPS"))
        return "P";
    }
    return "U";
  }


  public String calcmIsDrug () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_DRUG)
      return "Y";
    else
      return "N";
  }


  public String calcmIsDisorder () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_DISORDER)
      return "Y";
    else
      return "N";
  }


  public String calcmIsFinding () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_FINDING)
      return "Y";
    else
      return "N";
  }


  public String calcmIsProcedure () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_PROCEDURE)
      return "Y";
    else
      return "N";
  }


  public String calcmIsAnatomicalSite () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_ANATOMICAL_SITE)
      return "Y";
    else
      return "N";
  }


  public String calcmWnClass () {
    if (m.getContent() instanceof IdentifiedAnnotation) {
      IdentifiedAnnotation ne = (IdentifiedAnnotation) m.getContent();
      return String.valueOf(ne.getTypeID());
    } else
      return "0";
  }


  public String calcmProStr () {
    return isPronominal(m) ? "Y" : "N";
  }


  public String calcmIndefinite () {
    if (s.toLowerCase().startsWith("a ") ||
      s.toLowerCase().startsWith("an "))
      return "Y";
    else
      return "N";
  }


  // FIXME use parser output instead of LWA
  public String calcmNPHead () {
    Annotation a = m.getContent();
    FSIterator iter = jcas.getJFSIndexRepository().getAnnotationIndex(LookupWindowAnnotation.type).iterator();
    while (iter.hasNext()) {
      LookupWindowAnnotation lwa = (LookupWindowAnnotation) iter.next();
      if (lwa.getBegin()<=a.getBegin() && lwa.getEnd()==a.getEnd())
        return "yes";
    }
    return "no";
  }


  public String calcmSimilarStr () {
    List<Annotation> lm = FSIteratorToList.convert(
        jcas.getJFSIndexRepository().getAnnotationIndex(Sentence.type).iterator());


    List<String> contentTokens = contentWords(m.getBegin(), m.getEnd());


    int window = 3;
    Sentence[] sent = new Sentence[window+1];
    for (int i = 0; i <= window; i++) sent[i] = null;


    for (int i = 0; i < lm.size(); i++) {
      for (int j = 1; j <= window; j++) sent[j-1] = sent[j];
      Sentence ss = (Sentence) lm.get(i);
      sent[window] = ss;
      if (ss.getBegin() <= m.getBegin() && ss.getEnd() >= m.getEnd()) {
        for (int k = window-1; k >= 0 && sent[k] != null; k--) {
          List<String> lt = contentWords(sent[k].getBegin(), sent[k].getEnd());
          for (String s : lt)
            if (contentTokens.contains(s))
              return "Y";
        }
      } else if (ss.getBegin() > m.getBegin())
        return "N";
    }
    return "N";
  }


}
// Source code of org.apache.ctakes.coreference.util.AnaphoricityAttributeCalculator

// Related classes of org.apache.ctakes.coreference.util.AnaphoricityAttributeCalculator