Package org.apache.ctakes.coreference.util

Source Code of org.apache.ctakes.coreference.util.AnaphoricityAttributeCalculator

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.coreference.util;

import java.util.ArrayList;
import java.util.List;

import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;

import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.coreference.type.Markable;

public class AnaphoricityAttributeCalculator extends AttributeCalculator {

  private Markable m;
  String s;
  String pos;
 

  public AnaphoricityAttributeCalculator (JCas jcas, Markable m) {
    super(jcas);
    this.m = m;
    s = m.getCoveredText();
    pos = getPOS();
  }

  private String getPOS(){
    try{
      TreebankNode node = MarkableTreeUtils.markableNode(jcas, m.getBegin(), m.getEnd());
//      TerminalTreebankNode wordNode = (TerminalTreebankNode) node.getRoot().getTerminals().get(node.getHeadIndex());
      TerminalTreebankNode wordNode = MarkableTreeUtils.getHead(node);
      return wordNode.getNodeType();
    }catch(Exception e){

      return null;
    }
  }
 
  public String calcmDefinite () {
    return isDefinite(s) ? "Y" : "N";
  }

  public String calcmDemonstrative () {
    return isDemonstrative(s) ? "Y" : "N";
  }

  public String calcmNumSing () { 
    // get the largest constituent covering this markable:
    if(pos == null) return "N";
    if(pos.equals("NN") || pos.equals("NNP")){
      return "Y";
    }
    return "N";
  }

  public String calcmNumPlu () {
    if(pos == null) return "N";
    if(pos.equals("NNS") || pos.equals("NNPS")){
      return "Y";
    }else{
      return "N";
    }
  }

  public String calcmNumUnk () {
//    ArrayList<BaseToken> l = containedTokens(m.getBegin(), m.getEnd());
//    for (int i = l.size()-1; i>=0; i--) {
//      String pos = l.get(i).getPartOfSpeech();
    if(pos == null) return "N";
      if (pos.startsWith("NN"))
        return "N";
//    }
    return "Y";
  }

  // FIX**ME - use result of constituency parser to get rightmost N* of NP head.
  // FIXED - not used, so the fixes above work fine.
  public String calcmNumber () {
    // use the underlying NE, instead of the expanded markable to find the number
    //ArrayList<BaseToken> l = containedTokens(m.getContent().getBegin(), m.getContent().getEnd());
    ArrayList<BaseToken> l = containedTokens(m.getBegin(), m.getEnd());
    for (BaseToken t : l) {
      String pos = t.getPartOfSpeech();
      if (pos.equals("NN") || pos.equals("NNP"))
        return "S";
      else if (pos.equals("NNS") || pos.equals("NNPS"))
        return "P";
    }
    return "U";
  }

  public String calcmIsDrug () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_DRUG)
      return "Y";
    else
      return "N";
  }

  public String calcmIsDisorder () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_DISORDER)
      return "Y";
    else
      return "N";
  }

  public String calcmIsFinding () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_FINDING)
      return "Y";
    else
      return "N";
  }

  public String calcmIsProcedure () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_PROCEDURE)
      return "Y";
    else
      return "N";
  }

  public String calcmIsAnatomicalSite () {
    if (m.getContent() instanceof IdentifiedAnnotation &&
        ((IdentifiedAnnotation)m.getContent()).getTypeID() == CONST.NE_TYPE_ID_ANATOMICAL_SITE)
      return "Y";
    else
      return "N";
  }

  public String calcmWnClass () {
    if (m.getContent() instanceof IdentifiedAnnotation) {
      IdentifiedAnnotation ne = (IdentifiedAnnotation) m.getContent();
      return String.valueOf(ne.getTypeID());
    } else
      return "0";
  }

  public String calcmProStr () {
    return isPronominal(m) ? "Y" : "N";
  }

  public String calcmIndefinite () {
    if (s.toLowerCase().startsWith("a ") ||
      s.toLowerCase().startsWith("an "))
      return "Y";
    else
      return "N";
  }

  // FIXME use parser output instead of LWA
  public String calcmNPHead () {
    Annotation a = m.getContent();
    FSIterator iter = jcas.getJFSIndexRepository().getAnnotationIndex(LookupWindowAnnotation.type).iterator();
    while (iter.hasNext()) {
      LookupWindowAnnotation lwa = (LookupWindowAnnotation) iter.next();
      if (lwa.getBegin()<=a.getBegin() && lwa.getEnd()==a.getEnd())
        return "yes";
    }
    return "no";
  }

  public String calcmSimilarStr () {
    List<Annotation> lm = FSIteratorToList.convert(
        jcas.getJFSIndexRepository().getAnnotationIndex(Sentence.type).iterator());

    List<String> contentTokens = contentWords(m.getBegin(), m.getEnd());

    int window = 3;
    Sentence[] sent = new Sentence[window+1];
    for (int i = 0; i <= window; i++) sent[i] = null;

    for (int i = 0; i < lm.size(); i++) {
      for (int j = 1; j <= window; j++) sent[j-1] = sent[j];
      Sentence ss = (Sentence) lm.get(i);
      sent[window] = ss;
      if (ss.getBegin() <= m.getBegin() && ss.getEnd() >= m.getEnd()) {
        for (int k = window-1; k >= 0 && sent[k] != null; k--) {
          List<String> lt = contentWords(sent[k].getBegin(), sent[k].getEnd());
          for (String s : lt)
            if (contentTokens.contains(s))
              return "Y";
        }
      } else if (ss.getBegin() > m.getBegin())
        return "N";
    }
    return "N";
  }

}
TOP

Related Classes of org.apache.ctakes.coreference.util.AnaphoricityAttributeCalculator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.