Package org.apache.uima.ruta.textruler.core

Source Code of org.apache.uima.ruta.textruler.core.TextRulerWordConstraint

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.ruta.textruler.core;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;

/**
*
* This class was moved from one of the algorithms to the core framework since it gets used in
* almost every algorithm. It encapsulates the word constraint e.g. of a Ruta rule item.
*/
public class TextRulerWordConstraint {

  // TODO add a preference for it or include it in the learner
  private static final boolean AUTO_REGEXP = true;

  protected TextRulerAnnotation tokenAnnotation;

  protected boolean isRegExpType; // indicates wheter this token can have

  // different contens according to its type

  // (PERIOD e.g. is NOT such a token, it is bound to the content '.'; NUM is
  // such a token, it can be any number!

  public TextRulerWordConstraint(TextRulerWordConstraint copyFrom) {
    super();
    tokenAnnotation = copyFrom.tokenAnnotation;
    isRegExpType = copyFrom.isRegExpType;
  }

  public TextRulerWordConstraint(TextRulerAnnotation tokenAnnotation) {
    super();
    this.tokenAnnotation = tokenAnnotation;
    if (AUTO_REGEXP) {
      CAS cas = tokenAnnotation.getDocument().getCAS();
      TypeSystem ts = cas.getTypeSystem();
      Type wType = ts.getType(TextRulerToolkit.RUTA_WORD_TYPE_NAME);
      Type numType = ts.getType(TextRulerToolkit.RUTA_NUM_TYPE_NAME);
      Type markupType = ts.getType(TextRulerToolkit.RUTA_MARKUP_TYPE_NAME);
      Type specialType = ts.getType(TextRulerToolkit.RUTA_SPECIAL_TYPE_NAME);
      isRegExpType = ts.subsumes(wType, tokenAnnotation.getType())
              || ts.subsumes(markupType, tokenAnnotation.getType())
              || ts.subsumes(numType, tokenAnnotation.getType())
              || ts.subsumes(specialType, tokenAnnotation.getType());
    }
  }

  protected TextRulerWordConstraint(TextRulerAnnotation tokenAnnotation, boolean isRegExpType) {
    this.tokenAnnotation = tokenAnnotation;
    this.isRegExpType = isRegExpType;
  }

  public Type annotationType() {
    return tokenAnnotation.getType();
  }

  public String typeShortName() {
    return tokenAnnotation.getType().getShortName();
  }

  @Override
  public boolean equals(Object o) {
    if (o == null)
      return false;
    return toString().equals(((TextRulerWordConstraint) o).toString());
  }

  @Override
  public int hashCode() {
    return toString().hashCode();
  }

  public boolean isRegExpConstraint() {
    return isRegExpType;
  }

  @Override
  public String toString() {
    if (isRegExpConstraint())
      return TextRulerToolkit.escapeForTMStringParameter(TextRulerToolkit
              .escapeForRegExp(tokenAnnotation.getCoveredText()));
    else
      return tokenAnnotation.getType().getShortName();
  }

  public TextRulerWordConstraint copy() {
    return new TextRulerWordConstraint(this);
  }

  public TextRulerAnnotation getTokenAnnotation() {
    return tokenAnnotation;
  }
}
TOP

Related Classes of org.apache.uima.ruta.textruler.core.TextRulerWordConstraint

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.