Package org.apache.uima.ruta.cde

Source Code of org.apache.uima.ruta.cde.RutaGEConstraint

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.ruta.cde;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.ruta.cde.utils.EvaluationMeasures;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;
import org.eclipse.core.runtime.Path;

public class RutaGEConstraint implements IRutaConstraint {

  private String constraintText;

  private String description;

  private AnalysisEngine ae;

  private boolean initalized = false;

  private HashMap<String, Double> rulesMap;;

  public RutaGEConstraint(String constraintText, String description) {
    this.constraintText = constraintText;
    this.description = description;
  }

  public void initialize() throws Exception {
    // Constraint Format: "Peter":Author 0.44, Editor 0.50

    rulesMap = createRuleSet();

    Iterator<Entry<String, Double>> rulesIterator = rulesMap.entrySet().iterator();
    StringBuilder sb = new StringBuilder();
    sb.append("PACKAGE org.apache.uima.ruta;\n\n");
    int counter = 0;
    while (rulesIterator.hasNext()) {

      Entry<String, Double> entry = rulesIterator.next();
      String rule = entry.getKey();
      // Double ratioInConstraint = (Double) entry.getValue();

      // if (!rule.endsWith(";")) {
      // rule = rule + ";";
      // }
      sb.append(rule);
      sb.append("\n");
      counter++;
      if (counter % 100 == 0) {
        sb.append("Document{-> LOG(\"" + counter + "/" + rulesMap.size() + "\")};");
      }

    }
    URL aedesc = RutaEngine.class.getResource("BasicEngine.xml");
    XMLInputSource inae = new XMLInputSource(aedesc);
    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(inae);
    ResourceManager resMgr = UIMAFramework.newDefaultResourceManager();
    AnalysisEngineDescription aed = (AnalysisEngineDescription) specifier;

    ae = UIMAFramework.produceAnalysisEngine(aed, resMgr, null);
    File tempFile = File.createTempFile("RutaCDE", RutaEngine.SCRIPT_FILE_EXTENSION);
    tempFile.deleteOnExit();
    FileUtils.saveString2File(sb.toString(), tempFile, "UTF-8");
    String portableString = Path.fromOSString(tempFile.getParentFile().getPath())
            .toPortableString();
    ae.setConfigParameterValue(RutaEngine.PARAM_SCRIPT_PATHS, new String[] { portableString });
    String name = tempFile.getName().substring(0, tempFile.getName().length() - 5);
    ae.setConfigParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, name);

    ae.setConfigParameterValue(RutaEngine.PARAM_DEBUG, true);
    ae.setConfigParameterValue(RutaEngine.PARAM_DEBUG_WITH_MATCHES, true);
    ae.setConfigParameterValue(RutaEngine.PARAM_PROFILE, false);
    ae.setConfigParameterValue(RutaEngine.PARAM_STATISTICS, false);
    ae.reconfigure();
  }

  public Double processConstraint(CAS cas) throws Exception {
    if (!initalized) {
      initialize();
    }
    int runCount = 0;
    int printCount = 0;
    ArrayList<Double[]> results = new ArrayList<Double[]>();
    Type matchedType = cas.getTypeSystem().getType(
            "org.apache.uima.ruta.type.DebugMatchedRuleMatch");
    Type ruleApplyType = cas.getTypeSystem().getType("org.apache.uima.ruta.type.DebugRuleApply");
    Type blockApplyType = cas.getTypeSystem().getType("org.apache.uima.ruta.type.DebugBlockApply");

    removeDebugAnnotations(cas, matchedType, ruleApplyType, blockApplyType);
    double applyAmount = 0;
    double triedAmount = 0;
    ae.process(cas);
    Feature innerApplyFeature = blockApplyType.getFeatureByBaseName("innerApply");
    Feature appliedFeature = ruleApplyType.getFeatureByBaseName("applied");
    Feature triedFeature = ruleApplyType.getFeatureByBaseName("tried");
    Feature elementFeature = ruleApplyType.getFeatureByBaseName("element");
    FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex(blockApplyType).iterator();
    if (iterator.isValid()) {
      AnnotationFS fs = iterator.get();
      if (fs.getType().equals(blockApplyType)) {
        FeatureStructure featureValue = fs.getFeatureValue(innerApplyFeature);
        FSArray array = (FSArray) featureValue;

        FeatureStructure[] fsArray = array.toArray();
        for (FeatureStructure featureStructure : fsArray) {
          AnnotationFS ruleApply = (AnnotationFS) featureStructure;
          if (ruleApply.getType().equals(ruleApplyType)) {
            applyAmount = ruleApply.getIntValue(appliedFeature);
            triedAmount = ruleApply.getIntValue(triedFeature);
            String ruleString = ruleApply.getStringValue(elementFeature);

            if (triedAmount == 0) {
            } else {
              double ratioInDocument = applyAmount / triedAmount;
              String key = ruleString.trim() + ";";
              Double ratioInConstraint = rulesMap.get(key);
              if (ratioInConstraint != null) {
                results.add(new Double[] { ratioInConstraint, ratioInDocument });
              } else {
                System.out.println("rule not found!!!: " + key);
              }
            }
          }

        }

      }
    }

    removeDebugAnnotations(cas, matchedType, ruleApplyType, blockApplyType);
    ae.destroy();

    runCount++;
    printCount++;
    if (printCount == 10) {
      System.out.println(runCount);
      System.out.println("time: " + System.currentTimeMillis());
      printCount = 0;
    }

    // calculate cosinus similarity for result values:
    return EvaluationMeasures.cosine(results);
  }

  private void removeDebugAnnotations(CAS cas, Type matchedType, Type ruleApplyType,
          Type blockApplyType) {
    Collection<AnnotationFS> toRemove = new ArrayList<AnnotationFS>();
    AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex(blockApplyType);
    for (AnnotationFS annotationFS : annotationIndex) {
      toRemove.add(annotationFS);
    }
    annotationIndex = cas.getAnnotationIndex(ruleApplyType);
    for (AnnotationFS annotationFS : annotationIndex) {
      toRemove.add(annotationFS);
    }
    annotationIndex = cas.getAnnotationIndex(matchedType);
    for (AnnotationFS annotationFS : annotationIndex) {
      toRemove.add(annotationFS);
    }
    for (AnnotationFS annotationFS : toRemove) {
      cas.removeFsFromIndexes(annotationFS);
    }
  }

  public String getDescription() {
    return this.description;
  }

  public void setDescription(String description) {
    this.description = description;
  }

  public HashMap<String, Double> createRuleSet() {
    HashMap<String, Double> rulesMap = new HashMap<String, Double>();
    try {
      String content = FileUtils.file2String(new File(constraintText));

      String[] constraintTextArray = content.split("\n");
      for (String constraintLine : constraintTextArray) {

        String[] patternAndEstimates = constraintLine.split(":");
        if (patternAndEstimates.length < 2) {
          continue;
        }
        String pattern = patternAndEstimates[0];
        pattern = pattern.trim();
        String estimates = patternAndEstimates[1];
        String[] singleEstimates = estimates.split(",");

        for (String singleEstimate : singleEstimates) {
          singleEstimate = singleEstimate.trim();
          String[] typeAndRatio = singleEstimate.split("\\s+");
          String typeName = typeAndRatio[0];
          String ratio = typeAndRatio[1];
          ratio = ratio.trim();
          String rule = "";

          if (pattern.startsWith("\"")) {
            rule = pattern + "{PARTOF(" + typeName + ")};";
          } else {
            rule = pattern + "{PARTOF(" + typeName + ")};";
          }
          rulesMap.put(rule, Double.valueOf(ratio));
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
    return rulesMap;
  }

  public String getData() {
    return constraintText;
  }

  public void setData(String data) {
    this.constraintText = data;
  }
}
TOP

Related Classes of org.apache.uima.ruta.cde.RutaGEConstraint

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.