Package org.apache.uima.ruta.textruler.learner.kep

Source Code of org.apache.uima.ruta.textruler.learner.kep.KEPLearner$KEPRuleComparator

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.ruta.textruler.learner.kep;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.ConstraintFactory;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FSMatchConstraint;
import org.apache.uima.cas.FSTypeConstraint;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.ruta.textruler.core.GlobalCASSource;
import org.apache.uima.ruta.textruler.core.TextRulerBasicLearner;
import org.apache.uima.ruta.textruler.core.TextRulerExample;
import org.apache.uima.ruta.textruler.core.TextRulerExampleDocument;
import org.apache.uima.ruta.textruler.core.TextRulerRuleItem;
import org.apache.uima.ruta.textruler.core.TextRulerRulePattern;
import org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector;
import org.apache.uima.ruta.textruler.core.TextRulerTarget;
import org.apache.uima.ruta.textruler.core.TextRulerToolkit;
import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
import org.apache.uima.ruta.textruler.learner.kep.KEPRuleItemCondition.Condition;

public class KEPLearner extends TextRulerBasicLearner {

  // String names of the four tuning values; presumably used as parameter keys by code
  // outside this section -- confirm against the callers.
  public static final String MAX_EXPAND_RULES = "maxExpandRules";

  public static final String MAX_INFILLER_RULES = "maxInfillerRules";

  public static final String FILLER_WINDOW = "fillerWindow";

  public static final String MAX_FILLER_LENGTH = "maxFillerLength";

  // Default values that accompany the four names above.
  public static final int DEFAULT_MAX_EXPAND_RULES = 50;

  public static final int DEFAULT_MAX_INFILLER_RULES = 10;

  public static final int DEFAULT_FILLER_WINDOW = 5;

  public static final int DEFAULT_MAX_FILLER_LENGTH = 3;

  private int fillerWindow;

  private int maxFillerLength;

  // Upper bound on the rules kept per example by makeInFillerRulesForExample.
  private int maxInfillerRules;

  // Upper bound on the rules carried from one expansion round of expandInFillerRules to the next.
  private int maxExpandRules;

  // Learned rules, keyed by slot name (boundary slot names included).
  private Map<String, List<KEPRule>> ruleLists = new HashMap<String, List<KEPRule>>();

  // Correction ("removal") rules, keyed by slot name; filled by makeRemovalRules.
  private Map<String, List<KEPRule>> correctionRules = new HashMap<String, List<KEPRule>>();

  // Positive examples already covered by some rule, keyed by slot name; maintained by learnRules.
  private Map<String, List<TextRulerExample>> coveredExamples = new HashMap<String, List<TextRulerExample>>();

  // Result of getBlocks() for each slot name; the stored value may be null.
  private Map<String, Type> blocks = new HashMap<String, Type>();

  // For every entry i of slotNames: [3i] = left boundary, [3i+1] = right boundary,
  // [3i+2] = the slot itself (filled in doRun).
  private String[] slotNamesWithBoundaries;

  // Per slot name: whether the last getOptimalRuleCombination run found rules without
  // uncorrected negatives that cover all positive examples.
  private Map<String, Boolean> hasPerfectRules = new HashMap<String, Boolean>();

  public KEPLearner(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
          Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
    super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, skip, delegate);
    supportBoundaries = true;
  }

  @Override
  protected void doRun() {

    long startTime = System.nanoTime();

    this.exampleDocuments.clearCurrentExamples();
    prepareCachedCASesWithBoundaries();
    this.slotNamesWithBoundaries = new String[slotNames.length * 3];
    for (int i = 0; i < this.slotNames.length; i++) {
      this.slotNamesWithBoundaries[i * 3] = slotNames[i] + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION;
      this.slotNamesWithBoundaries[i * 3 + 1] = slotNames[i]
              + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION;
      this.slotNamesWithBoundaries[i * 3 + 2] = slotNames[i];

    }
    for (int i = 0; i < this.slotNamesWithBoundaries.length; i++) {
      if (!filterSetWithSlotNames.contains(slotNamesWithBoundaries[i]))
        this.filterSetWithSlotNames.add(slotNamesWithBoundaries[i]);
      initializeMapEntries(this.slotNamesWithBoundaries[i]);
    }
    for (int i = 0; i < slotNamesWithBoundaries.length; i++) {
      String currentSlot = slotNamesWithBoundaries[i];
      if (isCompleteSlot(currentSlot)) {
        if (hasPerfectRules.get(slotNamesWithBoundaries[i - 1])
                && hasPerfectRules.get(slotNamesWithBoundaries[i - 2])) {
          filterSetWithSlotNames.remove(slotNamesWithBoundaries[i]);
          continue;
        }
      }
      runForSlot(currentSlot);
      if (hasPerfectRules.get(currentSlot)) {
        filterSetWithSlotNames.remove(currentSlot);
      }
      if (shouldAbort())
        return;
    }
    removeBadRules();
    for (int i = 0; i < this.slotNamesWithBoundaries.length; i++) {
      List<KEPRule> list = this.ruleLists.get(slotNamesWithBoundaries[i]);
      if (!shouldAbort() && list != null && !list.isEmpty()) {
        this.exampleDocuments.createExamplesForTarget(list.get(0).getTarget());
        if (!hasPerfectRules.get(slotNamesWithBoundaries[i]))
          makeRemovalRules(list.get(0).getTarget());
        list = getOptimalRuleCombination(list);
      }
    }
    removeBadRules();

    long estimatedTime = (System.nanoTime() - startTime) / 1000000000;
    System.out.println(estimatedTime + " seconds needed to learn all rules");
    sendStatusUpdateToDelegate("Done", TextRulerLearnerState.ML_DONE, true);

  }

  private boolean isCompleteSlot(String slot) {
    return !slot.endsWith(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION)
            && !slot.endsWith(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION);
  }

  /**
   * Execute algorithms for slot denoted by slotName
   *
   * @param slotName
   *          the name of a slot
   */
  private void runForSlot(String slotName) {

    sendStatusUpdateToDelegate("Working on " + slotName, TextRulerLearnerState.ML_RUNNING, true);
    TextRulerTarget target = new TextRulerTarget(slotName,
            TextRulerTarget.MLTargetType.SINGLE_WHOLE_SLOT, this);
    this.exampleDocuments.createExamplesForTarget(target);
    if (!shouldAbort())
      blocks.put(slotName, getBlocks());
    if (!shouldAbort())
      learnRules(target);
    this.ruleLists.put(slotName, getOptimalRuleCombination(this.ruleLists.get(slotName)));
    sendStatusUpdateToDelegate(slotName + " done", TextRulerLearnerState.ML_RUNNING, true);
  }

  private Type getBlocks() {
    sendStatusUpdateToDelegate("Searching for Blocks", TextRulerLearnerState.ML_RUNNING, false);
    Map<String, List<TextRulerExample>> exampleMap = new HashMap<String, List<TextRulerExample>>();
    Map<String, Double> lengthMap = new HashMap<String, Double>();
    Map<String, Integer> countMap = new HashMap<String, Integer>();
    for (TextRulerExampleDocument exampleDocument : exampleDocuments.getDocuments()) {
      for (AnnotationFS annotation : exampleDocument.getCAS().getAnnotationIndex()) {
        for (TextRulerExample example : exampleDocument.getPositiveExamples()) {
          if (annotation.getBegin() <= example.getAnnotation().getBegin()
                  && annotation.getEnd() >= example.getAnnotation().getEnd()
                  && annotation.getEnd() - annotation.getBegin() > example.getAnnotation().getEnd()
                          - example.getAnnotation().getBegin()
                  && !filterSetWithSlotNames.contains(annotation.getType().getName())) {
            List<TextRulerExample> list = exampleMap.get(annotation.getType().getName());
            if (list == null)
              list = new ArrayList<TextRulerExample>();
            if (!list.contains(example))
              list.add(example);
            exampleMap.put(annotation.getType().getName(), list);
          }
        }
        double aLength = (double) (annotation.getEnd() - annotation.getBegin());
        lengthMap.put(
                annotation.getType().getName(),
                lengthMap.get(annotation.getType().getName()) == null ? aLength : lengthMap
                        .get(annotation.getType().getName()) + aLength);
        countMap.put(
                annotation.getType().getName(),
                countMap.get(annotation.getType().getName()) == null ? 1 : countMap.get(annotation
                        .getType().getName()) + 1);
      }
    }

    List<Type> result = new ArrayList<Type>();
    for (String typeString : exampleMap.keySet()) {
      if (exampleMap.get(typeString).size() == exampleDocuments.getAllPositiveExamples().size())
        result.add(exampleDocuments.getDocuments().get(0).getCAS().getTypeSystem()
                .getType(typeString));
    }
    double exLength = 0;
    for (TextRulerExample ex : exampleDocuments.getAllPositiveExamples()) {
      exLength += (double) (ex.getAnnotation().getEnd() - ex.getAnnotation().getBegin());
    }
    double bestRatio = 0;
    Type bestType = null;
    for (Type type : result) {
      if ((exLength / lengthMap.get(type.getName()) > bestRatio && countMap.get(type.getName()) <= exampleDocuments
              .getAllPositiveExamples().size())
              || (exLength / lengthMap.get(type.getName()) == bestRatio
                      && countMap.get(type.getName()) > countMap.get(bestType.getName()) && countMap
                      .get(type.getName()) <= exampleDocuments.getAllPositiveExamples().size())) {
        bestType = type;
        bestRatio = exLength / lengthMap.get(type.getName());
      }

    }
    sendStatusUpdateToDelegate("Searching for Blocks done", TextRulerLearnerState.ML_RUNNING, true);
    return bestType;
  }

  private void learnRules(TextRulerTarget target) {

    List<KEPRule> ruleList = this.ruleLists.get(target.getSingleSlotTypeName());
    List<TextRulerExample> coveredExamples = this.coveredExamples.get(target
            .getSingleSlotTypeName());
    List<TextRulerExample> positiveExamples = this.exampleDocuments.getAllPositiveExamples();

    for (TextRulerExample e : positiveExamples) {
      if (!coveredExamples.contains(e)) {
        ruleList.addAll(makeInFillerRulesForExample(e));
        // ruleList.addAll(generalizeForRepitition(ruleList));
      }
      for (KEPRule rule : ruleList)
        for (TextRulerExample ex : rule.getCoveringStatistics().getCoveredPositiveExamples())
          if (!coveredExamples.contains(ex))
            coveredExamples.add(ex);

      if (shouldAbort())
        return;
    }
    ruleList.addAll(getCandidateClassificationRules(target));
    ruleList = getBestAndOptimalRules(ruleList);
    ruleList.addAll(makePostFillers(ruleList, true));
    ruleList = getOptimalRuleCombination(ruleList);
    this.ruleLists.put(target.getSingleSlotTypeName(), ruleList);
  }

  private List<KEPRule> makeInFillerRulesForExample(TextRulerExample e) {
    sendStatusUpdateToDelegate(
            "Searching for Infiller Rules for "
                    + e.getTarget().getSingleSlotTypeName()
                            .substring(e.getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1),
            TextRulerLearnerState.ML_RUNNING, false);
    Collection<KEPRule> rules = new HashSet<KEPRule>();

    rules = new HashSet<KEPRule>();
    rules = expandInFillerRules(e, rules, true);
    if (rules.size() > maxInfillerRules) {
      List<KEPRule> list = new ArrayList<KEPRule>(rules);
      Collections.sort(list, new KEPRuleComparator(e.getDocumentCAS()));
      // TODO this is a parameter!
      rules = new HashSet<KEPRule>(list.subList(0, maxInfillerRules));
    }
    List<KEPRule> result = new ArrayList<KEPRule>(rules);
    if (shouldAbort())
      return result;

    testRulesOnDocumentSet(result, exampleDocuments);
    sendStatusUpdateToDelegate(
            "Searching for Infiller Rules for "
                    + e.getTarget().getSingleSlotTypeName()
                            .substring(e.getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1)
                    + " done", TextRulerLearnerState.ML_RUNNING, true);
    return new ArrayList<KEPRule>(rules);
  }

  /**
   * Recursively grows in-filler rules for the given example.
   * <p>
   * Each call first truncates {@code rules} to {@code maxExpandRules} entries and returns them
   * if {@code expanding} is false. Otherwise one expansion round is performed: an empty rule
   * set is seeded with one single-item rule per annotation found at the example's begin, a
   * non-empty set has each rule extended by the annotations that start where its last item
   * ends. The method then calls itself with the expanded set.
   *
   * @param e
   *          the example the rules are built for
   * @param rules
   *          the rules produced by the previous round (empty on the first call)
   * @param expanding
   *          false to stop the recursion and return the (truncated) rules
   * @return the final set of rules
   */
  private Collection<KEPRule> expandInFillerRules(TextRulerExample e, Collection<KEPRule> rules,
          boolean expanding) {

    // Keep only the first maxExpandRules rules in KEPRuleComparator order.
    if (rules.size() > maxExpandRules) {
      List<KEPRule> list = new ArrayList<KEPRule>(rules);
      Collections.sort(list, new KEPRuleComparator(e.getDocumentCAS()));
      rules = new HashSet<KEPRule>(list.subList(0, maxExpandRules));
    }
    if (!expanding) {
      return rules;
    }

    Collection<KEPRule> expandedRules = new HashSet<KEPRule>();
    if (rules.isEmpty()) {
      // First round: one rule per annotation returned for the example's begin offset.
      List<AnnotationFS> seeds = getAnnotationsStartingAt(e.getDocumentCAS(), e.getAnnotation()
              .getBegin(), e.getAnnotation().getEnd());
      if (seeds.isEmpty()) {
        // Nothing to start from: the next call returns the empty set.
        expanding = false;
      }
      for (AnnotationFS each : seeds) {
        KEPRuleItem item = new KEPRuleItem(each);
        KEPRule rule = new KEPRule(this, e.getTarget());
        rule.addInFillerItem(item);
        expandedRules.add(rule);
      }
    } else {
      // Stays false unless at least one rule reaches the extension branch below.
      expanding = false;
      for (KEPRule eachRule : rules) {
        TextRulerRulePattern inFiller = eachRule.getInFiller();
        KEPRuleItem lastItem = (KEPRuleItem) inFiller.get(inFiller.size() - 1);
        int end = lastItem.getEnd();
        if (end >= e.getAnnotation().getEnd()) {
          // The rule already reaches the end of the example: carry it over unchanged.
          if (!expandedRules.contains(eachRule)) {
            expandedRules.add(eachRule);
          }
        } else {
          List<AnnotationFS> annotationsStartingAt = getAnnotationsStartingAt(e.getDocumentCAS(),
                  end, e.getAnnotation().getEnd());
          // Note: when no annotation follows but an equal rule is already in expandedRules,
          // control falls into the else branch, which sets expanding to true and then loops
          // over an empty list.
          if (annotationsStartingAt.isEmpty() && !expandedRules.contains(eachRule)) {
            expandedRules.add(eachRule);
          } else {
            expanding = true;
            for (AnnotationFS eachFS : annotationsStartingAt) {
              if (eachFS.getType().getName().equals(lastItem.getType().getName())) {
                // Same type as the last item: widen that item in place (new annotation,
                // max + 1, reluctant) instead of appending another item.
                // NOTE(review): the item is changed while its rule is an element of the
                // incoming set; whether that affects hashing depends on KEPRule.hashCode,
                // which is not visible here -- confirm.
                lastItem.setAnnotation(eachFS);
                lastItem.setMax(lastItem.getMax() + 1).setReluctant(true);
                expandedRules.add(eachRule);
              } else {
                // Different type: copy the rule and append an item for the annotation.
                KEPRule newRule = new KEPRule(eachRule);
                KEPRuleItem newItem = new KEPRuleItem(eachFS);
                newRule.addInFillerItem(newItem);
                expandedRules.add(newRule);
              }
            }
          }
        }
      }
    }
    return expandInFillerRules(e, expandedRules, expanding);
  }

  private List<KEPRule> getCandidateClassificationRules(TextRulerTarget target) {
    sendStatusUpdateToDelegate(
            "Searching for Candidate Classification Rules for "
                    + target.getSingleSlotTypeName().substring(
                            target.getSingleSlotTypeName().lastIndexOf(".") + 1),
            TextRulerLearnerState.ML_RUNNING, false);
    List<KEPRule> result = new ArrayList<KEPRule>();
    List<Type> types = getTokensInNExamples(exampleDocuments.getAllPositiveExamples(),
            exampleDocuments.getAllPositiveExamples().size() / 2, true);

    for (Type type : types) {
      result.add(new KEPRule(this, target).addInFillerItem(new KEPRuleItem(type)));
    }
    testRulesOnDocumentSet(result, exampleDocuments);
    // result = getBestAndOptimalRules(result);
    result = addConditions(result, target);
    sendStatusUpdateToDelegate(
            "Searching for Candidate Classification Rules for "
                    + target.getSingleSlotTypeName().substring(
                            target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done",
            TextRulerLearnerState.ML_RUNNING, true);
    return result;
  }

  private List<KEPRule> addConditions(List<KEPRule> rules, TextRulerTarget target) {
    List<KEPRule> result = new ArrayList<KEPRule>();
    List<TextRulerExample> allCoveredExamples = getCoveredExamples(rules);

    List<Type> containedTypes = getTokensInNExamples(exampleDocuments.getAllPositiveExamples(),
            exampleDocuments.getAllPositiveExamples().size() / 3, false);
    if (!containedTypes.isEmpty()) {
      for (KEPRule rule : rules) {
        KEPRuleItem ruleItem = (KEPRuleItem) rule.getInFiller().get(0);
        for (Type type : containedTypes) {
          if (!type.getName().equals(ruleItem.getType().getName())
                  && !ruleItem.containsAndCondition(type)) {
            result.add(new KEPRule(this, target).addInFillerItem(ruleItem.copy().addAndCondition(
                    new KEPRuleItemCondition(type, Condition.CONTAINS, false))));
          }
        }
      }
    }

    testRulesOnDocumentSet(result, exampleDocuments);
    result = getBestAndOptimalRules(result);
    List<KEPRule> toRefine = new ArrayList<KEPRule>();
    List<KEPRule> toRemove = new ArrayList<KEPRule>();
    for (KEPRule rule : result) {
      if (rule.getCoveringStatistics().getCoveredPositivesCount() == 0)
        toRemove.add(rule);
      else if (rule.getCoveringStatistics().getCoveredNegativesCount() > 0
              && rule.getPostFiller().size() < 5)
        toRefine.add(rule);
    }
    result.removeAll(toRemove);
    result.removeAll(toRefine);
    result = getBestAndOptimalRules(result);
    if (getCoveredExamples(result).size() == allCoveredExamples.size()) {
      return result;
    }
    if (toRefine.size() > 0) {
      result.addAll(addConditions(toRefine, target));
    }
    sendStatusUpdateToDelegate(
            "Adding conditions to rules for "
                    + target.getSingleSlotTypeName().substring(
                            target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done",
            TextRulerLearnerState.ML_RUNNING, true);
    return result;
  }

  private List<KEPRule> makePostFillers(List<KEPRule> baseRules, boolean changed) {
    if (!baseRules.isEmpty() && !shouldAbort()) {
      sendStatusUpdateToDelegate(
              "Adding postfillers to rules for "
                      + baseRules
                              .get(0)
                              .getTarget()
                              .getSingleSlotTypeName()
                              .substring(
                                      baseRules.get(0).getTarget().getSingleSlotTypeName()
                                              .lastIndexOf(".") + 1),
              TextRulerLearnerState.ML_RUNNING, true);
    } else {
      return new ArrayList<KEPRule>();
    }
    // List<TextRulerExample> allCoveredExamples = getCoveredExamples(baseRules);
    Set<KEPRule> result = new HashSet<KEPRule>();
    for (KEPRule rule : baseRules) {
      for (TextRulerExample e : rule.getCoveringStatistics().getCoveredPositiveExamples()) {
        if (rule.getCoveringStatistics().getCoveredNegativesCount() > 0) {
          KEPRuleItem lastItem = ((KEPRuleItem) rule.getPostFiller().lastItem());
          int end = e.getAnnotation().getEnd();
          if (lastItem != null) {
            end = lastItem.getEnd();
          }
          List<AnnotationFS> annotations = getAnnotationsStartingAt(e.getDocumentCAS(), end, e
                  .getDocumentCAS().getDocumentText().length());
          boolean blockBoundaryHit = false;
          for (AnnotationFS annotationFS : annotations) {
            if (annotationFS.getType().equals(
                    blocks.get(baseRules.get(0).getTarget().getSingleSlotTypeName()))) {
              blockBoundaryHit = true;
              break;
            }
          }
          if (blockBoundaryHit) {
            continue;
          }
          for (AnnotationFS annotationFS : annotations) {
            if (annotationFS.getType().getName()
                    .equals(lastItem != null ? lastItem.getType().getName() : null)) {
              lastItem.setReluctant(true).setMax(lastItem.getMax() + 1).setAnnotation(annotationFS);
            } else {
              result.add(rule.copy().addPostFillerItem(new KEPRuleItem(annotationFS)));
            }
            if (rule.getPreFiller().isEmpty())
              result.add(rule);
          }
        } else {
          result.add(rule);
        }
      }
    }
    List<KEPRule> resultList = new ArrayList<KEPRule>(result);
    testRulesOnDocumentSet(resultList, exampleDocuments);
    resultList.addAll(baseRules);
    resultList = getBestAndOptimalRules(resultList);
    if (baseRules.containsAll(resultList)) {
      if (!changed)
        return resultList;
      else
        changed = false;
    } else {
      changed = true;
    }
    resultList.addAll(makePreFillers(resultList, changed));
    sendStatusUpdateToDelegate(
            "Adding postfillers to rules for "
                    + baseRules
                            .get(0)
                            .getTarget()
                            .getSingleSlotTypeName()
                            .substring(
                                    baseRules.get(0).getTarget().getSingleSlotTypeName()
                                            .lastIndexOf(".") + 1) + " done",
            TextRulerLearnerState.ML_RUNNING, true);
    return resultList;
  }

  private List<KEPRule> makePreFillers(List<KEPRule> baseRules, boolean changed) {
    if (!baseRules.isEmpty() && !shouldAbort()) {
      sendStatusUpdateToDelegate(
              "Adding prefillers to rules for "
                      + baseRules
                              .get(0)
                              .getTarget()
                              .getSingleSlotTypeName()
                              .substring(
                                      baseRules.get(0).getTarget().getSingleSlotTypeName()
                                              .lastIndexOf(".") + 1),
              TextRulerLearnerState.ML_RUNNING, true);
    } else {
      return new ArrayList<KEPRule>();
    }
    // List<TextRulerExample> allCoveredExamples = getCoveredExamples(baseRules);
    Set<KEPRule> result = new HashSet<KEPRule>();
    for (KEPRule rule : baseRules) {
      for (TextRulerExample e : rule.getCoveringStatistics().getCoveredPositiveExamples()) {
        if (rule.getCoveringStatistics().getCoveredNegativesCount() > 0) {
          int begin = e.getAnnotation().getBegin();
          KEPRuleItem firstItem = (KEPRuleItem) rule.getPreFiller().firstItem();
          if (firstItem != null) {
            begin = firstItem.getBegin();
          }
          List<AnnotationFS> annotations = getAnnotationsEndingAt(begin, e.getDocumentCAS());
          boolean blockBoundaryHit = false;
          for (AnnotationFS annotationFS : annotations) {
            if (annotationFS.getType().equals(
                    blocks.get(baseRules.get(0).getTarget().getSingleSlotTypeName()))) {
              blockBoundaryHit = true;
              break;
            }
          }
          if (blockBoundaryHit) {
            continue;
          }
          for (AnnotationFS annotationFS : annotations) {
            if (annotationFS.getType().getName()
                    .equals(firstItem != null ? firstItem.getType().getName() : null)) {
              firstItem.setReluctant(true).setMax(firstItem.getMax() + 1)
                      .setAnnotation(annotationFS);
            } else {
              result.add(rule.copy().addPreFillerItem(new KEPRuleItem(annotationFS)));
            }
          }
        } else {
          result.add(rule);
        }
      }
    }
    List<KEPRule> resultList = new ArrayList<KEPRule>(result);
    testRulesOnDocumentSet(resultList, exampleDocuments);
    resultList.addAll(baseRules);
    resultList = getBestAndOptimalRules(resultList);
    if (baseRules.containsAll(resultList)) {
      if (!changed)
        return resultList;
      else
        changed = false;
    } else {
      changed = true;
    }
    resultList.addAll(makePostFillers(resultList, changed));

    sendStatusUpdateToDelegate(
            "Adding prefillers to rules for "
                    + baseRules
                            .get(0)
                            .getTarget()
                            .getSingleSlotTypeName()
                            .substring(
                                    baseRules.get(0).getTarget().getSingleSlotTypeName()
                                            .lastIndexOf(".") + 1) + " done",
            TextRulerLearnerState.ML_RUNNING, true);
    return resultList;
  }

  private void removeBadRules() {
    for (int i = 0; i < slotNames.length; i++) {
      if (!hasPerfectRules.get(slotNames[i])
              && hasPerfectRules.get(slotNamesWithBoundaries[3 * i + 1])
              && hasPerfectRules.get(slotNamesWithBoundaries[3 * i + 2])) {
        List<KEPRule> list = new ArrayList<KEPRule>();
        for (KEPRule kepRule : ruleLists.get(slotNames[i])) {
          List<TextRulerExample> exList = new ArrayList<TextRulerExample>(kepRule
                  .getCoveringStatistics().getCoveredNegativeExamples());
          exList.removeAll(getCorrectedExamples(slotNames[i]));
          if (exList.size() == 0) {
            list.add(kepRule);
          }
        }
        ruleLists.put(slotNames[i], list);
      } else {
        if (!hasPerfectRules.get(slotNamesWithBoundaries[3 * i + 1])) {
          List<KEPRule> list = new ArrayList<KEPRule>();
          for (KEPRule kepRule : ruleLists.get(slotNamesWithBoundaries[3 * i + 1])) {
            List<TextRulerExample> exList = new ArrayList<TextRulerExample>(kepRule
                    .getCoveringStatistics().getCoveredNegativeExamples());
            exList.removeAll(getCorrectedExamples(slotNamesWithBoundaries[3 * i + 1]));
            if (exList.size() == 0) {
              list.add(kepRule);
            }
          }
        }
        if (!hasPerfectRules.get(slotNamesWithBoundaries[3 * i + 2])) {
          List<KEPRule> list = new ArrayList<KEPRule>();
          for (KEPRule kepRule : ruleLists.get(slotNamesWithBoundaries[3 * i + 2])) {
            List<TextRulerExample> exList = new ArrayList<TextRulerExample>(kepRule
                    .getCoveringStatistics().getCoveredNegativeExamples());
            exList.removeAll(getCorrectedExamples(slotNamesWithBoundaries[3 * i + 2]));
            if (exList.size() == 0) {
              list.add(kepRule);
            }
          }
        }
      }
    }
  }

  private List<KEPRule> makeRemovalRules(TextRulerTarget target) {
    sendStatusUpdateToDelegate(
            "Searching for Removal Rules for "
                    + target.getSingleSlotTypeName().substring(
                            target.getSingleSlotTypeName().lastIndexOf(".") + 1),
            TextRulerLearnerState.ML_RUNNING, false);
    if (!hasFalsePositives(target.getSingleSlotTypeName()))
      return new ArrayList<KEPRule>();
    List<KEPRule> result = correctionRules.get(target.getSingleSlotTypeName());
    Type targetType = exampleDocuments.getDocuments().get(0).getCAS().getTypeSystem()
            .getType(target.getSingleSlotTypeName());

    List<Type> containedTypes = getTokensInNExamples(exampleDocuments.getAllPositiveExamples(),
            exampleDocuments.getAllPositiveExamples().size(), false);
    List<Type> notContainedTypes = getTokensInNoExample(exampleDocuments.getAllPositiveExamples());
    // notContainedTypes.retainAll(getTokensInNExamples(getFalsePositives(target), 1, false));
    if (!containedTypes.isEmpty()) {
      KEPRuleItem containsRuleItem = new KEPRuleItem(targetType);
      for (Type type : containedTypes) {
        result.add(new KEPRule(this, target).addInFillerItem(
                containsRuleItem.copy().addAndCondition(
                        new KEPRuleItemCondition(type, Condition.CONTAINS, true)))
                .setCorrectionRule(true));
      }
    }
    if (!notContainedTypes.isEmpty()) {
      KEPRuleItem notContainsRuleItem = new KEPRuleItem(targetType);
      for (Type type : notContainedTypes) {
        result.add(new KEPRule(this, target).addInFillerItem(
                notContainsRuleItem.copy().addAndCondition(
                        new KEPRuleItemCondition(type, Condition.CONTAINS, false)))
                .setCorrectionRule(true));
      }
    }
    testCorrectionRules(target);
    List<KEPRule> toRemove = new ArrayList<KEPRule>();
    List<KEPRuleItemCondition> toMerge = new ArrayList<KEPRuleItemCondition>();
    for (KEPRule rule : result) {
      if (!(rule.getCoveringStatistics().getCoveredPositivesCount() == 0)
              || !(rule.getCoveringStatistics().getCoveredNegativesCount() > 0)) {
        toRemove.add(rule);
      } else {
        toMerge.addAll(((KEPRuleItem) rule.getInFiller().get(0)).getConditions().get(0));
        toRemove.add(rule);
      }
    }
    result.removeAll(toRemove);
    if (!toMerge.isEmpty()) {
      result.add(new KEPRule(this, target).addInFillerItem(
              new KEPRuleItem(targetType).addConditions(toMerge)).setCorrectionRule(true));
      testCorrectionRules(target);
    }
    sendStatusUpdateToDelegate(
            "Searching for Removal Rules for "
                    + target.getSingleSlotTypeName().substring(
                            target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done",
            TextRulerLearnerState.ML_RUNNING, true);
    return result;
  }

  private void initializeMapEntries(String slotName) {
    this.ruleLists.put(slotName, new ArrayList<KEPRule>());
    this.correctionRules.put(slotName, new ArrayList<KEPRule>());
    this.coveredExamples.put(slotName, new ArrayList<TextRulerExample>());
    this.hasPerfectRules.put(slotName, false);
  }

  private List<AnnotationFS> getAnnotationsEndingAt(int end, CAS cas) {
    List<AnnotationFS> result = new ArrayList<AnnotationFS>();
    FSIterator<AnnotationFS> it = cas.getAnnotationIndex(
            cas.getTypeSystem().getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME)).iterator();
    while (it.isValid() && it.get().getBegin() < end) {
      it.moveToNext();
    }
    do
      it.moveToPrevious();
    while (it.isValid()
            && (it.get().getBegin() >= end || filterSetWithSlotNames.contains(it.get().getType()
                    .getName())));
    if (!it.isValid())
      return result;
    end = it.get().getEnd();
    it = cas.getAnnotationIndex().iterator();
    while (it.isValid() && it.get().getBegin() <= end) {
      if (it.get().getEnd() == end
              && !filterSetWithSlotNames.contains(it.get().getType().getName()))
        result.add(it.get());
      it.moveToNext();
    }
    return result;
  }

  private List<AnnotationFS> getAnnotationsStartingAt(CAS cas, int begin, int till) {
    List<AnnotationFS> result = new ArrayList<AnnotationFS>();
    if (begin > cas.getDocumentText().length()) {
      return new ArrayList<AnnotationFS>();
    }
    if (begin == 0) {
      begin++;
    }
    AnnotationFS pointer = cas.createAnnotation(cas.getAnnotationType(), begin - 1, begin);
    FSIterator<AnnotationFS> it = cas.getAnnotationIndex().iterator(pointer);
    FSMatchConstraint constraint = getConstraint(cas);
    FSIterator<AnnotationFS> iterator = cas.createFilteredIterator(it, constraint);
    iterator.moveTo(pointer);

    int firstBegin = -1;
    while (iterator.isValid()) {
      AnnotationFS fs = iterator.get();
      if (firstBegin == -1 && fs.getBegin() >= begin) {
        firstBegin = fs.getBegin();
      }
      if (firstBegin >= 0) {
        if (fs.getBegin() > firstBegin) {
          break;
        } else if (fs.getBegin() == firstBegin && fs.getEnd() <= till) {
          if (!filterSetWithSlotNames.contains(fs.getType().getName())) {
            result.add(fs);
          }
        }
      }
      iterator.moveToNext();
    }
    return result;
  }

  private List<KEPRule> getOptimalRuleCombination(List<KEPRule> rules) {

    if (rules.isEmpty()) {
      return new ArrayList<KEPRule>();
    }
    List<KEPRule> tmpList = new ArrayList<KEPRule>();
    List<TextRulerExample> coveredExamples = new ArrayList<TextRulerExample>();
    List<TextRulerExample> positiveExamples = exampleDocuments.getAllPositiveExamples();
    List<TextRulerExample> correctedExamples = getCorrectedExamples(rules.get(0).getTarget()
            .getSingleSlotTypeName());
    for (KEPRule rule : rules) {
      List<TextRulerExample> uncorrectedExamples = new ArrayList<TextRulerExample>(rule
              .getCoveringStatistics().getCoveredNegativeExamples());
      uncorrectedExamples.removeAll(correctedExamples);
      if (uncorrectedExamples.size() == 0
              && rule.getCoveringStatistics().getCoveredPositivesCount() > 0)
        tmpList.add(rule);
    }
    tmpList = getBestRules(tmpList);
    for (KEPRule rule : tmpList) {
      coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples());
    }
    if (coveredExamples.containsAll(exampleDocuments.getAllPositiveExamples()))
      hasPerfectRules.put(rules.get(0).getTarget().getSingleSlotTypeName(), true);
    else
      hasPerfectRules.put(rules.get(0).getTarget().getSingleSlotTypeName(), false);
    List<KEPRule> bestRules = getBestRules(rules);
    while (!coveredExamples.containsAll(positiveExamples) && !bestRules.isEmpty()) {
      KEPRule bestRule = bestRules.get(0);
      if (!coveredExamples.containsAll(bestRule.getCoveringStatistics()
              .getCoveredPositiveExamples())) {
        coveredExamples.removeAll(bestRule.getCoveringStatistics().getCoveredPositiveExamples());
        coveredExamples.addAll(bestRule.getCoveringStatistics().getCoveredPositiveExamples());
        tmpList.add(bestRule);
      }
      bestRules.remove(0);
    }
    return tmpList;

  }

  private List<KEPRule> getBestRules(List<KEPRule> rules) {
    if (rules.isEmpty())
      return new ArrayList<KEPRule>();
    final class AComparator implements Comparator<KEPRule> {
      public int compare(KEPRule r1, KEPRule r2) {
        if (r1.getCoveringStatistics().getCoveredPositivesCount() < r2.getCoveringStatistics()
                .getCoveredPositivesCount())
          return 1;
        else if (r1.getCoveringStatistics().getCoveredPositivesCount() > r2.getCoveringStatistics()
                .getCoveredPositivesCount())
          return -1;
        else if (r1.getCoveringStatistics().getCoveredNegativesCount() > r2.getCoveringStatistics()
                .getCoveredNegativesCount())
          return 1;
        else if (r1.getCoveringStatistics().getCoveredNegativesCount() < r2.getCoveringStatistics()
                .getCoveredNegativesCount())
          return -1;
        else if (r1.getPreFiller().size() + r1.getInFiller().size() + r1.getPostFiller().size() < r2
                .getPreFiller().size() + r2.getInFiller().size() + r2.getPostFiller().size())
          return -1;
        return 0;
      }
    }

    Collections.sort(rules, new AComparator());
    List<KEPRule> result = new ArrayList<KEPRule>();
    List<TextRulerExample> positiveExamples = exampleDocuments.getAllPositiveExamples();
    List<TextRulerExample> coveredExamples = new ArrayList<TextRulerExample>();
    for (int i = 0; i < rules.size(); i++) {
      KEPRule rule = rules.get(i);
      if ((3 * rule.getCoveringStatistics().getCoveredPositivesCount() >= rule
              .getCoveringStatistics().getCoveredNegativesCount())
              && (rule.getCoveringStatistics().getCoveredPositivesCount() >= positiveExamples
                      .size() || !coveredExamples.containsAll(rule.getCoveringStatistics()
                      .getCoveredPositiveExamples()))) {
        result.add(rule);
        coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples());

        if (coveredExamples.containsAll(positiveExamples))
          return result;
      }
    }
    for (int i = 0; i < rules.size(); i++) {
      KEPRule rule = rules.get(i);
      if (rule.getCoveringStatistics().getCoveredPositivesCount() >= positiveExamples.size()
              || !coveredExamples.containsAll(rule.getCoveringStatistics()
                      .getCoveredPositiveExamples())) {
        result.add(rule);
        coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples());

        if (coveredExamples.containsAll(positiveExamples))
          return result;
      }
    }
    return result;
  }

  private List<KEPRule> getBestAndOptimalRules(List<KEPRule> rules) {
    List<KEPRule> result = new ArrayList<KEPRule>();

    result.addAll(getBestRules(rules));
    List<KEPRule> tmp = getOptimalRuleCombination(rules);
    for (KEPRule rule : tmp)
      if (!result.contains(rule))
        result.add(rule);
    return result;
  }

  private List<Type> getTokensInNExamples(List<TextRulerExample> examples, int n,
          boolean countOnlyCoveringTokens) {
    if (examples.isEmpty())
      return new ArrayList<Type>();
    List<Type> result = new ArrayList<Type>();
    Map<String, List<TextRulerExample>> countMap = new HashMap<String, List<TextRulerExample>>();
    for (TextRulerExample example : examples) {
      for (AnnotationFS a : TextRulerToolkit.getAnnotationsWithinBounds(example.getDocumentCAS(),
              example.getAnnotation().getBegin(), example.getAnnotation().getEnd(),
              filterSetWithSlotNames, null)) {
        if (!filterSetWithSlotNames.contains(a.getType().getName()))
          if (((!countOnlyCoveringTokens) && (a.getBegin() >= example.getAnnotation().getBegin() && a
                  .getEnd() <= example.getAnnotation().getEnd()))
                  || (a.getBegin() == example.getAnnotation().getBegin() && a.getEnd() == example
                          .getAnnotation().getEnd())) {
            List<TextRulerExample> list = countMap.get(a.getType().getName());
            if (list == null) {
              list = new ArrayList<TextRulerExample>();
              list.add(example);
            } else if (!list.contains(example))
              list.add(example);
            countMap.put(a.getType().getName(), list);
          }
      }
    }
    for (String typeString : countMap.keySet()) {
      if (countMap.get(typeString).size() >= n)
        result.add(examples.get(0).getDocumentCAS().getTypeSystem().getType(typeString));
    }
    return result;
  }

  private List<Type> getTokensInNoExample(List<TextRulerExample> examples) {
    List<String> types = new ArrayList<String>();
    for (TextRulerExampleDocument doc : exampleDocuments.getDocuments()) {
      for (AnnotationFS a : doc.getCAS().getAnnotationIndex()) {
        if (!types.contains(a.getType().getName())
                && !filterSetWithSlotNames.contains(a.getType().getName()))
          types.add(a.getType().getName());
      }
    }
    List<Type> containedTypes = getTokensInNExamples(examples, 1, false);
    for (Type type : containedTypes) {
      types.remove(type.getName());
    }
    List<Type> result = new ArrayList<Type>();
    for (String typeString : types) {
      result.add(examples.get(0).getDocumentCAS().getTypeSystem().getType(typeString));
    }
    return result;
  }

  public String getResultString() {
    StringBuffer ruleStrings = new StringBuffer();
    if (slotNamesWithBoundaries == null || slotNamesWithBoundaries.length == 0)
      return "No results available yet!";

    for (int i = 0; i < slotNamesWithBoundaries.length; i++) {
      List<KEPRule> ruleList = this.ruleLists.get(slotNamesWithBoundaries[i]);
      Type blockType = blocks.get(slotNamesWithBoundaries[i]);
      if (blockType != null
              && !(i > 0 && blocks.get(slotNamesWithBoundaries[i - 1]) != null && blocks
                      .get(slotNamesWithBoundaries[i - 1]).getName().equals(blockType.getName()))) {
        ruleStrings.append("BLOCK(" + blockType.getShortName() + ") " + blockType.getShortName()
                + "{} { \n");
      }
      if (ruleList == null || ruleList.isEmpty()) {
        if (blockType != null
                && !(i < slotNamesWithBoundaries.length - 1
                        && blocks.get(slotNamesWithBoundaries[i + 1]) != null && blocks
                        .get(slotNamesWithBoundaries[i + 1]).getName().equals(blockType.getName())))
          ruleStrings.append("} \n");
        continue;
      }
      ruleStrings.append("// " + slotNamesWithBoundaries[i] + " RULES \n");
      for (KEPRule rule : new ArrayList<KEPRule>(ruleList)) {
        ruleStrings.append((blockType != null ? "\t" : "") + rule.getRuleString() + "\t// "
                + rule.getCoveringStatistics() + "\n");
      }
      if (blockType != null
              && !(i < slotNamesWithBoundaries.length - 1
                      && blocks.get(slotNamesWithBoundaries[i + 1]) != null && blocks
                      .get(slotNamesWithBoundaries[i + 1]).getName().equals(blockType.getName())))
        ruleStrings.append("}");
      ruleStrings.append("\n");
    }
    StringBuffer boundaryCorrectors = new StringBuffer();
    StringBuffer wholeSlotCorrectors = new StringBuffer();
    boundaryCorrectors.append("\n // BOUNDARY CORRECTION RULES: \n");
    wholeSlotCorrectors.append("\n // CORRECTION RULES: \n");
    for (int i = 0; i < slotNamesWithBoundaries.length; i++) {
      List<KEPRule> ruleList = this.correctionRules.get(slotNamesWithBoundaries[i]);
      if (ruleList == null || ruleList.isEmpty())
        continue;
      for (KEPRule rule : ruleList) {
        if (slotNamesWithBoundaries[i].contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION)
                || slotNamesWithBoundaries[i].contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION)) {
          boundaryCorrectors.append(rule.getRuleString() + "\t// " + rule.getCoveringStatistics()
                  + "\n");
        } else {
          wholeSlotCorrectors.append(rule.getRuleString() + "\t// " + rule.getCoveringStatistics()
                  + "\n");
        }
      }
    }
    return getFileHeaderString(true) + ruleStrings + boundaryCorrectors + "\n // CONNECTORS: \n"
            + getConnectorsRuleString() + wholeSlotCorrectors;
  }

  private String getAnnotationRulesString(String slotName) {
    StringBuffer result = new StringBuffer();
    result.append(getPackageString());
    result.append("// " + slotName + " RULES \n");
    Type blockType = blocks.get(slotName);
    if (blockType != null) {
      result.append("BLOCK(" + blockType.getShortName() + ") " + blockType.getShortName()
              + "{} { \n");
    }
    List<KEPRule> ruleList = this.ruleLists.get(slotName);
    if (ruleList != null && !ruleList.isEmpty()) {

      for (KEPRule rule : ruleList) {
        String theRuleString = rule.getRuleString();
        result.append((blockType != null ? "\t" : "") + theRuleString + "\t// "
                + rule.getCoveringStatistics() + "\n");
      }
    }
    if (blockType != null) {
      result.append("}");
    }
    result.append("\n");

    if (!slotName.contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION)
            && !slotName.contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION)) {
      result.append(getAnnotationRulesString(slotName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION));
      result.append(getAnnotationRulesString(slotName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION));
      if (blockType != null) {
        result.append("BLOCK(" + blockType.getShortName() + "Correction) "
                + blockType.getShortName() + "{} { \n");
      }
      String shortName = slotName.substring(slotName.lastIndexOf(".") + 1);
      String str = shortName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION + "{->MARKONCE("
              + shortName + ",1,3)} ANY*? " + shortName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION
              + ";" + "\n";
      str += shortName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION + "{IS(" + shortName
              + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION + ")->MARKONCE(" + shortName + ")} "
              + ";" + "\n";
      result.append(str);
      if (blockType != null) {
        result.append("}");
      }
    }

    return result.toString();
  }

  private String getConnectorsRuleString() {

    Map<Type, StringBuffer> connectorBlocks = new HashMap<Type, StringBuffer>();
    StringBuffer noBlockConnectorRules = new StringBuffer();
    StringBuffer result = new StringBuffer();

    for (int i = 0; i < this.slotNames.length; i++) {
      Type slotBlock = blocks.get(slotNames[i]);
      String shortName = slotNames[i].substring(slotNames[i].lastIndexOf(".") + 1);
      String str = (slotBlock == null ? "" : "\t") + shortName
              + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION + "{->MARKONCE(" + shortName
              + ",1,3)} ANY*? " + shortName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION + ";"
              + "\n";
      str += (slotBlock == null ? "" : "\t") + shortName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION
              + "{IS(" + shortName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION + ")->MARKONCE("
              + shortName + ")} " + ";" + "\n";
      if (slotBlock != null) {
        if (connectorBlocks.get(slotBlock) == null) {
          connectorBlocks.put(slotBlock, new StringBuffer());
        }
        connectorBlocks.get(slotBlock).append(str);
      } else {
        noBlockConnectorRules.append(str);
      }
    }

    for (Type block : connectorBlocks.keySet()) {
      result.append("BLOCK(" + block.getShortName() + "Connectors) " + block.getShortName()
              + "{} { \n" + connectorBlocks.get(block) + "} \n");
    }
    result.append(noBlockConnectorRules);
    return result.toString();
  }

  private List<TextRulerExample> getCoveredExamples(List<KEPRule> rules) {
    List<TextRulerExample> result = new ArrayList<TextRulerExample>();
    for (KEPRule rule : rules)
      for (TextRulerExample ex : rule.getCoveringStatistics().getCoveredPositiveExamples())
        if (!result.contains(ex))
          result.add(ex);
    return result;
  }

  private List<TextRulerExample> getCorrectedExamples(String slotName) {
    List<TextRulerExample> result = new ArrayList<TextRulerExample>();
    for (KEPRule rule : correctionRules.get(slotName))
      for (TextRulerExample ex : rule.getCoveringStatistics().getCoveredNegativeExamples())
        if (!result.contains(ex))
          result.add(ex);
    return result;
  }

  private boolean hasFalsePositives(String singleSlotTypeName) {
    List<KEPRule> list = this.ruleLists.get(singleSlotTypeName);
    if (list == null || list.isEmpty())
      return false;
    for (KEPRule kepRule : list) {
      if (kepRule.getCoveringStatistics().getCoveredNegativesCount() > 0)
        return true;
    }
    return false;
  }

  /**
   * Updates the covering statistics of the correction rules of the target's slot.
   * <p>
   * For every example document, the slot's annotation rule script is applied and compared with
   * the original document. Each correction rule is then tested on a freshly processed CAS. The
   * examples that the script alone covered but that are no longer reported after the correction
   * rule ran are added to the correction rule's statistics.
   *
   * @param target
   *          the target whose single slot type name selects the script and the correction rules
   */
  public void testCorrectionRules(TextRulerTarget target) {
    if (shouldAbort())
      return;
    String rStr = getAnnotationRulesString(target.getSingleSlotTypeName());
    for (TextRulerExampleDocument doc : exampleDocuments.getDocuments()) {
      // Baseline: what the annotation rules alone cover in this document.
      CAS processedCAS = applyScriptOnDocument(rStr, doc, target);
      TextRulerStatisticsCollector scriptStatistics = new TextRulerStatisticsCollector();
      compareOriginalDocumentWithTestCAS(doc, processedCAS, target, scriptStatistics,
              collectNegativeCoveredInstancesWhenTesting());
      // NOTE(review): correctionRules.get(...) is not null-checked here, unlike in
      // getResultString() - confirm a list is always registered for the slot at this point.
      for (KEPRule cRule : correctionRules.get(target.getSingleSlotTypeName())) {
        // Aborting only leaves the rule loop; the document loop keeps running.
        if (shouldAbort())
          break;
        if (cRule.getCoveringStatistics() == null) {
          cRule.setCoveringStatistics(new TextRulerStatisticsCollector());
        }
        // The script is applied again for every correction rule - presumably because
        // testRuleOnDocument changes the processed CAS; TODO confirm.
        processedCAS = applyScriptOnDocument(rStr, doc, target);
        TextRulerStatisticsCollector correctedStats = new TextRulerStatisticsCollector();
        testRuleOnDocument(cRule, doc, correctedStats, processedCAS);
        // Negatives covered by the script but absent after the correction rule ran.
        for (TextRulerExample ex : scriptStatistics.getCoveredNegativeExamples()) {
          if (!correctedStats.getCoveredNegativeExamples().contains(ex)) {
            cRule.getCoveringStatistics().addCoveredNegative(ex);
          }
        }
        // Positives covered by the script but absent after the correction rule ran.
        for (TextRulerExample ex : scriptStatistics.getCoveredPositiveExamples()) {
          if (!correctedStats.getCoveredPositiveExamples().contains(ex)) {
            cRule.getCoveringStatistics().addCoveredPositive(ex);
          }
        }
        cRule.getCoveringStatistics().reflectCountsFromCoveredExamples();
      }
    }
  }

  private void prepareCASWithBoundaries(CAS cas) {
    for (String slotName : slotNames)
      if (!(slotName.contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION) || slotName
              .contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION)))
        TextRulerExampleDocument.createBoundaryAnnotationsForCas(cas, slotName, filterSet);
  }

  @Override
  public CAS loadCAS(String fileName, CAS reuseCAS) {
    CAS cas = super.loadCAS(fileName, reuseCAS);
    prepareCASWithBoundaries(cas);
    return cas;
  }

  private void prepareCachedCASesWithBoundaries() {
    for (CAS cas : exampleDocuments.getCachedCASes())
      prepareCASWithBoundaries(cas);
  }

  /**
   * Always answers {@code true}. Within this class the value is passed to
   * {@code compareOriginalDocumentWithTestCAS} when the correction rules are tested.
   *
   * @return {@code true}
   */
  @Override
  public boolean collectNegativeCoveredInstancesWhenTesting() {
    return true;
  }

  public void setParameters(Map<String, Object> params) {
    if (TextRulerToolkit.DEBUG)
      saveParametersToTempFolder(params);

    // TODO try catch
    if (params.containsKey(FILLER_WINDOW))
      fillerWindow = (Integer) params.get(FILLER_WINDOW);

    if (params.containsKey(MAX_EXPAND_RULES))
      maxExpandRules = (Integer) params.get(MAX_EXPAND_RULES);

    if (params.containsKey(MAX_FILLER_LENGTH))
      maxFillerLength = (Integer) params.get(MAX_FILLER_LENGTH);

    if (params.containsKey(MAX_INFILLER_RULES))
      maxInfillerRules = (Integer) params.get(MAX_INFILLER_RULES);

  }

  protected FSMatchConstraint getConstraint(CAS cas) {
    ConstraintFactory cf = cas.getConstraintFactory();
    final FSTypeConstraint constraint = cf.createTypeConstraint();

    for (String each : getFilterSet()) {
      constraint.add(each);
    }
    constraint.add(RutaEngine.BASIC_TYPE);
    // TODO check if this is a legal alternative to "new NotConstraint(constraint)":
    FSMatchConstraint result = new FSMatchConstraint() {
      private static final long serialVersionUID = -6744378612440830298L;

      private final FSTypeConstraint c = constraint;

      public boolean match(FeatureStructure fs) {
        return !c.match(fs);
      }
    };
    return result;
  }

  public class KEPRuleComparator implements Comparator<KEPRule> {

    private CAS cas;

    public KEPRuleComparator(CAS cas) {
      super();
      this.cas = cas;
    }

    public int compare(KEPRule o1, KEPRule o2) {
      ArrayList<TextRulerRuleItem> items1 = o1.getInFiller();
      items1.addAll(o1.getPostFiller());
      items1.addAll(o1.getPreFiller());
      double occ1 = 0;
      double occ2 = 0;
      for (TextRulerRuleItem each : items1) {
        KEPRuleItem eachItem = (KEPRuleItem) each;
        int ratio = exampleDocuments.getAllPositiveExamples().size()
                / cas.getAnnotationIndex(eachItem.getType()).size();
        occ1 += (ratio < 1) ? 1 : ratio;
      }
      ArrayList<TextRulerRuleItem> items2 = o2.getInFiller();
      items2.addAll(o2.getPostFiller());
      items2.addAll(o2.getPreFiller());
      for (TextRulerRuleItem each : items2) {
        KEPRuleItem eachItem = (KEPRuleItem) each;
        int ratio = exampleDocuments.getAllPositiveExamples().size()
                / cas.getAnnotationIndex(eachItem.getType()).size();
        occ2 += (ratio < 1) ? 1 : ratio;
      }
      double v1 = occ1;
      double v2 = occ2;
      if (v1 > v2) {
        return 1;
      } else if (v2 > v1) {
        return -1;
      } else {
        return 0;
      }
    }
  }

}
TOP

Related Classes of org.apache.uima.ruta.textruler.learner.kep.KEPLearner$KEPRuleComparator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.