Examples of TextRulerRulePattern

org.apache.uima.ruta.textruler.core.TextRulerRulePattern
TextRulerRulePattern is an ordered list of rule items that provides some special functionality for dealing with rule patterns, such as finding sub-patterns. Hint: this is a very basic implementation and could surely be optimized ;-)

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

    WhiskRule newRule = baseRule.copy();
    // int foundSlotNumber = -1; // debug info
    // String foundSlotPattern = "";
    int termBeginNumber = term.getWordConstraint().getTokenAnnotation().getBegin();
    int termEndNumber = term.getWordConstraint().getTokenAnnotation().getEnd();
    TextRulerRulePattern targetPattern = null;
    TextRulerRulePattern previousSlotPostFillerPattern = null;
    for (int i = 0; i < newRule.getPatterns().size(); i++) {
      TextRulerSlotPattern slotPattern = newRule.getPatterns().get(i);
      WhiskRuleItem it = (WhiskRuleItem) slotPattern.preFillerPattern.lastItem(); // look at the
      // prefiller
      // pattern

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

    }
  }


  protected List<LP2Rule> generalizeRule(LP2Rule baseRule) {
    List<LP2Rule> result = new ArrayList<LP2Rule>();
    TextRulerRulePattern rulePattern = new TextRulerRulePattern();
    TextRulerRulePattern prePattern = baseRule.getPreFillerPattern();


    for (int i = prePattern.size() - 1; i >= 0; i--) // we have to reverse
    // the order again!
    {
      rulePattern.add(prePattern.get(i));
    }
    rulePattern.addAll(baseRule.getPostFillerPattern());


    recursiveGeneralizeRule(baseRule, rulePattern, new TextRulerRulePattern(), result);
    TextRulerToolkit.log("GENERALIZATIONS: " + result.size());


    for (LP2Rule r : result)
      removeOutermostWildCardItemsFromRule(r);

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

    int foundSlotNumber = -1; // debug info
    String foundSlotPattern = "";
    int termNumber = term.getTermNumberInExample();
    // determine, where this term is located relatively to the slots we
    // have...
    TextRulerRulePattern targetPattern = null;
    TextRulerRulePattern previousSlotPostFillerPattern = null;
    for (int i = 0; i < newRule.getPatterns().size(); i++) {
      TextRulerSlotPattern slotPattern = newRule.getPatterns().get(i);
      WhiskRuleItem it = (WhiskRuleItem) slotPattern.preFillerPattern.lastItem(); // look at the
      // prefiller
      // pattern

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

      int shortest = Integer.MAX_VALUE;
      for (TextRulerRulePattern p : rightContexts)
        shortest = p.size() < shortest ? p.size() : shortest;
      boolean found = false;
      for (int len = 1; len <= shortest; len++) {
        TextRulerRulePattern subPattern = rightContexts.get(0).subPattern(0, len);
        if (testConstraint1(subPattern, k)) {
          // for (TextRulerRuleItem i : subPattern)
          // ((WienRuleItem)i).getWordConstraint().setGeneralizeLinkMarkUp(true);
          patternPairs.get(k).r = subPattern;
          TextRulerToolkit.log("right " + k + ": " + subPattern);

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

    for (int k = 1; k < slotNames.length; k++) {
      List<TextRulerRulePattern> leftContexts = getLeftContextForSlot(doc, k);
      int shortest = Integer.MAX_VALUE;
      for (TextRulerRulePattern p : leftContexts)
        shortest = p.size() < shortest ? p.size() : shortest;
      TextRulerRulePattern sourcePattern = leftContexts.get(0);
      boolean found = false;
      for (int len = 1; len <= shortest; len++) {
        // get suffix:
        TextRulerRulePattern subPattern = sourcePattern.subPattern(sourcePattern.size() - len, len);
        if (testConstraint2(subPattern, k)) {
          patternPairs.get(k).l = subPattern;
          for (TextRulerRuleItem i : subPattern)
            ((WienRuleItem) i).getWordConstraint().setGeneralizeLinkMarkUp(true);
          TextRulerToolkit.log("left " + k + ": " + subPattern);

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

  }


  protected boolean findHeadTailAndL1Patterns() {
    List<TextRulerExampleDocument> docs = exampleDocuments.getDocuments();
    TextRulerExampleDocument doc0 = docs.get(0);
    TextRulerRulePattern head = new TextRulerRulePattern();
    TextRulerRulePattern tail = new TextRulerRulePattern();
    getPageHeadAndTailPortion(doc0, head, tail);


    // Local holder for one way of splitting the page head pattern into two
    // parts: the items before a separator index (head) and the remaining
    // items (l1). Instances are filled by the candidate-building loop below.
    final class HLCandidate {
      // items of the page head located before the separator index
      public TextRulerRulePattern head = new TextRulerRulePattern();


      // copies of the page head items from the separator index onwards; the
      // loop below calls setGeneralizeLinkMarkUp(true) on each copy before
      // adding it here
      public TextRulerRulePattern l1 = new TextRulerRulePattern();
    }


    // a small optimization:
    // find out the maximum possible length for l1 in doc0 since l1 is much
    // smaller than the possible head length!
    List<TextRulerRulePattern> interTupleSeparators = getInterTupleSepatators(doc0);
    int shortestL1 = head.size() - 1;
    for (TextRulerRulePattern its : interTupleSeparators)
      shortestL1 = its.size() < shortestL1 ? its.size() : shortestL1;


    List<HLCandidate> hlCandidates = new ArrayList<HLCandidate>();
    // create candidates for each separation of the head and tail patterns:
    for (int separator = head.size() - 1; separator > 0; separator--) {
      HLCandidate c = new HLCandidate();
      for (int i = 0; i < head.size(); i++) {
        if (i < separator)
          c.head.add(head.get(i));
        else {
          WienRuleItem it = (WienRuleItem) head.get(i).copy();
          it.getWordConstraint().setGeneralizeLinkMarkUp(true);
          c.l1.add(it);
        }
      }
      hlCandidates.add(c);
      TextRulerToolkit.log(c.head.size() + " vs. " + c.l1.size());
      if (c.l1.size() >= shortestL1)
        break;
    }


    long total = 0;


    // get total h l1 t combination count:
    long tCand = (tail.size() * (tail.size() + 1)) / 2;
    for (HLCandidate c : hlCandidates) {
      total += ((c.head.size() - 1) * (c.head.size())) / 2;
    }
    total *= tCand;


    long current = 0;
    int oldPercent = -1;


    for (HLCandidate c : hlCandidates) {
      // for each "candidate" which represents a l1 suffix pattern of the
      // head tokens and a rest pattern for the h pattern,
      // we have to create every sub pattern of the remaining h pattern as
      // a h candidate:
      TextRulerRulePattern l1 = c.l1;
      TextRulerRulePattern h = null;


      boolean l1Sucks = false;


      for (int endI = c.head.size() - 1; endI > 0; endI--) {
        for (int startI = endI; startI > 0; startI--) {
          h = new TextRulerRulePattern();
          for (int i = startI; i <= endI; i++)
            h.add(c.head.get(i));


          // now for each h candidate we have to create each t
          // candidate:
          TextRulerRulePattern t = null;
          for (int tstartI = 0; tstartI < tail.size(); tstartI++) {
            for (int tendI = tstartI; tendI < tail.size(); tendI++) {
              int percent = Math.round(((float) current * 100 / total));
              if (percent != oldPercent) {
                oldPercent = percent;
                if (percent > 100)
                  percent = 100;
                // TextRulerToolkit.log(current+" / "+total);
                sendStatusUpdateToDelegate("Testing C3, " + percent + "%",
                        TextRulerLearnerState.ML_RUNNING, false);
              }
              if (shouldAbort())
                return false;
              current++;


              t = new TextRulerRulePattern();
              for (int i = tstartI; i <= tendI; i++)
                t.add(tail.get(i));


              // now we have a possible candidate triple: h, t and
              // l1:


              constraint3ReturnType c3Result = testConstraint3(h, t, l1);

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

        TextRulerAnnotation lastOf1 = exampleAnnotations1[exampleAnnotations1.length - 1];
        TextRulerAnnotation firstOf2 = exampleAnnotations2[0];
        List<AnnotationFS> theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, lastOf1
                .getEnd(), firstOf2.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames(
                slotNames, filterSet), tokenType);
        TextRulerRulePattern thePattern = new TextRulerRulePattern();
        for (AnnotationFS afs : theTokens)
          thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc)));
        if (thePattern.size() > 0)
          result.add(thePattern);


      }
      interTupelSeparatorsCache.put(key, result);
      return result;

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

                TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType);
      else
        theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, slotAnnotation.getEnd(),
                nextSlotAnnotation.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames(
                        slotNames, filterSet), tokenType);
      TextRulerRulePattern thePattern = new TextRulerRulePattern();
      for (AnnotationFS afs : theTokens)
        thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc)));
      if (thePattern.size() > 0)
        result.add(thePattern);
    }
    return result;
  }

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

                0, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType);
      else
        theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, prevSlotAnnotation.getEnd(),
                slotAnnotation.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames(slotNames,
                        filterSet), tokenType);
      TextRulerRulePattern thePattern = new TextRulerRulePattern();
      for (AnnotationFS afs : theTokens)
        thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc), true));
      if (thePattern.size() > 0)
        result.add(thePattern);
    }
    return result;
  }

View Full Code Here

Examples of org.apache.uima.ruta.textruler.core.TextRulerRulePattern

    for (TextRulerExample e : examples) {
      TextRulerAnnotation slotAnnotation = e.getAnnotations()[slotIndex];
      List<AnnotationFS> theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas,
              slotAnnotation.getBegin(), slotAnnotation.getEnd(), TextRulerToolkit
                      .getFilterSetWithSlotNames(slotNames, filterSet), tokenType);
      TextRulerRulePattern thePattern = new TextRulerRulePattern();
      for (AnnotationFS afs : theTokens)
        thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc)));
      if (thePattern.size() > 0)
        result.add(thePattern);
    }
    return result;
  }

View Full Code Here

0 1 2 3 4

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.