Package org.apache.uima.analysis_engine

Examples of org.apache.uima.analysis_engine.CasIterator


                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/NewlineSegmenter.xml")));
      AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(segmenterDesc);
      CAS cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      CAS outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      outCas.release();
      assertFalse(iter.hasNext());

      // aggregate
      AnalysisEngineDescription aggSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWithSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);

      // nested aggregate
      AnalysisEngineDescription nestedAggSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/AggregateContainingAggregateSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(nestedAggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);

      // two segmenters
      AnalysisEngineDescription twoSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWith2Segmenters.xml")));
      ae = UIMAFramework.produceAnalysisEngine(twoSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("One\tTwo\nThree\tFour");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("One", outCas.getDocumentText());
      assertEquals("One", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Two", outCas.getDocumentText());
      assertEquals("Two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Three", outCas.getDocumentText());
      assertEquals("Three", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Four", outCas.getDocumentText());
      assertEquals("Four", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Four", TestAnnotator.lastDocument);

      // dropping segments
      aggSegDesc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
              new XMLInputSource(JUnitExtension
                      .getFile("TextAnalysisEngineImplTest/AggregateSegmenterForDropTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nDROP\nLine two\nDROP\nLine three");
      // results should be the same as the first aggregate segmenter test.
      // segments whose text is DROP should not be output.
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);
     
      //with ParallelStep
      AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
        new XMLInputSource(JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateForParallelStepCasMultiplierTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(desc);
      cas.reset();
      cas.setDocumentText("One\tTwo\nThree\tFour");
      iter = ae.processAndOutputNewCASes(cas);
      Set<String> expectedOutputs = new HashSet<String>();
      expectedOutputs.add("One");
      expectedOutputs.add("Two\nThree");
      expectedOutputs.add("Four");
      expectedOutputs.add("One\tTwo");
      expectedOutputs.add("Three\tFour");
      while (iter.hasNext()) {
        outCas = iter.next();
        assertTrue(expectedOutputs.remove(outCas.getDocumentText()));       
        outCas.release();
      }
      assertTrue(expectedOutputs.isEmpty());

     
      // test aggregate with 2 AEs sharing resource manager
      AnalysisEngineDescription aggregateSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWithSegmenter.xml")));
     
      ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();
      Map<String, Object> params = new HashMap<String, Object>();
      AnalysisEngine ae1 = UIMAFramework.produceAnalysisEngine(aggregateSegDesc, rsrcMgr, params);
      AnalysisEngine ae2 = UIMAFramework.produceAnalysisEngine(aggregateSegDesc, rsrcMgr, params);
     
      // start with testing first ae
      CAS cas1 = ae1.newCAS();
      cas1.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter1 = ae1.processAndOutputNewCASes(cas1);
      assertTrue(iter1.hasNext());
      CAS outCas1 = iter1.next();
      assertEquals("Line one", outCas1.getDocumentText());
    
      // now test second ae
      CAS cas2 = ae2.newCAS();
      cas2.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter2 = ae2.processAndOutputNewCASes(cas2);
      assertTrue(iter2.hasNext());
      CAS outCas2 = iter2.next();
      assertEquals("Line one", outCas2.getDocumentText());
      outCas2.release();
      assertTrue(iter2.hasNext());
      outCas2 = iter2.next();
      assertEquals("Line two", outCas2.getDocumentText());
      outCas2.release();
      assertTrue(iter2.hasNext());
      outCas2 = iter2.next();
      assertEquals("Line three", outCas2.getDocumentText());
      outCas2.release();
      assertFalse(iter2.hasNext());
    
      // continue testing first ae
      outCas1.release();
      assertTrue(iter1.hasNext());
      outCas1 = iter1.next();
View Full Code Here


      CAS cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.reset();
        cas.setDocumentText("Line one\nLine two\nERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("Line one", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Line two", outCas.getDocumentText());
        outCas.release();
        try {
          UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
        }
        //check that FlowController was notified twice, once for the
        //segment's flow and once for the complete document's flow
        assertEquals(2, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));

        cas.reset();
      }

      // nested aggregate
      AnalysisEngineDescription nestedAggSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/NestedAggregateSegmenterForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(nestedAggSegDesc);
      cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.reset();
        cas.setDocumentText("Line one\nLine two\nERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("Line one", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Line two", outCas.getDocumentText());
        outCas.release();
        try {
          UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
        }
        //check that FlowController was notified three times, once for the
        //segment's flow and twice for the complete document's flow (once
        //in each aggregate)
        assertEquals(3, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));
        FlowControllerForErrorTest.abortedDocuments.remove("Line one\nLine two\nERROR");
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));
       
        cas.reset();
      }

      // 2 segmenters
      AnalysisEngineDescription twoSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/AggregateWith2SegmentersForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(twoSegDesc);
      cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.abortedDocuments.clear();
        cas.setDocumentText("One\tTwo\nThree\tERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("One", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Two", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Three", outCas.getDocumentText());
        outCas.release();
        try {
          UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
        }
        //check that FlowController was notified three times, once for each level of granularity
        assertEquals(3, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Three\tERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("One\tTwo\nThree\tERROR"));
       
        cas.reset();
      }

      // segmenter that requests too many CASes
      AnalysisEngineDescription segmenterDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/BadSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(segmenterDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      CAS outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(iter.hasNext());
      // next call should fail with AnalysisEngineProcessException
      try {
        UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
        iter.next();
        fail(); // should not get here
      } catch (AnalysisEngineProcessException e) {
        UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
      }
     
      // bad segmenter in an aggregate
      AnalysisEngineDescription aggWithBadSegmenterDesc = UIMAFramework.getXMLParser()
      .parseAnalysisEngineDescription(
              new XMLInputSource(JUnitExtension
                      .getFile("TextAnalysisEngineImplTest/AggregateWithBadSegmenterForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggWithBadSegmenterDesc);
      FlowControllerForErrorTest.reset();
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
      assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());
      // next call should fail with AnalysisEngineProcessException
      try {
        UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
        if (iter.hasNext()) {
          iter.next();
        }
        fail(); // should not get here
      } catch (AnalysisEngineProcessException e) {
        UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
      }
      assertEquals(1, FlowControllerForErrorTest.abortedDocuments.size());
      assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nLine three"));
      assertEquals(1,FlowControllerForErrorTest.failedAEs.size());
      assertTrue(FlowControllerForErrorTest.failedAEs.contains("Segmenter"));

      //configure AE to continue after error
      ae = UIMAFramework.produceAnalysisEngine(aggWithBadSegmenterDesc);
      ae.setConfigParameterValue("ContinueOnFailure", Boolean.TRUE);
      ae.reconfigure();
      FlowControllerForErrorTest.reset();

      cas.reset();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
      assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());
     
      //next call should not have aborted, but FC should have been notified of the failure,
      // and no CAS should come back
      UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
      assertFalse(iter.hasNext());
      UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
      assertEquals(0, FlowControllerForErrorTest.abortedDocuments.size());
      assertEquals(1, FlowControllerForErrorTest.failedAEs.size());
      assertTrue(FlowControllerForErrorTest.failedAEs.contains("Segmenter"));
     
View Full Code Here

      FeatureStructure sdiFS3 = inputCas3.createFS(sdiType);
      sdiFS3.setStringValue(uriFeat, "cas3");
      inputCas3.getIndexRepository().addFS(sdiFS3);

      // input first CAS. Should be no segments yet.
      CasIterator iter = ae.processAndOutputNewCASes(inputCas1);
      assertFalse(iter.hasNext());
      // input second CAS. We should get back one segment.
      iter = ae.processAndOutputNewCASes(inputCas2);
      assertTrue(iter.hasNext());
      CAS outCas = iter.next();
      assertEquals("This is one.", outCas.getDocumentText());
      // -- check SourceDocumentInformation FSs
      Iterator<AnnotationFS> sdiIter = outCas.getAnnotationIndex(sdiType).iterator();
      assertTrue(sdiIter.hasNext());
      AnnotationFS outSdiFs = (AnnotationFS) sdiIter.next();
      assertEquals("This is", outSdiFs.getCoveredText());
      assertEquals("cas1", outSdiFs.getStringValue(uriFeat));
      assertTrue(sdiIter.hasNext());
      outSdiFs = (AnnotationFS) sdiIter.next();
      assertEquals(" one.", outSdiFs.getCoveredText());
      assertEquals("cas2", outSdiFs.getStringValue(uriFeat));
      assertFalse(sdiIter.hasNext());
      // --
      assertFalse(iter.hasNext());

      // input third CAS. We should get back one more segment.
      iter = ae.processAndOutputNewCASes(inputCas3);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("This is two.", outCas.getDocumentText());
      // -- check SourceDocumentInformation FSs
      sdiIter = outCas.getAnnotationIndex(sdiType).iterator();
      assertTrue(sdiIter.hasNext());
      outSdiFs = (AnnotationFS) sdiIter.next();
      assertEquals("This is", outSdiFs.getCoveredText());
      assertEquals("cas2", outSdiFs.getStringValue(uriFeat));
      assertTrue(sdiIter.hasNext());
      outSdiFs = (AnnotationFS) sdiIter.next();
      assertEquals(" two.", outSdiFs.getCoveredText());
      assertEquals("cas3", outSdiFs.getStringValue(uriFeat));
      assertFalse(sdiIter.hasNext());
      // --
      assertFalse(iter.hasNext());
    } catch (Exception e) {
      JUnitExtension.handleException(e);
    }
  }
View Full Code Here

                  if (rs != null) {
                    nextAe.setResultSpecification(rs);
                  }
                }
                // invoke next AE in flow
                CasIterator casIter = null;
                CAS outputCas = null; //used if the AE we call outputs a new CAS
                try {
                  casIter = nextAe.processAndOutputNewCASes(cas);
                  if (casIter.hasNext()) {
                    outputCas = casIter.next();
                  }
                }
                catch(Exception e) {
                  //ask the FlowController if we should continue
                  //TODO: should this be configurable?
                  if (!flow.continueOnFailure(nextAeKey, e)) {
                    throw e;
                  }
                  else {
                    UIMAFramework.getLogger(CLASS_NAME).logrb(Level.FINE, CLASS_NAME.getName(), "processUntilNextOutputCas",
                            LOG_RESOURCE_BUNDLE, "UIMA_continuing_after_exception__FINE", e);
                  }
                }
                if (outputCas != null) // new CASes are output
                {
                  // push the CasIterator, original CAS, and Flow onto a stack so we
                  // can get the other output CASes and the original CAS later
                  casIteratorStack.push(new StackFrame(casIter, cas, flow, nextAeKey));
                  // compute Flow for the output CAS
                  flow = flow.newCasProduced(outputCas, nextAeKey);
                  // now route the output CAS through the flow
                  cas = outputCas;
                  activeCASes.add(cas);
                } else {
                  // no new CASes are output; this cas is done being processed
                  // by that AnalysisEngine so clear the componentInfo
                  cas.setCurrentComponentInfo(null);
                }
              } else {
                throw new AnalysisEngineProcessException(
                        AnalysisEngineProcessException.UNKNOWN_ID_IN_SEQUENCE,
                        new Object[] { nextAeKey });
              }
            }
            //ParallelStep (TODO: refactor out common parts with SimpleStep?)
            else if (nextStep instanceof ParallelStep) {
              //create modifiable list of destinations
              List<String> destinations = new LinkedList<String>(((ParallelStep)nextStep).getAnalysisEngineKeys());
              //iterate over all destinations, removing them from the list as we go
              while (!destinations.isEmpty()) {
                String nextAeKey = destinations.get(0);
                destinations.remove(0);
                //execute this step as we would a single step
                AnalysisEngine nextAe = (AnalysisEngine) mComponentAnalysisEngineMap.get(nextAeKey);
                if (nextAe != null) {
                  // invoke next AE in flow
                  CasIterator casIter = null;
                  CAS outputCas = null; //used if the AE we call outputs a new CAS
                  try {
                    casIter = nextAe.processAndOutputNewCASes(cas);
                    if (casIter.hasNext()) {
                      outputCas = casIter.next();
                    }
                  }
                  catch(Exception e) {
                    //ask the FlowController if we should continue
                    //TODO: should this be configurable?
View Full Code Here

                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/NewlineSegmenter.xml")));
      AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(segmenterDesc);
      CAS cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      CAS outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      outCas.release();
      assertFalse(iter.hasNext());

      // aggregate
      AnalysisEngineDescription aggSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWithSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);

      // nested aggregate
      AnalysisEngineDescription nestedAggSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/AggregateContainingAggregateSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(nestedAggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);

      // two segmenters
      AnalysisEngineDescription twoSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWith2Segmenters.xml")));
      ae = UIMAFramework.produceAnalysisEngine(twoSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("One\tTwo\nThree\tFour");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("One", outCas.getDocumentText());
      assertEquals("One", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Two", outCas.getDocumentText());
      assertEquals("Two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Three", outCas.getDocumentText());
      assertEquals("Three", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Four", outCas.getDocumentText());
      assertEquals("Four", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Four", TestAnnotator.lastDocument);

      // dropping segments
      aggSegDesc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
              new XMLInputSource(JUnitExtension
                      .getFile("TextAnalysisEngineImplTest/AggregateSegmenterForDropTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nDROP\nLine two\nDROP\nLine three");
      // results should be the same as the first aggregate segmenter test.
      // segments whose text is DROP should not be output.
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);
     
      //with ParallelStep
      AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
        new XMLInputSource(JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateForParallelStepCasMultiplierTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(desc);
      cas.reset();
      cas.setDocumentText("One\tTwo\nThree\tFour");
      iter = ae.processAndOutputNewCASes(cas);
      Set<String> expectedOutputs = new HashSet<String>();
      expectedOutputs.add("One");
      expectedOutputs.add("Two\nThree");
      expectedOutputs.add("Four");
      expectedOutputs.add("One\tTwo");
      expectedOutputs.add("Three\tFour");
      while (iter.hasNext()) {
        outCas = iter.next();
        assertTrue(expectedOutputs.remove(outCas.getDocumentText()));       
        outCas.release();
      }
      assertTrue(expectedOutputs.isEmpty());

     
      // test aggregate with 2 AEs sharing resource manager
      AnalysisEngineDescription aggregateSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWithSegmenter.xml")));
     
      ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();
      Map<String, Object> params = new HashMap<String, Object>();
      AnalysisEngine ae1 = UIMAFramework.produceAnalysisEngine(aggregateSegDesc, rsrcMgr, params);
      AnalysisEngine ae2 = UIMAFramework.produceAnalysisEngine(aggregateSegDesc, rsrcMgr, params);
     
      // start with testing first ae
      CAS cas1 = ae1.newCAS();
      cas1.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter1 = ae1.processAndOutputNewCASes(cas1);
      assertTrue(iter1.hasNext());
      CAS outCas1 = iter1.next();
      assertEquals("Line one", outCas1.getDocumentText());
    
      // now test second ae
      CAS cas2 = ae2.newCAS();
      cas2.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter2 = ae2.processAndOutputNewCASes(cas2);
      assertTrue(iter2.hasNext());
      CAS outCas2 = iter2.next();
      assertEquals("Line one", outCas2.getDocumentText());
      outCas2.release();
      assertTrue(iter2.hasNext());
      outCas2 = iter2.next();
      assertEquals("Line two", outCas2.getDocumentText());
      outCas2.release();
      assertTrue(iter2.hasNext());
      outCas2 = iter2.next();
      assertEquals("Line three", outCas2.getDocumentText());
      outCas2.release();
      assertFalse(iter2.hasNext());
    
      // continue testing first ae
      outCas1.release();
      assertTrue(iter1.hasNext());
      outCas1 = iter1.next();
View Full Code Here

    String resourceName = getMetaData().getName();
    Logger logger = getLogger();
    logger.logrb(Level.FINE, CLASS_NAME.getName(), "process", LOG_RESOURCE_BUNDLE,
            "UIMA_analysis_engine_process_begin__FINE", resourceName);
    try {
      CasIterator iterator = _getASB().process(aCAS);

      // log end of event
      logger.logrb(Level.FINE, CLASS_NAME.getName(), "process", LOG_RESOURCE_BUNDLE,
              "UIMA_analysis_engine_process_end__FINE", resourceName);
      return iterator;
View Full Code Here

      CAS cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.reset();
        cas.setDocumentText("Line one\nLine two\nERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("Line one", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Line two", outCas.getDocumentText());
        outCas.release();
        try {
          UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
        }
        //check that FlowController was notified twice, once for the
        //segment's flow and once for the complete document's flow
        assertEquals(2, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));

        cas.reset();
      }

      // nested aggregate
      AnalysisEngineDescription nestedAggSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/NestedAggregateSegmenterForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(nestedAggSegDesc);
      cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.reset();
        cas.setDocumentText("Line one\nLine two\nERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("Line one", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Line two", outCas.getDocumentText());
        outCas.release();
        try {
          UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
        }
        //check that FlowController was notified three times, once for the
        //segment's flow and twice for the complete document's flow (once
        //in each aggregate)
        assertEquals(3, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));
        FlowControllerForErrorTest.abortedDocuments.remove("Line one\nLine two\nERROR");
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));
       
        cas.reset();
      }

      // 2 segmenters
      AnalysisEngineDescription twoSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/AggregateWith2SegmentersForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(twoSegDesc);
      cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.abortedDocuments.clear();
        cas.setDocumentText("One\tTwo\nThree\tERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("One", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Two", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Three", outCas.getDocumentText());
        outCas.release();
        try {
          UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
        }
        //check that FlowController was notified three times, once for each level of granularity
        assertEquals(3, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Three\tERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("One\tTwo\nThree\tERROR"));
       
        cas.reset();
      }

      // segmenter that requests too many CASes
      AnalysisEngineDescription segmenterDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/BadSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(segmenterDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      CAS outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(iter.hasNext());
      // next call should fail with AnalysisEngineProcessException
      try {
        UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
        iter.next();
        fail(); // should not get here
      } catch (AnalysisEngineProcessException e) {
        UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
      }
     
      // bad segmenter in an aggregate
      AnalysisEngineDescription aggWithBadSegmenterDesc = UIMAFramework.getXMLParser()
      .parseAnalysisEngineDescription(
              new XMLInputSource(JUnitExtension
                      .getFile("TextAnalysisEngineImplTest/AggregateWithBadSegmenterForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggWithBadSegmenterDesc);
      FlowControllerForErrorTest.reset();
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
      assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());
      // next call should fail with AnalysisEngineProcessException
      try {
        UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
        if (iter.hasNext()) {
          iter.next();
        }
        fail(); // should not get here
      } catch (AnalysisEngineProcessException e) {
        UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
      }
      assertEquals(1, FlowControllerForErrorTest.abortedDocuments.size());
      assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nLine three"));
      assertEquals(1,FlowControllerForErrorTest.failedAEs.size());
      assertTrue(FlowControllerForErrorTest.failedAEs.contains("Segmenter"));

      //configure AE to continue after error
      ae = UIMAFramework.produceAnalysisEngine(aggWithBadSegmenterDesc);
      ae.setConfigParameterValue("ContinueOnFailure", Boolean.TRUE);
      ae.reconfigure();
      FlowControllerForErrorTest.reset();

      cas.reset();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
      assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());
     
      //next call should not have aborted, but FC should have been notified of the failure,
      // and no CAS should come back
      UIMAFramework.getLogger().setLevel(Level.OFF)// Suppress logging of expected exception
      assertFalse(iter.hasNext());
      UIMAFramework.getLogger().setLevel(Level.INFO); // Restore to apparent default of INFO
      assertEquals(0, FlowControllerForErrorTest.abortedDocuments.size());
      assertEquals(1, FlowControllerForErrorTest.failedAEs.size());
      assertTrue(FlowControllerForErrorTest.failedAEs.contains("Segmenter"));
     
View Full Code Here

      FeatureStructure sdiFS3 = inputCas3.createFS(sdiType);
      sdiFS3.setStringValue(uriFeat, "cas3");
      inputCas3.getIndexRepository().addFS(sdiFS3);

      // input first CAS. Should be no segments yet.
      CasIterator iter = ae.processAndOutputNewCASes(inputCas1);
      assertFalse(iter.hasNext());
      // input second CAS. We should get back one segment.
      iter = ae.processAndOutputNewCASes(inputCas2);
      assertTrue(iter.hasNext());
      CAS outCas = iter.next();
      assertEquals("This is one.", outCas.getDocumentText());
      // -- check SourceDocumentInformation FSs
      Iterator<AnnotationFS> sdiIter = outCas.getAnnotationIndex(sdiType).iterator();
      assertTrue(sdiIter.hasNext());
      AnnotationFS outSdiFs = (AnnotationFS) sdiIter.next();
      assertEquals("This is", outSdiFs.getCoveredText());
      assertEquals("cas1", outSdiFs.getStringValue(uriFeat));
      assertTrue(sdiIter.hasNext());
      outSdiFs = (AnnotationFS) sdiIter.next();
      assertEquals(" one.", outSdiFs.getCoveredText());
      assertEquals("cas2", outSdiFs.getStringValue(uriFeat));
      assertFalse(sdiIter.hasNext());
      // --
      assertFalse(iter.hasNext());

      // input third CAS. We should get back one more segment.
      iter = ae.processAndOutputNewCASes(inputCas3);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("This is two.", outCas.getDocumentText());
      // -- check SourceDocumentInformation FSs
      sdiIter = outCas.getAnnotationIndex(sdiType).iterator();
      assertTrue(sdiIter.hasNext());
      outSdiFs = (AnnotationFS) sdiIter.next();
      assertEquals("This is", outSdiFs.getCoveredText());
      assertEquals("cas2", outSdiFs.getStringValue(uriFeat));
      assertTrue(sdiIter.hasNext());
      outSdiFs = (AnnotationFS) sdiIter.next();
      assertEquals(" two.", outSdiFs.getCoveredText());
      assertEquals("cas3", outSdiFs.getStringValue(uriFeat));
      assertFalse(sdiIter.hasNext());
      // --
      assertFalse(iter.hasNext());
    } catch (Exception e) {
      JUnitExtension.handleException(e);
    }
  }
View Full Code Here

                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/NewlineSegmenter.xml")));
      AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(segmenterDesc);
      CAS cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      CAS outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      outCas.release();
      assertFalse(iter.hasNext());

      // aggregate
      AnalysisEngineDescription aggSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWithSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);

      // nested aggregate
      AnalysisEngineDescription nestedAggSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/AggregateContainingAggregateSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(nestedAggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);

      // two segmenters
      AnalysisEngineDescription twoSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWith2Segmenters.xml")));
      ae = UIMAFramework.produceAnalysisEngine(twoSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("One\tTwo\nThree\tFour");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("One", outCas.getDocumentText());
      assertEquals("One", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Two", outCas.getDocumentText());
      assertEquals("Two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Three", outCas.getDocumentText());
      assertEquals("Three", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Four", outCas.getDocumentText());
      assertEquals("Four", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Four", TestAnnotator.lastDocument);

      // dropping segments
      aggSegDesc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
              new XMLInputSource(JUnitExtension
                      .getFile("TextAnalysisEngineImplTest/AggregateSegmenterForDropTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggSegDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nDROP\nLine two\nDROP\nLine three");
      // results should be the same as the first aggregate segmenter test.
      // segments whose text is DROP should not be output.
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line one", outCas.getDocumentText());
      assertEquals("Line one", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line two", outCas.getDocumentText());
      assertEquals("Line two", TestAnnotator.lastDocument);
      outCas.release();
      assertTrue(iter.hasNext());
      outCas = iter.next();
      assertEquals("Line three", outCas.getDocumentText());
      assertEquals("Line three", TestAnnotator.lastDocument);
      outCas.release();
      assertFalse(iter.hasNext());
      // Annotator should NOT get the original CAS according to the default flow
      assertEquals("Line three", TestAnnotator.lastDocument);
     
      //with ParallelStep
      AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
        new XMLInputSource(JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateForParallelStepCasMultiplierTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(desc);
      cas.reset();
      cas.setDocumentText("One\tTwo\nThree\tFour");
      iter = ae.processAndOutputNewCASes(cas);
      Set<String> expectedOutputs = new HashSet<String>();
      expectedOutputs.add("One");
      expectedOutputs.add("Two\nThree");
      expectedOutputs.add("Four");
      expectedOutputs.add("One\tTwo");
      expectedOutputs.add("Three\tFour");
      while (iter.hasNext()) {
        outCas = iter.next();
        assertTrue(expectedOutputs.remove(outCas.getDocumentText()));       
        outCas.release();
      }
      assertTrue(expectedOutputs.isEmpty());

     
      // test aggregate with 2 AEs sharing resource manager
      AnalysisEngineDescription aggregateSegDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateWithSegmenter.xml")));
     
      ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();
      Map<String, Object> params = new HashMap<String, Object>();
      AnalysisEngine ae1 = UIMAFramework.produceAnalysisEngine(aggregateSegDesc, rsrcMgr, params);
      AnalysisEngine ae2 = UIMAFramework.produceAnalysisEngine(aggregateSegDesc, rsrcMgr, params);
     
      // start with testing first ae
      CAS cas1 = ae1.newCAS();
      cas1.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter1 = ae1.processAndOutputNewCASes(cas1);
      assertTrue(iter1.hasNext());
      CAS outCas1 = iter1.next();
      assertEquals("Line one", outCas1.getDocumentText());
    
      // now test second ae
      CAS cas2 = ae2.newCAS();
      cas2.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter2 = ae2.processAndOutputNewCASes(cas2);
      assertTrue(iter2.hasNext());
      CAS outCas2 = iter2.next();
      assertEquals("Line one", outCas2.getDocumentText());
      outCas2.release();
      assertTrue(iter2.hasNext());
      outCas2 = iter2.next();
      assertEquals("Line two", outCas2.getDocumentText());
      outCas2.release();
      assertTrue(iter2.hasNext());
      outCas2 = iter2.next();
      assertEquals("Line three", outCas2.getDocumentText());
      outCas2.release();
      assertFalse(iter2.hasNext());
    
      // continue testing first ae
      outCas1.release();
      assertTrue(iter1.hasNext());
      outCas1 = iter1.next();
View Full Code Here

      CAS cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.reset();
        cas.setDocumentText("Line one\nLine two\nERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("Line one", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Line two", outCas.getDocumentText());
        outCas.release();
        try {
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          //do nothing
        }
        //check that FlowController was notified twice, once for the
        //segment's flow and once for the complete document's flow
        assertEquals(2, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));

        cas.reset();
      }

      // nested aggregate
      AnalysisEngineDescription nestedAggSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/NestedAggregateSegmenterForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(nestedAggSegDesc);
      cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.reset();
        cas.setDocumentText("Line one\nLine two\nERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("Line one", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Line two", outCas.getDocumentText());
        outCas.release();
        try {
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          //do nothing
        }
        //check that FlowController was notified three times, once for the
        //segment's flow and twice for the complete document's flow (once
        //in each aggregate)
        assertEquals(3, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));
        FlowControllerForErrorTest.abortedDocuments.remove("Line one\nLine two\nERROR");
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nERROR"));
       
        cas.reset();
      }

      // 2 segmenters
      AnalysisEngineDescription twoSegDesc = UIMAFramework
              .getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(
                              JUnitExtension
                                      .getFile("TextAnalysisEngineImplTest/AggregateWith2SegmentersForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(twoSegDesc);
      cas = ae.newCAS();
      for (int i = 0; i < 2; i++) // verify we can do this more than once
      {
        FlowControllerForErrorTest.abortedDocuments.clear();
        cas.setDocumentText("One\tTwo\nThree\tERROR");
        CasIterator iter = ae.processAndOutputNewCASes(cas);
        assertTrue(iter.hasNext());
        CAS outCas = iter.next();
        assertEquals("One", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Two", outCas.getDocumentText());
        outCas.release();
        assertTrue(iter.hasNext());
        outCas = iter.next();
        assertEquals("Three", outCas.getDocumentText());
        outCas.release();
        try {
          assertTrue(iter.hasNext());
          outCas = iter.next();
          fail(); // the above should throw an exception
        } catch (AnalysisEngineProcessException e) {
          //do nothing
        }
        //check that FlowController was notified three times, once for each level of granularity
        assertEquals(3, FlowControllerForErrorTest.abortedDocuments.size());
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("ERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Three\tERROR"));
        assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("One\tTwo\nThree\tERROR"));
       
        cas.reset();
      }

      // segmenter that requests too many CASes
      AnalysisEngineDescription segmenterDesc = UIMAFramework.getXMLParser()
              .parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/BadSegmenter.xml")));
      ae = UIMAFramework.produceAnalysisEngine(segmenterDesc);
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      CasIterator iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      CAS outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(iter.hasNext());
      // next call should fail with AnalysisEngineProcessException
      try {
        iter.next();
        fail(); // should not get here
      } catch (AnalysisEngineProcessException e) {
        // should get here
      }
     
      // bad segmenter in an aggregate
      AnalysisEngineDescription aggWithBadSegmenterDesc = UIMAFramework.getXMLParser()
      .parseAnalysisEngineDescription(
              new XMLInputSource(JUnitExtension
                      .getFile("TextAnalysisEngineImplTest/AggregateWithBadSegmenterForErrorTest.xml")));
      ae = UIMAFramework.produceAnalysisEngine(aggWithBadSegmenterDesc);
      FlowControllerForErrorTest.reset();
      cas = ae.newCAS();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
      assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());
      // next call should fail with AnalysisEngineProcessException
      try {
        if (iter.hasNext()) {
          iter.next();
        }
        fail(); // should not get here
      } catch (AnalysisEngineProcessException e) {
        // should get here
      }
      assertEquals(1, FlowControllerForErrorTest.abortedDocuments.size());
      assertTrue(FlowControllerForErrorTest.abortedDocuments.contains("Line one\nLine two\nLine three"));
      assertEquals(1,FlowControllerForErrorTest.failedAEs.size());
      assertTrue(FlowControllerForErrorTest.failedAEs.contains("Segmenter"));

      //configure AE to continue after error
      ae = UIMAFramework.produceAnalysisEngine(aggWithBadSegmenterDesc);
      ae.setConfigParameterValue("ContinueOnFailure", Boolean.TRUE);
      ae.reconfigure();
      FlowControllerForErrorTest.reset();

      cas.reset();
      cas.setDocumentText("Line one\nLine two\nLine three");
      iter = ae.processAndOutputNewCASes(cas);
      assertTrue(iter.hasNext());
      outCas = iter.next(); // first call OK
      outCas.release();
      assertTrue(FlowControllerForErrorTest.abortedDocuments.isEmpty());
      assertTrue(FlowControllerForErrorTest.failedAEs.isEmpty());
     
      //next call should not have aborted, but FC should have been notified of the failure,
      // and no CAS should come back
      assertFalse(iter.hasNext());
      assertEquals(0, FlowControllerForErrorTest.abortedDocuments.size());
      assertEquals(1, FlowControllerForErrorTest.failedAEs.size());
      assertTrue(FlowControllerForErrorTest.failedAEs.contains("Segmenter"));
     
     
View Full Code Here

TOP

Related Classes of org.apache.uima.analysis_engine.CasIterator

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.