Examples of org.apache.uima.examples.SourceDocumentInformation

org.apache.uima.examples.SourceDocumentInformation
Stores detailed information about the original source document from which the current CAS was initialized. All information (like size) refers to the source document and not to the document in the CAS, which may be converted and filtered by a CAS Initializer. For example, this information will be written to the Semantic Search index so that the original document contents can be retrieved by queries. Updated by JCasGen Wed Nov 22 16:51:13 EST 2006 XML source: C:/alally/dev/workspace_apache/uimaj-examples/src/main/resources/org/apache/uima/examples/SourceDocumentInformation.xml @generated

      mBuf.append(mDoc.substring(mCurIndex));
      if (mJCases[mActiveJCas] == null) {
        mJCases[mActiveJCas] = getEmptyJCas();
      }
      // add SourceDocumentInformation to active JCas
      SourceDocumentInformation sdi = new SourceDocumentInformation(mJCases[mActiveJCas]);
      sdi.setBegin(begin);
      sdi.setEnd(mBuf.length());
      sdi.setUri(getCasSourceUri(mCurrentInputCas));
      sdi.addToIndexes();
      mHasNext = false; // we need to see another input CAS before we can create output
      mCurIndex = 0;
    } else // yes, newline
    {
      // append doc up to newline
      int begin = mBuf.length(); // record start offset of new text
      mBuf.append(mDoc.substring(mCurIndex, nlIndex));
      if (mJCases[mActiveJCas] == null) {
        mJCases[mActiveJCas] = getEmptyJCas();
      }
      // add SourceDocumentInformation to active JCas
      SourceDocumentInformation sdi = new SourceDocumentInformation(mJCases[mActiveJCas]);
      sdi.setBegin(begin);
      sdi.setEnd(mBuf.length());
      sdi.setUri(getCasSourceUri(mCurrentInputCas));
      sdi.addToIndexes();
      // set doc text
      mJCases[mActiveJCas].setDocumentText(mBuf.toString());
      mBuf.setLength(0);
      mCurIndex = nlIndex + 1;
      mHasNext = true; // ready to output!

View Full Code Here


  private String getCasSourceUri(JCas jcas) {
    Iterator iter = jcas.getJFSIndexRepository().getAnnotationIndex(SourceDocumentInformation.type)
            .iterator();
    if (iter.hasNext()) {
      SourceDocumentInformation sdi = (SourceDocumentInformation) iter.next();
      return sdi.getUri();
    } else {
      return "unknown";
    }
  }

View Full Code Here

    // Also store location of source document in CAS. This information is critical
    // if CAS Consumers will need to know where the original document contents are located.
    // For example, the Semantic Search CAS Indexer writes this information into the
    // search index that it creates, which allows applications that use the search index to
    // locate the documents that satisfy their semantic queries.
    SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
    srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
    srcDocInfo.setOffsetInSource(0);
    srcDocInfo.setDocumentSize((int) file.length());
    srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
    srcDocInfo.addToIndexes();
  }

View Full Code Here


    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += ("_" + fileLoc.getOffsetInSource());
        }
        outFileName += ".xmi";
        outFile = new File(mOutputDir, outFileName);
        modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
      } catch (MalformedURLException e1) {

View Full Code Here


    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += ("_" + fileLoc.getOffsetInSource());
        }
        outFileName += ".xmi";
        outFile = new File(mOutputDir, outFileName);
        modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
      } catch (MalformedURLException e1) {

View Full Code Here

      // Also store location of source document in CAS. This information is critical
      // if CAS Consumers will need to know where the original document contents are located.
      // For example, the Semantic Search CAS Indexer writes this information into the
      // search index that it creates, which allows applications that use the search index to
      // locate the documents that satisfy their semantic queries.
      SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
      srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
      srcDocInfo.setOffsetInSource(0);
      srcDocInfo.setDocumentSize((int) file.length());
      srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
      srcDocInfo.addToIndexes();
    }
    // XCAS input files
    else {
      try {
        if (mXCAS.equalsIgnoreCase("xmi")) {

View Full Code Here

        if (it.hasNext()) {
          // get the output file name from the annotation in the CAS ...
          // ... note this is a little flakey if processing an XCAS file,
          // which could have such an annotation with a different name than the input XCAS file!
          // So we don't do this if XCAS output is specified.
          SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
          File inFile;
          inFile = new File(new URL(fileLoc.getUri()).getPath());
          outFile = new File(mOutputDir, inFile.getName());
        }
      } catch (CASRuntimeException e) {
        // default Sofa name does not exist, use default processing below
      } catch (CASException e) {

View Full Code Here

      // Also store location of source document in CAS. This information is critical
      // if CAS Consumers will need to know where the original document contents are located.
      // For example, the Semantic Search CAS Indexer writes this information into the
      // search index that it creates, which allows applications that use the search index to
      // locate the documents that satisfy their semantic queries.
      SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
      srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
      srcDocInfo.setOffsetInSource(0);
      srcDocInfo.setDocumentSize((int) file.length());
      srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
      srcDocInfo.addToIndexes();
    }
    // XCAS input files
    else {
      try {
        XCASDeserializer.deserialize(fis, aCAS);

View Full Code Here


    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += ("_" + fileLoc.getOffsetInSource());
        }
        outFileName += ".xmi";
        outFile = new File(mOutputDir, outFileName);
        modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
      } catch (MalformedURLException e1) {

View Full Code Here

    {
      logger.info("AnnotationIndex's iterator returned empty iterator. continuing...");
      return;
    }
    
    SourceDocumentInformation sdi = (SourceDocumentInformation)iterator.next();
    String fullUriString = sdi.getUri();
    URI fullUri;
    URL fullUrl;
    try
    {
      fullUri = new URI(fullUriString);

View Full Code Here

0 1 2 3 4

TOP

Related Classes of org.apache.uima.examples.SourceDocumentInformation

org.apache.uima.ae.noop.SimpleAnnotator

org.apache.uima.analysis_engine.impl.NewlineResegmenter

org.apache.uima.examples.casMultiplier.SimpleTextMerger

org.apache.uima.examples.casMultiplier.SimpleTextSegmenter

org.apache.uima.examples.cpe.AnnotationPrinter

org.apache.uima.examples.cpe.FileSystemCollectionReader

org.apache.uima.examples.cpe.InlineXmlCasConsumer

org.apache.uima.examples.cpe.PersonTitleDBWriterCasConsumer

org.apache.uima.examples.cpe.XCasWriterCasConsumer

org.apache.uima.examples.xmi.XmiEcoreCasConsumer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.