Package org.apache.uima.examples

Examples of org.apache.uima.examples.SourceDocumentInformation


      mBuf.append(mDoc.substring(mCurIndex));
      if (mJCases[mActiveJCas] == null) {
        mJCases[mActiveJCas] = getEmptyJCas();
      }
      // add SourceDocumentInformation to active JCas
      SourceDocumentInformation sdi = new SourceDocumentInformation(mJCases[mActiveJCas]);
      sdi.setBegin(begin);
      sdi.setEnd(mBuf.length());
      sdi.setUri(getCasSourceUri(mCurrentInputCas));
      sdi.addToIndexes();
      mHasNext = false; // we need to see another input CAS before we can create output
      mCurIndex = 0;
    } else // yes, newline
    {
      // append doc up to newline
      int begin = mBuf.length(); // record start offset of new text
      mBuf.append(mDoc.substring(mCurIndex, nlIndex));
      if (mJCases[mActiveJCas] == null) {
        mJCases[mActiveJCas] = getEmptyJCas();
      }
      // add SourceDocumentInformation to active JCas
      SourceDocumentInformation sdi = new SourceDocumentInformation(mJCases[mActiveJCas]);
      sdi.setBegin(begin);
      sdi.setEnd(mBuf.length());
      sdi.setUri(getCasSourceUri(mCurrentInputCas));
      sdi.addToIndexes();
      // set doc text
      mJCases[mActiveJCas].setDocumentText(mBuf.toString());
      mBuf.setLength(0);
      mCurIndex = nlIndex + 1;
      mHasNext = true; // ready to output!
View Full Code Here


  private String getCasSourceUri(JCas jcas) {
    Iterator iter = jcas.getJFSIndexRepository().getAnnotationIndex(SourceDocumentInformation.type)
            .iterator();
    if (iter.hasNext()) {
      SourceDocumentInformation sdi = (SourceDocumentInformation) iter.next();
      return sdi.getUri();
    } else {
      return "unknown";
    }
  }
View Full Code Here

    // Also store location of source document in CAS. This information is critical
    // if CAS Consumers will need to know where the original document contents are located.
    // For example, the Semantic Search CAS Indexer writes this information into the
    // search index that it creates, which allows applications that use the search index to
    // locate the documents that satisfy their semantic queries.
    SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
    srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
    srcDocInfo.setOffsetInSource(0);
    srcDocInfo.setDocumentSize((int) file.length());
    srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
    srcDocInfo.addToIndexes();
  }
View Full Code Here

    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += ("_" + fileLoc.getOffsetInSource());
        }
        outFileName += ".xmi";
        outFile = new File(mOutputDir, outFileName);
        modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
      } catch (MalformedURLException e1) {
View Full Code Here

    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += ("_" + fileLoc.getOffsetInSource());
        }
        outFileName += ".xmi";
        outFile = new File(mOutputDir, outFileName);
        modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
      } catch (MalformedURLException e1) {
View Full Code Here

      // Also store location of source document in CAS. This information is critical
      // if CAS Consumers will need to know where the original document contents are located.
      // For example, the Semantic Search CAS Indexer writes this information into the
      // search index that it creates, which allows applications that use the search index to
      // locate the documents that satisfy their semantic queries.
      SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
      srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
      srcDocInfo.setOffsetInSource(0);
      srcDocInfo.setDocumentSize((int) file.length());
      srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
      srcDocInfo.addToIndexes();
    }
    // XCAS input files
    else {
      try {
        if (mXCAS.equalsIgnoreCase("xmi")) {
View Full Code Here

        if (it.hasNext()) {
          // get the output file name from the annotation in the CAS ...
          // ... note this is a little flakey if processing an XCAS file,
          // which could have such an annotation with a different name than the input XCAS file!
          // So we don't do this if XCAS output is specified.
          SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
          File inFile;
          inFile = new File(new URL(fileLoc.getUri()).getPath());
          outFile = new File(mOutputDir, inFile.getName());
        }
      } catch (CASRuntimeException e) {
        // default Sofa name does not exist, use default processing below
      } catch (CASException e) {
View Full Code Here

      // Also store location of source document in CAS. This information is critical
      // if CAS Consumers will need to know where the original document contents are located.
      // For example, the Semantic Search CAS Indexer writes this information into the
      // search index that it creates, which allows applications that use the search index to
      // locate the documents that satisfy their semantic queries.
      SourceDocumentInformation srcDocInfo = new SourceDocumentInformation(jcas);
      srcDocInfo.setUri(file.getAbsoluteFile().toURL().toString());
      srcDocInfo.setOffsetInSource(0);
      srcDocInfo.setDocumentSize((int) file.length());
      srcDocInfo.setLastSegment(mCurrentIndex == mFiles.size());
      srcDocInfo.addToIndexes();
    }
    // XCAS input files
    else {
      try {
        XCASDeserializer.deserialize(fis, aCAS);
View Full Code Here

    // retrieve the filename of the input file from the CAS
    FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    File outFile = null;
    if (it.hasNext()) {
      SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next();
      File inFile;
      try {
        inFile = new File(new URL(fileLoc.getUri()).getPath());
        String outFileName = inFile.getName();
        if (fileLoc.getOffsetInSource() > 0) {
          outFileName += ("_" + fileLoc.getOffsetInSource());
        }
        outFileName += ".xmi";
        outFile = new File(mOutputDir, outFileName);
        modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore";
      } catch (MalformedURLException e1) {
View Full Code Here

    {
      logger.info("AnnotationIndex's iterator returned empty iterator. continuing...");
      return;
    }
   
    SourceDocumentInformation sdi = (SourceDocumentInformation)iterator.next();
    String fullUriString = sdi.getUri();
    URI fullUri;
    URL fullUrl;
    try
    {
      fullUri = new URI(fullUriString);
View Full Code Here

TOP

Related Classes of org.apache.uima.examples.SourceDocumentInformation

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.