Package org.apache.uima.util

Source Code of org.apache.uima.util.CasCopier

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.util;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.uima.UIMARuntimeException;
import org.apache.uima.cas.ArrayFS;
import org.apache.uima.cas.BooleanArrayFS;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.DoubleArrayFS;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.FloatArrayFS;
import org.apache.uima.cas.IntArrayFS;
import org.apache.uima.cas.LongArrayFS;
import org.apache.uima.cas.ShortArrayFS;
import org.apache.uima.cas.SofaFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.text.AnnotationFS;

/**
* Utility class for doing deep copies of FeatureStructures from one CAS to another. To handle cases
* where the source CAS has multiple references to the same FS, you can create one instance of
* CasCopier and use it to copy multiple FeatureStructures. The CasCopier will remember previously
* copied FeatureStructures, so if you later copy another FS that has a reference to a previously
* copied FS, it will not duplicate the multiply-referenced FS.
*/
public class CasCopier {
  private CAS mSrcCas;
  private CAS mDestCas;
  private LowLevelCAS mLowLevelDestCas;
  private Feature mDestSofaFeature;

  private Map<FeatureStructure, FeatureStructure> mFsMap = new HashMap<FeatureStructure, FeatureStructure>();

  /**
   * Creates a new CasCopier that can be used to copy FeatureStructures from one CAS to another.
   * Note that if you are merging data from multiple CASes, you must create a new CasCopier
   * for each source CAS.
   *
   * @param aSrcCas
   *          the CAS to copy from.
   * @param aDestCas
   *          the CAS to copy into.
   */
  public CasCopier(CAS aSrcCas, CAS aDestCas) {
    mSrcCas = aSrcCas;
    mDestCas = aDestCas;
    mLowLevelDestCas = aDestCas.getLowLevelCAS();
    mDestSofaFeature = aDestCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);   
  }
 
  /**
   * Does a complete deep copy of one CAS into another CAS.  The contents of each view
   * in the source CAS will be copied to the same-named view in the destination CAS.  If
   * the view does not already exist it will be created.  All FeatureStructures that are
   * indexed in a view in the source CAS will become indexed in the same-named view in the
   * destination CAS.
   *
   * @param aSrcCas
   *          the CAS to copy from
   * @param aDestCas
   *          the CAS to copy to
   * @param aCopySofa
   *          if true, the sofa data and mimeType of each view will be copied. If false they will not.
   */ 
  public static void copyCas(CAS aSrcCas, CAS aDestCas, boolean aCopySofa) {
    CasCopier copier = new CasCopier(aSrcCas, aDestCas);
   
    Iterator<SofaFS> sofaIter = aSrcCas.getSofaIterator();
    while (sofaIter.hasNext()) {
      SofaFS sofa = sofaIter.next();
      CAS view = aSrcCas.getView(sofa);
      copier.copyCasView(view, aCopySofa);
    }
  }

  /**
   * Does a deep copy of the contents of one CAS View into another CAS.
   * If a view with the same name as <code>aSrcCasView</code> exists in the destination CAS,
   * then it will be the target of the copy.  Otherwise, a new view will be created with
   * that name and will become the target of the copy.  All FeatureStructures that are indexed
   * in the source CAS view will become indexed in the target view.
   *
   * @param aSrcCasView
   *          the CAS to copy from
   * @param aCopySofa
   *          if true, the sofa data and mimeType will be copied. If false they will not.
   */
  public void copyCasView(CAS aSrcCasView, boolean aCopySofa) {
    //get or create the target view
    CAS targetView = getOrCreateView(mDestCas, aSrcCasView.getViewName());
   
    if (aCopySofa) {
      // can't copy the SofaFS - just copy the sofa data and mime type
      String sofaMime = aSrcCasView.getSofa().getSofaMime();
      if (aSrcCasView.getDocumentText() != null) {
        targetView.setSofaDataString(aSrcCasView.getDocumentText(), sofaMime);
      } else if (aSrcCasView.getSofaDataURI() != null) {
        targetView.setSofaDataURI(aSrcCasView.getSofaDataURI(), sofaMime);
      } else if (aSrcCasView.getSofaDataArray() != null) {
        targetView.setSofaDataArray(copyFs(aSrcCasView.getSofaDataArray()), sofaMime);
      }
    }

    // now copy indexed FS, but keep track so we don't index anything more than once
    Set<FeatureStructure> indexedFs = new HashSet<FeatureStructure>();
    Iterator<FSIndex<FeatureStructure>> indexes = aSrcCasView.getIndexRepository().getIndexes();
    while (indexes.hasNext()) {
      FSIndex<FeatureStructure> index = indexes.next();
      Iterator<FeatureStructure> iter = index.iterator();
      while (iter.hasNext()) {
        FeatureStructure fs = iter.next();
        if (!indexedFs.contains(fs)) {
          FeatureStructure copyOfFs = copyFs(fs);
          //check for annotations with null Sofa reference - this can happen
          //if the annotations were created with the Low Level CAS API.  If the
          //Sofa reference isn't set, attempting to add the FS to the indexes
          //will fail.
          if (fs instanceof AnnotationFS) {
              FeatureStructure sofa =((AnnotationFS)copyOfFs).getFeatureValue(mDestSofaFeature);
              if (sofa == null) {
                copyOfFs.setFeatureValue(mDestSofaFeature, targetView.getSofa());              
              }
          }
          // also don't index the DocumentAnnotation (it's indexed by default)
          if (!isDocumentAnnotation(copyOfFs)) {
            targetView.addFsToIndexes(copyOfFs);
          }
          indexedFs.add(fs);
        }
      }
    }
  }


  /**
   * Copies an FS from the source CAS to the destination CAS. Also copies any referenced FS, except
   * that previously copied FS will not be copied again.
   *
   * @param aFS
   *          the FS to copy. Must be contained within the source CAS.
   * @return the copy of <code>aFS</code> in the target CAS.
   */
  public FeatureStructure copyFs(FeatureStructure aFS) {
    //FS must be in the source CAS
    assert ((CASImpl)aFS.getCAS()).getBaseCAS() == ((CASImpl)mSrcCas).getBaseCAS();

    // check if we already copied this FS
    FeatureStructure copy = (FeatureStructure) mFsMap.get(aFS);
    if (copy != null)
      return copy;

    // get the type of the FS
    Type srcType = aFS.getType();

    // Certain types need to be handled specially

    // Sofa - cannot be created by normal methods.  Instead, we return the Sofa with the
    // same Sofa ID in the target CAS.  If it does not exist it will be created.
    if (aFS instanceof SofaFS) {
      String sofaId = ((SofaFS)aFS).getSofaID();
      return getOrCreateView(mDestCas, sofaId).getSofa();
    }

    // DocumentAnnotation - instead of creating a new instance, reuse the automatically created
    // instance in the destination view.
    if (isDocumentAnnotation(aFS)) {        
      String viewName = ((AnnotationFS)aFS).getView().getViewName();
      CAS destView = mDestCas.getView(viewName);
      FeatureStructure destDocAnnot = destView.getDocumentAnnotation();
      if (destDocAnnot != null) {
        copyFeatures(aFS, destDocAnnot);
      }
      return destDocAnnot;
    }

    // Arrays - need to be created a populated differently than "normal" FS
    if (aFS.getType().isArray()) {
      copy = copyArray(aFS);
      mFsMap.put(aFS, copy);
      return copy;
    }

    // create a new FS of the same type in the target CAS
    Type destType;
    if (mDestCas.getTypeSystem() == mSrcCas.getTypeSystem()) {
      //optimize type lookup if type systems are identical
      destType = srcType;
    } else {
      destType = mDestCas.getTypeSystem().getType(srcType.getName());
    }
    if (destType == null) {
      throw new UIMARuntimeException(UIMARuntimeException.TYPE_NOT_FOUND_DURING_CAS_COPY,
              new Object[] { srcType.getName() });
    }
    //We need to use the LowLevel CAS interface to create the FS, because the usual
    //CAS.createFS() call doesn't allow us to create subtypes of AnnotationBase from
    //a base CAS.  In any case we don't need the Sofa reference to be automatically
    //set because we'll set it manually when in the copyFeatures method.
    int typeCode = mLowLevelDestCas.ll_getTypeSystem().ll_getCodeForType(destType);
    int destFsAddr = mLowLevelDestCas.ll_createFS(typeCode);
    FeatureStructure destFs = mDestCas.getLowLevelCAS().ll_getFSForRef(destFsAddr);

    // add to map so we don't try to copy this more than once
    mFsMap.put(aFS, destFs);

    copyFeatures(aFS, destFs);
    return destFs;
  }

  /**
   * Copy feature values from one FS to another. For reference-valued features, this does a deep
   * copy.
   *
   * @param aSrcFS
   *          FeatureStructure to copy from
   * @param aDestFS
   *          FeatureStructure to copy to
   */
  private void copyFeatures(FeatureStructure aSrcFS, FeatureStructure aDestFS) {
    // set feature values
    Type srcType = aSrcFS.getType();
    Type destType = aDestFS.getType();
    for (Feature srcFeat : srcType.getFeatures()) {     
      Feature destFeat;
      if (destType == aSrcFS.getType()) {
        // sharing same type system, so destFeat == srcFeat
        destFeat = srcFeat;
      } else {
        // not sharing same type system, so do a name loop up in destination type system
        destFeat = destType.getFeatureByBaseName(srcFeat.getShortName());
        if (destFeat == null) {
          throw new UIMARuntimeException(UIMARuntimeException.TYPE_NOT_FOUND_DURING_CAS_COPY,
                  new Object[] { srcFeat.getName() });
        }
      }

      // copy primitive values using their string representation
      // TODO: could be optimized but this code would be very messy if we have to
      // enumerate all possible primitive types. Maybe LowLevel CAS API could help?
      if (srcFeat.getRange().isPrimitive()) {
        aDestFS.setFeatureValueFromString(destFeat, aSrcFS.getFeatureValueAsString(srcFeat));
      } else {
        // recursive copy
        FeatureStructure refFS = aSrcFS.getFeatureValue(srcFeat);
        if (refFS != null) {
          FeatureStructure copyRefFs = copyFs(refFS);
          aDestFS.setFeatureValue(destFeat, copyRefFs);
        }
      }
    }
  }

  /**
   * Returns whether the given FS has already been copied using this CasCopier.
   *
   * @param aFS
   * @return
   */
  public boolean alreadyCopied(FeatureStructure aFS) {
    return mFsMap.containsKey(aFS);
  }

  /**
   * @param arrayFS
   * @return
   */
  private FeatureStructure copyArray(FeatureStructure aSrcFs) {
    // TODO: there should be a way to do this without enumerating all the array types!
    if (aSrcFs instanceof StringArrayFS) {
      StringArrayFS arrayFs = (StringArrayFS) aSrcFs;
      int len = arrayFs.size();
      StringArrayFS destFS = mDestCas.createStringArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof IntArrayFS) {
      IntArrayFS arrayFs = (IntArrayFS) aSrcFs;
      int len = arrayFs.size();
      IntArrayFS destFS = mDestCas.createIntArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof ByteArrayFS) {
      ByteArrayFS arrayFs = (ByteArrayFS) aSrcFs;
      int len = arrayFs.size();
      ByteArrayFS destFS = mDestCas.createByteArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof ShortArrayFS) {
      ShortArrayFS arrayFs = (ShortArrayFS) aSrcFs;
      int len = arrayFs.size();
      ShortArrayFS destFS = mDestCas.createShortArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof LongArrayFS) {
      LongArrayFS arrayFs = (LongArrayFS) aSrcFs;
      int len = arrayFs.size();
      LongArrayFS destFS = mDestCas.createLongArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof FloatArrayFS) {
      FloatArrayFS arrayFs = (FloatArrayFS) aSrcFs;
      int len = arrayFs.size();
      FloatArrayFS destFS = mDestCas.createFloatArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof DoubleArrayFS) {
      DoubleArrayFS arrayFs = (DoubleArrayFS) aSrcFs;
      int len = arrayFs.size();
      DoubleArrayFS destFS = mDestCas.createDoubleArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof BooleanArrayFS) {
      BooleanArrayFS arrayFs = (BooleanArrayFS) aSrcFs;
      int len = arrayFs.size();
      BooleanArrayFS destFS = mDestCas.createBooleanArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof ArrayFS) {
      ArrayFS arrayFs = (ArrayFS) aSrcFs;
      int len = arrayFs.size();
      ArrayFS destFS = mDestCas.createArrayFS(len);
      for (int i = 0; i < len; i++) {
        FeatureStructure srcElem = arrayFs.get(i);
        if (srcElem != null) {
          FeatureStructure copyElem = copyFs(arrayFs.get(i));
          destFS.set(i, copyElem);
        }
      }
      return destFS;
    }
    assert false; // the set of array types should be exhaustive, so we should never get here
    return null;
  }
 
  /**
   * Gets the named view; if the view doesn't exist it will be created.
   */
  private static CAS getOrCreateView(CAS aCas, String aViewName) {
    //TODO: there should be some way to do this without the try...catch
    try {
      return aCas.getView(aViewName);
    }
    catch(CASRuntimeException e) {
      //create the view
      return aCas.createView(aViewName);
    }
  } 
 
  /**
   * Determines whether the given FS is the DocumentAnnotation for its view. 
   * This is more than just a type check; we actually check if it is the one "special"
   * DocumentAnnotation that CAS.getDocumentAnnotation() would return.
   */
  private static boolean isDocumentAnnotation(FeatureStructure aFS) {
    return (aFS instanceof AnnotationFS) &&
      aFS.equals(((AnnotationFS)aFS).getView().getDocumentAnnotation());
  }
}
TOP

Related Classes of org.apache.uima.util.CasCopier

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.