// Package org.apache.uima.cas.impl
//
// Source Code of org.apache.uima.cas.impl.XmiCasDeserializer$XmiCasDeserializerHandler

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.cas.impl;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.TreeMap;

import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.FSIndexRepository;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.SofaFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
import org.apache.uima.internal.util.I18nUtil;
import org.apache.uima.internal.util.IntVector;
import org.apache.uima.internal.util.XmlAttribute;
import org.apache.uima.internal.util.XmlElementName;
import org.apache.uima.internal.util.XmlElementNameAndContents;
import org.apache.uima.internal.util.rb_trees.RedBlackTree;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
* XMI CAS deserializer. Used to read in a CAS from XML Metadata Interchange (XMI) format.
*/

public class XmiCasDeserializer {

  private class XmiCasDeserializerHandler extends DefaultHandler {
    // ///////////////////////////////////////////////////////////////////////
    // Internal states for the parser.

    // Expect the start of the XML document.
    private static final int DOC_STATE = 0;

    // At the top level. Expect a FS, or document text element, or the end of the
    // XML input.
    private static final int FS_STATE = 1;

    // Inside a FS. Expect features, or the end of the FS.
    private static final int FEAT_STATE = 2;

    // Inside a feature element. We expect the feature value.
    private static final int FEAT_CONTENT_STATE = 3;

    // Inside an element that is being skipped: either an element with the XMI
    // prefix (content that's not part of the typesystem), or, when merging, a
    // preexisting FS that is to be ignored. Nesting is tracked by ignoreDepth.
    private static final int IGNORING_XMI_ELEMENTS_STATE = 4;

    // Inside a reference feature element (e.g. <feat href="#1"/>).
    // We expect no content, just the end of the element.
    private static final int REF_FEAT_STATE = 5;


    // End parser states.
    // ///////////////////////////////////////////////////////////////////////

    // For error message printing, if the Locator object can't provide source
    // of XML input.
    private static final String unknownXMLSource = "<unknown>";

    // Qualified name of the attribute that carries an element's xmi:id.
    private String ID_ATTR_NAME = "xmi:id";

    // SAX locator. Used for error message generation.
    private Locator locator;

    // The CAS we're filling (always the base CAS of the CAS passed to the constructor).
    private CASImpl casBeingFilled;

    // Store address of every FS we've deserialized, since we need to go back
    // and apply fix-ups afterwards.
    private IntVector deserializedFsAddrs;

    // Store a separate vector of FSList nodes that were deserialized from multivalued properties.
    // These are special because their "head" feature needs remapping but their "tail" feature
    // doesn't.
    private IntVector fsListNodesFromMultivaluedProperties;

    // What we expect next (one of the *_STATE constants above).
    private int state;

    // StringBuffer to accumulate text. Replaced by a fresh instance at the
    // start of every element.
    private StringBuffer buffer;

    // The address of the most recently created FS. Needed for embedded
    // feature values.
    private int currentAddr;

    // The type of the most recently created FS. Needed for arrays, also
    // useful for embedded feature values.
    private TypeImpl currentType;

    // The ID and values of arrays are stored on startElement, then used on
    // endElement to actually create the array. This is because in the case of
    // String arrays serialized with the values as child elements, we can't create
    // the array until we've seen all of the child elements.
    private int currentArrayId;

    private List<String> currentArrayElements;

    // Used for keeping track of multi-valued features read from subelements.
    // Keys are feature names (element qualified names), values are lists of strings,
    // where each String is one of the values to be assigned to the feature.
    private Map<String, List<String>> multiValuedFeatures = new TreeMap<String, List<String>>();

    // SofaFS type code
    private int sofaTypeCode;

    // Sofa number feature code
    private int sofaNumFeatCode;

    // Annotation:sofa feature code
    private int sofaFeatCode;

    // Index repositories, addressed by sofaNum: position 0 holds the base CAS
    // index repository, position 1 the Initial View's, and further entries are
    // appended for each additional Sofa.
    private List<FSIndexRepository> indexRepositories;

    // and views too
    private List<CAS> views;

    // utilities for handling CAS list types
    private ListUtils listUtils;

    // Classification of each feature's range type, indexed by feature code
    // (values are produced by classifyType).
    private int[] featureType;

    // true if unknown types should be ignored; false if they should cause an error
    boolean lenient;

    // Number of outstanding startElement events that we are ignoring.
    // We add 1 when an ignored element starts and subtract 1 when an ignored
    // element ends.
    private int ignoreDepth = 0;

    // Map from namespace prefixes to URIs. Allows namespace resolution even
    // with a non-namespace-enabled SAX parser.
    private Map<String, String> nsPrefixToUriMap = new HashMap<String, String>();

    // Container for data shared between the XmiCasSerializer and
    // XmiCasDeserializer, to support things such as consistency of IDs across
    // multiple serializations.  This is also where the map from xmi:id to
    // FS address is stored.
    private XmiSerializationSharedData sharedData;

    // sofaNum that will be assigned to the next new Sofa that is not the
    // initial view's Sofa
    private int nextSofaNum;

    // Used for merging multiple XMI CASes into one CAS object. A negative
    // value means no merge is in progress.
    private int mergePoint;

    // Current out-of-typesystem element, if any
    private OotsElementData outOfTypeSystemElement = null;

    // Local map from xmi:id to FS address, used when merging multiple XMI CASes
    // into one CAS object.
    private RedBlackTree<Integer> localXmiIdToFsAddrMap = new RedBlackTree<Integer>();

    // If the mergePoint is set: are preexisting FSs allowed, disallowed or ignored?
    AllowPreexistingFS allowPreexistingFS;

    // When deserializing a preexisting FS from a delta CAS, keeps track of the
    // features that have been deserialized. This is then compared to all of the
    // features of the type, and features that are not in the XMI are set to null.
    IntVector featsSeen = null;

    // This flag is set if, while deserializing a delta CAS, a View is found
    // referencing a preexisting FS member although preexisting FSs are disallowed.
    // The preexisting members are ignored and deserialization is allowed
    // to complete so that the CAS being filled is not corrupted.
    // An exception is thrown at the end.
    // NOTE: Since preexisting FSs are serialized first, when deserializing
    // a delta CAS with a disallowed preexisting FS, the error will be
    // caught and reported before any updates are made to the CAS being filled.

    boolean disallowedViewMemberEncountered;
   
    /**
     * Creates a SAX handler used for deserializing an XMI CAS.
     * @param aCAS CAS to deserialize into
     * @param lenient if true, unknown types/features result in an
     *   exception.  If false, unknown types/features are ignored.
     * @param sharedData data structure used to allow the XmiCasSerializer and
     *   XmiCasDeserializer to share information.
     * @param mergePoint used to support merging multiple XMI CASes.  If the
     *   mergePoint is negative, "normal" deserialization will be done,
     *   meaning the target CAS will be reset and the entire XMI content will
     *   be deserialized.  If the mergePoint is nonnegative (including 0), the
     *   target CAS will not be reset, and only Feature Structures whose
     *   xmi:id is strictly greater than the mergePoint value will be
     *   deserialized.
     */
    private XmiCasDeserializerHandler(CASImpl aCAS, boolean lenient,
            XmiSerializationSharedData sharedData, int mergePoint, AllowPreexistingFS allowPreexistingFS) {
      super();
      this.casBeingFilled = aCAS.getBaseCAS();
      this.lenient = lenient;
      this.sharedData =
        sharedData != null ? sharedData : new XmiSerializationSharedData();
      this.mergePoint = mergePoint;
      this.allowPreexistingFS = allowPreexistingFS;
      this.featsSeen = null;
      this.disallowedViewMemberEncountered = false;
      if (mergePoint < 0) {
        //If not merging, reset the CAS.
        //Necessary to get Sofas to work properly.
        casBeingFilled.resetNoQuestions();
       
        // clear ID mappings stored in the SharedData (from previous deserializations)
        this.sharedData.clearIdMap();
        //new Sofas start at 2
        this.nextSofaNum = 2;
      } else {
        this.nextSofaNum = ((CASImpl)this.casBeingFilled).getBaseSofaCount() + 1;
      }
      this.deserializedFsAddrs = new IntVector();
      this.fsListNodesFromMultivaluedProperties = new IntVector();
      this.buffer = new StringBuffer();
      this.indexRepositories = new ArrayList<FSIndexRepository>();
      this.views = new ArrayList<CAS>();
      indexRepositories.add(this.casBeingFilled.getBaseIndexRepository());
      // There should always be another index for the Initial View
      indexRepositories.add(this.casBeingFilled.getView(CAS.NAME_DEFAULT_SOFA).getIndexRepository());
      //add an entry to indexRepositories for each Sofa in the CAS (which can only happen if
      //a mergePoint was specified)
      FSIterator<SofaFS> sofaIter = this.casBeingFilled.getSofaIterator();
      while(sofaIter.hasNext()) {
        SofaFS sofa = (SofaFS)sofaIter.next();
        if (sofa.getSofaRef() == 1) {
          casBeingFilled.registerInitialSofa();
        } else {
          // add indexRepo for views other than the initial view
          indexRepositories.add(casBeingFilled.getSofaIndexRepository(sofa));
        }       
      }     
      final TypeSystemImpl tsOfReceivingCas = casBeingFilled.getTypeSystemImpl();
      this.sofaTypeCode = tsOfReceivingCas.ll_getCodeForTypeName(CAS.TYPE_NAME_SOFA);
      this.sofaNumFeatCode = tsOfReceivingCas.ll_getCodeForFeatureName(CAS.FEATURE_FULL_NAME_SOFANUM);
      this.sofaFeatCode = tsOfReceivingCas.ll_getCodeForFeatureName(CAS.FEATURE_FULL_NAME_SOFA);
      this.listUtils = new ListUtils(casBeingFilled, UIMAFramework.getLogger(XmiCasDeserializer.class), null);

      // populate feature type table
      this.featureType = new int[tsOfReceivingCas.getNumberOfFeatures() + 1];
      FeatureImpl feat;
      Iterator<Feature> it = tsOfReceivingCas.getFeatures();
      while (it.hasNext()) {
        feat = (FeatureImpl) it.next();
        featureType[feat.getCode()] = classifyType(tsOfReceivingCas.range(feat.getCode()));
      }
    }

    private final void resetBuffer() {
      this.buffer = new StringBuffer();
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    public void startDocument() throws SAXException {
      // Do setup work in the constructor.
      this.state = DOC_STATE;
      // System.out.println("Starting to read document.");
      // time = System.currentTimeMillis();
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
     *      java.lang.String, org.xml.sax.Attributes)
     */
    public void startElement(String nameSpaceURI, String localName, String qualifiedName,
            Attributes attrs) throws SAXException {
      // org.apache.vinci.debug.Debug.p("startElement: " + qualifiedName);
      // if (attrs != null) {
      // for (int i=0; i<attrs.getLength(); i++) {
      // org.apache.vinci.debug.Debug.p("a: " + attrs.getQName(i) + " v: " + attrs.getValue(i));
      // }
      // }
      resetBuffer();
      switch (state) {
        case DOC_STATE: {
          // allow any root element name
          // extract xmlns:prefix=uri attributes into a map, which we can use to
          // resolve the prefixes even with a non-namespace-aware parser
          if (attrs != null) {
            for (int i = 0; i < attrs.getLength(); i++) {
              String attrName = attrs.getQName(i);
              if (attrName.startsWith("xmlns:")) {
                String prefix = attrName.substring(6);
                String uri = attrs.getValue(i);
                nsPrefixToUriMap.put(prefix, uri);
              }
            }
          }
          this.state = FS_STATE;
          break;
        }
        case FS_STATE: {
          // ignore elements with XMI prefix (such as XMI annotations)
          if (qualifiedName.startsWith("xmi")) {
            this.state = IGNORING_XMI_ELEMENTS_STATE;
            this.ignoreDepth++;
            return;
          }
         
          // if Delta CAS check if preexisting FS check if allowed
          if (this.mergePoint >= 0) {
            String id = attrs.getValue(ID_ATTR_NAME);
            if (id != null) {
              int idInt = Integer.parseInt(id);
              if (idInt > 0 && !this.isNewFS(idInt)) { //preexisting FS
              if (this.allowPreexistingFS == AllowPreexistingFS.ignore) { //skip elements whose ID is <= mergePoint
                this.state = IGNORING_XMI_ELEMENTS_STATE;
                this.ignoreDepth++;
                return;
              } else if (this.allowPreexistingFS == AllowPreexistingFS.disallow) { //fail
                CASRuntimeException e = new CASRuntimeException(
                        CASRuntimeException.DELTA_CAS_PREEXISTING_FS_DISALLOWED,
                        new String[] {ID_ATTR_NAME + "=" + id,
                                  nameSpaceURI,
                                  localName,
                                  qualifiedName});
                  throw e;
              }
              }
            }
          }
         
          if (nameSpaceURI == null || nameSpaceURI.length() == 0) {
            // parser may not be namespace-enabled, so try to resolve NS ourselves
            int colonIndex = qualifiedName.indexOf(':');
            if (colonIndex != -1) {
              String prefix = qualifiedName.substring(0, colonIndex);
              nameSpaceURI = (String) nsPrefixToUriMap.get(prefix);
              if (nameSpaceURI == null) {
                // unbound namespace. Rather than failing, just assume a reasonable default.
                nameSpaceURI = "http:///" + prefix + ".ecore";
              }
              localName = qualifiedName.substring(colonIndex + 1);
            } else // no prefix. Use default URI
            {
              nameSpaceURI = XmiCasSerializer.DEFAULT_NAMESPACE_URI;
            }
          }

          readFS(nameSpaceURI, localName, qualifiedName, attrs);
        
          multiValuedFeatures.clear();
          state = FEAT_STATE;
          break;
        }
        case FEAT_STATE: {
          //parsing a feature recorded as a child element
          //check for an "href" feature, used for references
          String href = attrs.getValue("href");
          if (href != null && href.startsWith("#")) {          
            //for out-of-typesystem objects, there's special handling here
            //to keep track of the fact this was an href so we re-serialize
            //correctly.
            if (this.outOfTypeSystemElement != null) {
              XmlElementName elemName = new XmlElementName(nameSpaceURI, localName, qualifiedName);
              List<XmlAttribute> ootsAttrs = new ArrayList<XmlAttribute>();
              ootsAttrs.add(new XmlAttribute("href", href));
              XmlElementNameAndContents elemWithContents = new XmlElementNameAndContents(elemName, null, ootsAttrs);
              this.outOfTypeSystemElement.childElements.add(elemWithContents);
            }
            else {
              //In-typesystem FS, so we can forget this was an href and just add
              //the integer value, which will be interpreted as a reference later.
              //NOTE: this will end up causing it to be reserialized as an attribute
              //rather than an element, but that is not in violation of the XMI spec.
              List<String> valueList = this.multiValuedFeatures.get(qualifiedName);
              if (valueList == null) {
                valueList = new ArrayList<String>();
                this.multiValuedFeatures.put(qualifiedName, valueList);
              }
              valueList.add(href.substring(1));
            }                        
            state = REF_FEAT_STATE;
          }
          else {
            //non-reference feature, expecting feature value as character content
            state = FEAT_CONTENT_STATE;
          }
          break;
        }
        case IGNORING_XMI_ELEMENTS_STATE: {
          ignoreDepth++;
          break;
        }
        default: {
          // If we're not in an element expecting state, raise an error.
          throw createException(XCASParsingException.TEXT_EXPECTED, qualifiedName);
        }
      }
    }

    // Create a new FS.
    private void readFS(String nameSpaceURI, String localName, String qualifiedName,
            Attributes attrs) throws SAXException {
      String typeName = xmiElementName2uimaTypeName(nameSpaceURI, localName);
     
      currentType = (TypeImpl) ts.getType(typeName);
      if (currentType == null) {
        // ignore NULL type
        if ("uima.cas.NULL".equals(typeName)) {
          return;
        }
        // special processing for uima.cas.View (encodes indexed FSs)
        if ("uima.cas.View".equals(typeName)) {
          processView(attrs.getValue("sofa"), attrs.getValue("members"));
          String added = attrs.getValue("added_members");
          String deleted = attrs.getValue("deleted_members");
          String reindexed = attrs.getValue("reindexed_members");
          processView(attrs.getValue("sofa"), added,deleted,reindexed);
          return;
        }
        // type is not in our type system
        if (!lenient) {
          throw createException(XCASParsingException.UNKNOWN_TYPE, typeName);
        } else {
          addToOutOfTypeSystemData(
              new XmlElementName(nameSpaceURI, localName, qualifiedName), attrs);                 
        }
        return;
      } else if (casBeingFilled.isArrayType(currentType)) {
        // store ID and array values (if specified as attribute).
        // we will actually create the array later, in endElement.
        String idStr = attrs.getValue(ID_ATTR_NAME);
        currentArrayId = idStr == null ? -1 : Integer.parseInt(idStr);
        String elements = attrs.getValue("elements");

        // special parsing for byte arrays (they are serialized as a hex
        // string. And we create them here instead of parsing to a string
        // array, for efficiency.
        if (casBeingFilled.isByteArrayType(currentType)) {
          createByteArray(elements, currentArrayId, 0);
        } else {
          if (elements != null) {
            String[] parsedElements = parseArray(elements);
            currentArrayElements = Arrays.asList(parsedElements);
          } else {
            currentArrayElements = null;
          }
        }
      } else {
      String idStr = attrs.getValue(ID_ATTR_NAME);
      int xmiId = idStr == null ? -1 : Integer.parseInt(idStr);

      if (isNewFS(xmiId)) {  //new FS so create it. 
        final int addr = casBeingFilled.ll_createFS(currentType.getCode());
          readFS(addr, attrs);
      } else //preexisting
        if (this.allowPreexistingFS == AllowPreexistingFS.disallow) {
          CASRuntimeException e = new CASRuntimeException(
                      CASRuntimeException.DELTA_CAS_PREEXISTING_FS_DISALLOWED,
                      new String[] {ID_ATTR_NAME + "=" + idStr,
                                nameSpaceURI,
                                localName,
                                qualifiedName});
          throw e;
        } else if (this.allowPreexistingFS == AllowPreexistingFS.allow) { //get the FS
          final int addr = getFsAddrForXmiId(xmiId);
               readFS(addr,attrs);
        } // otherwise ignore
      }
      }
    }

    /**
     * Handles the processing of a cas:View element in the XMI. The cas:View element encodes indexed
     * FSs.
     *
     * @param sofa
     *          xmi:id of the sofa for this view, null indicates base CAS "view"
     * @param membersString
     *          whitespace-separated string of FS addresses. Each FS is to be added to the specified
     *          sofa's index repository
     */
    private void processView(String sofa, String membersString) throws SAXParseException {
      // TODO: this requires View to come AFTER all of its members
      if (membersString != null) {
        // a view with no Sofa will be added to the 1st, _InitialView, index
        int sofaNum = 1;
        boolean newview = false;
        if (sofa != null) {
          // translate sofa's xmi:id into its sofanum
          int sofaXmiId = Integer.parseInt(sofa);
          newview = isNewFS(sofaXmiId);
          int sofaAddr;
          try {
            sofaAddr = getFsAddrForXmiId(sofaXmiId);
          } catch (NoSuchElementException e) {
            throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(sofaXmiId));
          }
          sofaNum = casBeingFilled.getFeatureValue(sofaAddr, sofaNumFeatCode);
        }
        FSIndexRepositoryImpl indexRep = (FSIndexRepositoryImpl) indexRepositories.get(sofaNum);

        // TODO: optimize by going straight to int[] without going through
        // intermediate String[]?
        String[] members = parseArray(membersString);
        for (int i = 0; i < members.length; i++) {
          int id = Integer.parseInt(members[i]);
          // special handling for merge operations ...
          if (!newview && !isNewFS(id)) {
            // a pre-existing FS is indexed in a pre-existing view
            if (this.allowPreexistingFS == AllowPreexistingFS.ignore) {
              // merging with full CAS: ignore anything below the high water mark
              continue;
            }
            if (this.allowPreexistingFS == AllowPreexistingFS.disallow) {
              // merging with delta CAS: flag it
              this.disallowedViewMemberEncountered = true;
              continue;
            }
          }
          // have to map each ID to its "real" address (TODO: optimize?)
          //TODO: currently broken, can't use XmiSerializationSharedData for
          //this id mapping when merging, need local map
          try {
            int addr = getFsAddrForXmiId(id);
            indexRep.addFS(addr);
          } catch (NoSuchElementException e) {
            if (!lenient) {
              throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(id));
            }
            else {
              //unknown view member may be an OutOfTypeSystem FS
              this.sharedData.addOutOfTypeSystemViewMember(sofa, members[i]);
            }
          }
        }
      }
    }
   
   
    /**
     * Handles the processing of a cas:View element in the XMI. The cas:View element encodes indexed
     * FSs.
     *
     * @param sofa
     *          xmi:id of the sofa for this view, null indicates base CAS "view"
     * @param membersString
     *          whitespace-separated string of FS addresses. Each FS is to be added to the specified
     *          sofa's index repository
     */
    private void processView(String sofa, String addmembersString,
        String delmemberString, String reindexmemberString) throws SAXParseException {
      // TODO: this requires View to come AFTER all of its members
      if (addmembersString != null) {
        processView(sofa, addmembersString);
      }
      int sofaNum = 1;
      FSIndexRepositoryImpl indexRep = null;
      if (delmemberString != null || reindexmemberString != null) {
      if (sofa != null) {
              // translate sofa's xmi:id into its sofanum
              int sofaXmiId = Integer.parseInt(sofa);
              int sofaAddr = getFsAddrForXmiId(sofaXmiId);
              sofaNum = casBeingFilled.getFeatureValue(sofaAddr, sofaNumFeatCode);
        }
        indexRep = (FSIndexRepositoryImpl) indexRepositories.get(sofaNum);

        // TODO: optimize by going straight to int[] without going through
        // intermediate String[]?
        if (delmemberString != null) {
          String[] members = parseArray(delmemberString);
          for (int i = 0; i < members.length; i++) {
            int id = Integer.parseInt(members[i]);
            if (!isNewFS(id)) {  //preexisting FS
              if (this.allowPreexistingFS == AllowPreexistingFS.disallow) {
                this.disallowedViewMemberEncountered = true//ignore but flag it.
                continue;
              } else if (this.allowPreexistingFS == AllowPreexistingFS.ignore) {
                continue//ignore
              }
            }
          // have to map each ID to its "real" address (TODO: optimize?)
          //TODO: currently broken, can't use XmiSerializationSharedData for
          //this id mapping when merging, need local map
          try {
            int addr = getFsAddrForXmiId(id);
            indexRep.removeFS(addr);
          } catch (NoSuchElementException e) {
            if (!lenient) {
                throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(id));
              } else {
            //unknown view member may be an OutOfTypeSystem FS
            this.sharedData.addOutOfTypeSystemViewMember(sofa, members[i]);
            }
          }
          } 
        }
        if (reindexmemberString != null) {
          String[] members = parseArray(reindexmemberString);
          for (int i = 0; i < members.length; i++) {
            int id = Integer.parseInt(members[i]);
            if (!isNewFS(id)) { //preexising FS
                if (this.allowPreexistingFS == AllowPreexistingFS.disallow) {
                  this.disallowedViewMemberEncountered = true; //ignore but flag it.
                  continue;
                } else if (this.allowPreexistingFS == AllowPreexistingFS.ignore) {
                  continue
                }
            }
            // have to map each ID to its "real" address (TODO: optimize?)
            //TODO: currently broken, can't use XmiSerializationSharedData for
            //this id mapping when merging, need local map
            try {
              int addr = getFsAddrForXmiId(id);
              indexRep.removeFS(addr);
              indexRep.addFS(addr);
            } catch (NoSuchElementException e) {
              if (!lenient) {
                throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(id));
              } else {
              //unknown view member may be an OutOfTypeSystem FS
              this.sharedData.addOutOfTypeSystemViewMember(sofa, members[i]);
              }
            }
          } 
        }
      }
    }

    /**
     *
     * @param addr
     * @param attrs
     * @throws SAXException
     */
    private void readFS(final int addr, Attributes attrs) throws SAXException {
      // Hang on to address for handle features encoded as child elements
      this.currentAddr = addr;
      int id = -1;
      String attrName, attrValue;
      final int typeCode = casBeingFilled.getHeapValue(addr);
      final Type type = casBeingFilled.getTypeSystemImpl().ll_getTypeForCode(typeCode);
      int thisSofaNum = 0;
     
      //is it a new FS
      try {
        id = Integer.parseInt(attrs.getValue(ID_ATTR_NAME));
      } catch (NumberFormatException e) {
        throw createException(XCASParsingException.ILLEGAL_ID, attrs.getValue(ID_ATTR_NAME));
      }
      boolean newFS = this.isNewFS(id);
     
      if (sofaTypeCode == typeCode) {
        String sofaID = attrs.getValue(CAS.FEATURE_BASE_NAME_SOFAID);
        if (sofaID.equals(CAS.NAME_DEFAULT_SOFA) || sofaID.equals("_DefaultTextSofaName")) {
          // initial view Sofa always has sofaNum = 1
          thisSofaNum = 1;
        } else {
          if (newFS) { 
            thisSofaNum = this.nextSofaNum++;
          } else {
          thisSofaNum = Integer.parseInt(attrs.getValue(CAS.FEATURE_BASE_NAME_SOFANUM));
          }
        }
      }
     
      this.featsSeen = null;
      for (int i = 0; i < attrs.getLength(); i++) {
        attrName = attrs.getQName(i);
        attrValue = attrs.getValue(i);
        if (attrName.equals(ID_ATTR_NAME)) {
          try {
            id = Integer.parseInt(attrValue);
            newFS = this.isNewFS(id);
            if (sofaTypeCode != typeCode && !newFS) {
              this.featsSeen = new IntVector(attrs.getLength());
            } else {
              this.featsSeen = null;
            }
          } catch (NumberFormatException e) {
            throw createException(XCASParsingException.ILLEGAL_ID, attrValue);
          }
        } else {
          if (sofaTypeCode == typeCode && attrName.equals(CAS.FEATURE_BASE_NAME_SOFAID)) {
            if (attrValue.equals("_DefaultTextSofaName")) {
              // First change old default Sofa name into the new one
              attrValue = CAS.NAME_DEFAULT_SOFA;
            }
          } else if (sofaTypeCode == typeCode && attrName.equals(CAS.FEATURE_BASE_NAME_SOFANUM)) {
            attrValue = Integer.toString(thisSofaNum);
          }
          int featCode = handleFeature(type, addr, attrName, attrValue, newFS);
          //if processing delta cas preexisting FS, keep track of features that have
          //been deserialized.
          if (this.featsSeen != null && !newFS && featCode != -1) {
            this.featsSeen.add(featCode);
          }
        }
      }
     
      if (sofaTypeCode == typeCode && newFS) {
        // If a Sofa, create CAS view to get new indexRepository
        SofaFS sofa = (SofaFS) casBeingFilled.createFS(addr);
        // also add to indexes so we can retrieve the Sofa later
        casBeingFilled.getBaseIndexRepository().addFS(sofa);
        CAS view = casBeingFilled.getView(sofa);
        if (sofa.getSofaRef() == 1) {
          casBeingFilled.registerInitialSofa();
        } else {
          // add indexRepo for views other than the initial view
          indexRepositories.add(casBeingFilled.getSofaIndexRepository(sofa));
        }
        ((CASImpl) view).registerView(sofa);
        views.add(view);
      }
      deserializedFsAddrs.add(addr);
      addFsAddrXmiIdMapping(addr, id);
    }

    // The definition of a null value. Any other value must be in the expected
    // format.
    private final boolean emptyVal(String val) {
      return ((val == null) || (val.length() == 0));
    }

    private int handleFeature(final Type type, int addr, String featName, String featVal, boolean newFS) throws SAXException {
      final FeatureImpl feat = (FeatureImpl) type.getFeatureByBaseName(featName);
      if (feat == null) {
        if (!this.lenient) {
          throw createException(XCASParsingException.UNKNOWN_FEATURE, featName);
        }
        else {
          sharedData.addOutOfTypeSystemAttribute(addr, featName, featVal);
        }
        return -1;
      }
     
      //Sofa FS 
      //only update Sofa data features and mime type feature. skip other features.
      //skip Sofa data features if Sofa data is already set.
      //these features may not be modified.
      if (sofaTypeCode == casBeingFilled.getHeapValue(addr) && !isNewFS(addr) ) {
      if (featName.equals(CAS.FEATURE_BASE_NAME_SOFAID) ||
          featName.equals(CAS.FEATURE_BASE_NAME_SOFANUM))   {
          return feat.getCode();
      } else if (featName.equals(CAS.FEATURE_BASE_NAME_SOFASTRING||
               featName.equals(CAS.FEATURE_BASE_NAME_SOFAURI) ||
               featName.equals(CAS.FEATURE_BASE_NAME_SOFAARRAY)) {
          int currVal = casBeingFilled.getFeatureValue(addr, feat.getCode());
        if (currVal != 0)
          return feat.getCode();
      }
      }   
      handleFeature(addr, feat.getCode(), featVal);
      return feat.getCode();
    }

    private int handleFeature(final Type type, int addr, String featName, List<String> featVals) throws SAXException {
      final FeatureImpl feat = (FeatureImpl) type.getFeatureByBaseName(featName);
      if (feat == null) {
        if (!this.lenient) {
          throw createException(XCASParsingException.UNKNOWN_FEATURE, featName);
        }
        else {
          sharedData.addOutOfTypeSystemChildElements(addr, featName, featVals);
        }
        return -1;
      }
      handleFeature(addr, feat.getCode(), featVals);
      return feat.getCode();
    }

    /**
     * Set a CAS feature from an XMI attribute.
     *
     * @param addr
     *          address of FS containing the feature
     * @param featCode
     *          code of feature to set
     * @param featVal
     *          string representation of the feature value
     * @throws SAXException
     */
    private void handleFeature(int addr, int featCode, String featVal) throws SAXException {
      switch (featureType[featCode]) {
        case LowLevelCAS.TYPE_CLASS_INT: {
          try {
            if (!emptyVal(featVal)) {
              if (featCode == sofaFeatCode) {
                // special handling for "sofa" feature of annotation. Need to change
                // it from a sofa reference into a sofa number
                int sofaXmiId = Integer.parseInt(featVal);
                int sofaAddr = getFsAddrForXmiId(sofaXmiId);
                int sofaNum = casBeingFilled.getFeatureValue(sofaAddr, sofaNumFeatCode);
                casBeingFilled.setFeatureValue(addr, featCode, sofaNum);
              } else {
                casBeingFilled.setFeatureValue(addr, featCode, Integer.parseInt(featVal));
              }
            }
          } catch (NumberFormatException e) {
            throw createException(XCASParsingException.INTEGER_EXPECTED, featVal);
          }
          break;
        }
        case LowLevelCAS.TYPE_CLASS_FLOAT:
        case LowLevelCAS.TYPE_CLASS_BOOLEAN:
        case LowLevelCAS.TYPE_CLASS_BYTE:
        case LowLevelCAS.TYPE_CLASS_SHORT:
        case LowLevelCAS.TYPE_CLASS_LONG:
        case LowLevelCAS.TYPE_CLASS_DOUBLE: {
          try {
            if (!emptyVal(featVal)) {
              casBeingFilled.setFeatureValueFromString(addr, featCode, featVal);
              // cas.setFloatValue(addr, featCode, Float.parseFloat(featVal));
            }
          } catch (NumberFormatException e) {
            throw createException(XCASParsingException.FLOAT_EXPECTED, featVal);
          }
          break;
        }
        case LowLevelCAS.TYPE_CLASS_STRING: {
          if (featVal != null) // do not use empty value since that would filter out ""
          {
          //if (newFS) {
            //  casBeingFilled.setStringValue(addr, featCode, featVal);
          //} else {
            //preexisting FS, compare with original value in CAS and set only if different.
            String origValue = casBeingFilled.getStringValue(addr, featCode);
            if (origValue == null || !featVal.equals(origValue)) {
              casBeingFilled.setStringValue(addr, featCode, featVal);
            }
          //}
          }
          break;
        }
        case LowLevelCAS.TYPE_CLASS_FS: {
          try {
            if (!emptyVal(featVal)) { this.
              casBeingFilled.setFeatureValue(addr, featCode, Integer.parseInt(featVal));
            }
          } catch (NumberFormatException e) {
            throw createException(XCASParsingException.INTEGER_EXPECTED, featVal);
          }
          break;
        }

          // For array types and list features, there are two kinds of serializations.
          // If the feature has multipleReferencesAllowed = true, then it should have been
          // serialized as a normal FS. If it has multipleReferencesAllowed = false, then
          // it should have been serialized as a multi-valued property.
        case LowLevelCAS.TYPE_CLASS_INTARRAY:
        case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
        case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
        case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
        case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
        case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
        case LowLevelCAS.TYPE_CLASS_LONGARRAY:
        case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
        case LowLevelCAS.TYPE_CLASS_FSARRAY: {
          if (ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
            // do the usual FS deserialization
            try {
              if (!emptyVal(featVal)) {
                casBeingFilled.setFeatureValue(addr, featCode, Integer.parseInt(featVal));
              }
            } catch (NumberFormatException e) {
              throw createException(XCASParsingException.INTEGER_EXPECTED, featVal);
            }
          } else {
            // Do the multivalued property deserialization.
            // However, byte arrays have a special serialization (as hex digits)
            if (featureType[featCode] == LowLevelCAS.TYPE_CLASS_BYTEARRAY) {
              int currFeatVal = casBeingFilled.getFeatureValue(addr, featCode);
              int casArray = 0;
              casArray = createByteArray(featVal,-1,currFeatVal);
              if (casArray != currFeatVal) {
                casBeingFilled.setFeatureValue(addr, featCode, casArray);      
              }
            
            } else {
              String[] arrayVals = parseArray(featVal);
              handleFeature(addr, featCode, Arrays.asList(arrayVals));
            }
          }
          break;
        }
          // For list types, we do the same as for array types UNLESS we're dealing with
          // the tail feature of another list node. In that case we do the usual FS deserialization.
        case XmiCasSerializer.TYPE_CLASS_INTLIST:
        case XmiCasSerializer.TYPE_CLASS_FLOATLIST:
        case XmiCasSerializer.TYPE_CLASS_STRINGLIST:
        case XmiCasSerializer.TYPE_CLASS_FSLIST: {
          if (ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
            // do the usual FS deserialization
            try {
              if (!emptyVal(featVal)) {
                casBeingFilled.setFeatureValue(addr, featCode, Integer.parseInt(featVal));
              }
            } catch (NumberFormatException e) {
              throw createException(XCASParsingException.INTEGER_EXPECTED, featVal);
            }
          } else // do the multivalued property deserialization, like arrays
          {
            String[] arrayVals = parseArray(featVal);
            handleFeature(addr, featCode, Arrays.asList(arrayVals));
          }
          break;
        }
        default: {
          assert false; // this should be an exhaustive case block
        }
      }
    }

    /**
     * Parse an XMI multi-valued attribute into a String array, by splitting on whitespace.
     *
     * @param val
     *          XMI attribute value
     * @return an array with each array value as an element
     */
    private String[] parseArray(String val) {
      String[] arrayVals;
      val = val.trim();
      if (emptyVal(val)) {
        arrayVals = new String[0];
      } else {
        arrayVals = val.split("\\s+");
      }
      return arrayVals;
    }

    /**
     * Set a CAS feature from an array of Strings. This supports the XMI syntax where each value is
     * listed as a separate subelement.
     *
     * @param addr
     *          address of FS containing the feature
     * @param featCode
     *          code of feature to set
     * @param featVals
     *          List of Strings, each String representing one value for the feature
     * @throws SAXException
     */
    private void handleFeature(int addr, int featCode, List<String> featVals) throws SAXException {
      switch (featureType[featCode]) {
        case LowLevelCAS.TYPE_CLASS_INT:
        case LowLevelCAS.TYPE_CLASS_FLOAT:
        case LowLevelCAS.TYPE_CLASS_STRING:
        case LowLevelCAS.TYPE_CLASS_BOOLEAN:
        case LowLevelCAS.TYPE_CLASS_BYTE:
        case LowLevelCAS.TYPE_CLASS_SHORT:
        case LowLevelCAS.TYPE_CLASS_LONG:
        case LowLevelCAS.TYPE_CLASS_DOUBLE:
        case LowLevelCAS.TYPE_CLASS_FS:
          if (featVals.size() != 1) {
            throw new SAXParseException(I18nUtil.localizeMessage(
                    UIMAException.STANDARD_MESSAGE_CATALOG, Locale.getDefault(),
                    "multiple_values_unexpected",
                    new Object[] { ts.ll_getFeatureForCode(featCode).getName() }), locator);
          } else {
            handleFeature(addr, featCode, featVals.get(0));
          }
          break;
        case LowLevelCAS.TYPE_CLASS_INTARRAY:
        case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
        case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
        case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
        case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
        case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
        case LowLevelCAS.TYPE_CLASS_LONGARRAY:
        case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
        case LowLevelCAS.TYPE_CLASS_FSARRAY: {
          int casArray = 0;
          int currVal = casBeingFilled.getFeatureValue(addr, featCode);
          casArray = createArray(casBeingFilled.getTypeSystemImpl().range(featCode), featVals, -1, currVal);
          if (currVal != casArray) {
            casBeingFilled.setFeatureValue(addr, featCode, casArray);
          }
          //add to nonshared fs to encompassing FS map
          if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
            addNonsharedFSToEncompassingFSMapping(casArray, addr);
          }
          break;
        }
        /**
        case LowLevelCAS.TYPE_CLASS_FSARRAY: {
          int casArray = createArray(casBeingFilled.getTypeSystemImpl().range(featCode), featVals, -1);
          casBeingFilled.setFeatureValue(addr, featCode, casArray);
          break;
        }
        **/
        case XmiCasSerializer.TYPE_CLASS_INTLIST: {
          int listFS = casBeingFilled.getFeatureValue(addr, featCode);
          if (listFS == 0) {
            listFS = listUtils.createIntList(featVals);
            casBeingFilled.setFeatureValue(addr, featCode, listFS);
          } else {
          listUtils.updateIntList(listFS, featVals);
          }
          //add to nonshared fs to encompassing FS map
          if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
            addNonsharedFSToEncompassingFSMapping(listFS, addr);
          }
          break;
        }
        case XmiCasSerializer.TYPE_CLASS_FLOATLIST: {
          int listFS = casBeingFilled.getFeatureValue(addr, featCode);
          if (listFS == 0) {
            listFS = listUtils.createFloatList(featVals);
            casBeingFilled.setFeatureValue(addr, featCode, listFS);
          } else {
          listUtils.updateFloatList(listFS, featVals);
          }
          //add to nonshared fs to encompassing FS map
          if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
            addNonsharedFSToEncompassingFSMapping(listFS, addr);
          }
          break;
        }
        case XmiCasSerializer.TYPE_CLASS_STRINGLIST: {
          int listFS = casBeingFilled.getFeatureValue(addr, featCode);
          if (listFS == 0) {
            listFS = listUtils.createStringList(featVals);
            casBeingFilled.setFeatureValue(addr, featCode, listFS);
          } else {
          listUtils.updateStringList(listFS, featVals);
          }
          //add to nonshared fs to encompassing FS map
          if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
            addNonsharedFSToEncompassingFSMapping(listFS, addr);
          }
          break;
        }
        case XmiCasSerializer.TYPE_CLASS_FSLIST: {
          // this call, in addition to creating the list in the CAS, also
          // adds each list node ID to the fsListNodesFromMultivaluedProperties list.
          // We need this so we can go back through later and reset the addresses of the
          // "head" features of these lists nodes (but not reset the tail features).
          // It also adds a mapping between the nodes and the encompassing FS in order
          // to properly serialize in delta xmi format.
          int listFS = casBeingFilled.getFeatureValue(addr, featCode);
          IntVector fslistnodes = new IntVector();
          if (listFS == 0) {
            listFS = listUtils.createFsList(featVals, fslistnodes);
            casBeingFilled.setFeatureValue(addr, featCode, listFS);
          } else {
          listUtils.updateFsList(listFS, featVals, fslistnodes);
          }
          //add to multivaluedproperties fs list.
          for (int i=0; i < fslistnodes.size();  i++) {
            fsListNodesFromMultivaluedProperties.add(fslistnodes.get(i));
          }
          //add to nonshared fs to encompassing FS map.
          if (!ts.ll_getFeatureForCode(featCode).isMultipleReferencesAllowed()) {
          for (int i=0; i < fslistnodes.size(); i++) {
            addNonsharedFSToEncompassingFSMapping(fslistnodes.get(i), addr);
          }
          }
          break;
        }
        default: {
          assert false; // this should be an exhaustive case block
        }
      }
    }

    /**
     * Create or update an array in the CAS
     *
     * @param arrayType
     *          CAS type code for the array
     * @param values
     *          List of strings, each representing an element in the array
     * @param xmiId
     *          xmi:id assigned to the array object.
     * @param addr
     *          address of preexisting non-shared array
     * @return
     */
    private int createArray(int arrayType, List<String> values, int xmiId, int addr) {
      int casArray = -1;
      if (addr > 0) { //non-shared preexisting
      if (values.size() == casBeingFilled.getLowLevelCAS().ll_getArraySize(addr)) {
      casArray = addr;
      updateExistingArray(arrayType, values, casArray);
      } else {
      casArray = createNewArray(arrayType, values);
      }
      }  else  if (xmiId == -1) { //non-shared new
        casArray = createNewArray(arrayType, values);
      else if (isNewFS(xmiId)) {  //shared new
        casArray = createNewArray(arrayType,values);
      else { //shared preexisting
        casArray = getFsAddrForXmiId(xmiId);
        if (values.size() == casBeingFilled.getLowLevelCAS().ll_getArraySize(casArray)) {
          updateExistingArray(arrayType, values, casArray);
          } else {
          casArray = createNewArray(arrayType, values);
          }
      }

      deserializedFsAddrs.add(casArray);
      addFsAddrXmiIdMapping(casArray, xmiId);
      return casArray;
    }
   
    /**
     * Create an array in the CAS.
     *
     * @param arrayType
     *    CAS type code for the array
     * @param values
     *      List of strings, each containing the value of an element of the array.
     * @return
     */
    private int createNewArray(int arrayType, List<String> values) {
      FeatureStructureImpl fs;
      int casArray = -1;
      if (casBeingFilled.isIntArrayType(arrayType)) {
    fs = (FeatureStructureImpl) casBeingFilled.createIntArrayFS(values.size());
    } else if (casBeingFilled.isFloatArrayType(arrayType)) {
      fs = (FeatureStructureImpl) casBeingFilled.createFloatArrayFS(values.size());       
    } else if (casBeingFilled.isStringArrayType(arrayType)) {
    fs = (FeatureStructureImpl) casBeingFilled.createStringArrayFS(values.size());               
    } else if (casBeingFilled.isBooleanArrayType(arrayType)) {
    fs = (FeatureStructureImpl) casBeingFilled.createBooleanArrayFS(values.size());
    } else if (casBeingFilled.isByteArrayType(arrayType)) {
    fs = (FeatureStructureImpl) casBeingFilled.createByteArrayFS(values.size());
    } else if (casBeingFilled.isShortArrayType(arrayType)) {
    fs = (FeatureStructureImpl) casBeingFilled.createShortArrayFS(values.size());
    } else if (casBeingFilled.isLongArrayType(arrayType)) {
    fs = (FeatureStructureImpl) casBeingFilled.createLongArrayFS(values.size());
    } else if (casBeingFilled.isDoubleArrayType(arrayType)) {
    fs = (FeatureStructureImpl) casBeingFilled.createDoubleArrayFS(values.size());
    } else {
    fs = (FeatureStructureImpl) casBeingFilled.createArrayFS(values.size());
    }
      casArray = fs.getAddress();
     
      for (int i = 0; i < values.size(); i++) {
        String stringVal = (String) values.get(i);
        casBeingFilled.setArrayValueFromString(casArray, i, stringVal);
      }
     
      return casArray;
    }
   
    private void updateExistingArray(int arrayType, List<String> values, int casArray) {
      for (int i = 0; i < values.size(); i++) {
        String stringVal = values.get(i);
        if (casBeingFilled.isStringArrayType(arrayType)) {
           String currVal = casBeingFilled.getLowLevelCAS().ll_getStringArrayValue(casArray, i)
           if (currVal != null && currVal.equals(stringVal)) {
             continue;       
          
        }
        casBeingFilled.setArrayValueFromString(casArray, i, stringVal);
      }
    }

    /**
     * Create a byte array in the CAS.
     *
     * @param hexString
     *          value of the byte array as a hex string
     * @param xmiId
     *          xmiId - this will be -1 if this is a non-shared byte array FS.
     * @param addr
     *          the current address of the non-shared ByteArrayFS used when processing
     *          a Delta CAS.
     * @return
     */
    private int createByteArray(String hexString, int xmiId, int addr) {
      int arrayLen = hexString.length() / 2;
      ByteArrayFS fs = null;
     
      if (addr > 0) {   //non-shared and fs exists
      fs = (ByteArrayFS) casBeingFilled.createFS(addr);
        if (fs.size() != arrayLen) {  //if length changes, create newFS
          fs = casBeingFilled.createByteArrayFS(arrayLen);
        }
      } else if (xmiId == -1) { //non-shared and no fs
      fs = casBeingFilled.createByteArrayFS(arrayLen);
      } else  {       //shared
      if (isNewFS(xmiId)) {
        fs = casBeingFilled.createByteArrayFS(arrayLen);
      } else {
        addr = getFsAddrForXmiId(xmiId);
        fs = (ByteArrayFS) casBeingFilled.createFS(addr);
        if (fs.size() != arrayLen) {
          fs = casBeingFilled.createByteArrayFS(arrayLen);
        }
      }
      }
     
      for (int i = 0; i < arrayLen; i++) {
        byte high = hexCharToByte(hexString.charAt(i * 2));
        byte low = hexCharToByte(hexString.charAt(i * 2 + 1));
        byte b = (byte) ((high << 4) | low);
        fs.set(i, b);
      }

      int arrayAddr = ((FeatureStructureImpl) fs).getAddress();
      deserializedFsAddrs.add(arrayAddr);
      addFsAddrXmiIdMapping(arrayAddr, xmiId);
      return arrayAddr;
    }

    private byte hexCharToByte(char c) {
      if ('0' <= c && c <= '9')
        return (byte) (c - '0');
      else if ('A' <= c && c <= 'F')
        return (byte) (c - 'A' + 10);
      else if ('1' <= c && c <= 'f')
        return (byte) (c - '1' + 10);
      else
        throw new NumberFormatException("Invalid hex char: " + c);
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    public void characters(char[] chars, int start, int length) throws SAXException {
      switch (this.state) {
        case FEAT_CONTENT_STATE:
          buffer.append(chars, start, length);
          break;
        default:
      }
    }

    boolean isAllWhitespace(StringBuffer b) {
      final int len = b.length();
      for (int i = 0; i < len; i++) {
        if (!Character.isWhitespace(b.charAt(i))) {
          return false;
        }
      }
      return true;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String,
     *      java.lang.String)
     */
    public void endElement(String nsURI, String localName, String qualifiedName)
            throws SAXException {
      switch (this.state) {
        case DOC_STATE: {
          // Do nothing.
          break;
        }
        case FS_STATE: {
          this.state = DOC_STATE;
          break;
        }
        case FEAT_CONTENT_STATE: {
          // We have just processed one of possibly many values for a feature.
          // Store this value in the multiValuedFeatures map for later use.
          List<String> valueList = this.multiValuedFeatures.get(qualifiedName);
          if (valueList == null) {
            valueList = new ArrayList<String>();
            this.multiValuedFeatures.put(qualifiedName, valueList);
          }
          valueList.add(buffer.toString());

          // go back to the state where we're expecting a feature
          this.state = FEAT_STATE;
          break;
        }
        case REF_FEAT_STATE: {
          this.state = FEAT_STATE;
          break;
        }
        case FEAT_STATE: {
          // end of FS. Process multi-valued features or array elements that were
          // encoded as subelements
          if (this.outOfTypeSystemElement != null) {
            if (!this.multiValuedFeatures.isEmpty()) {
              for (Map.Entry<String, List<String>> entry : this.multiValuedFeatures.entrySet()) {
                String featName = entry.getKey();
                List<String> featVals = entry.getValue();
                addOutOfTypeSystemFeature(outOfTypeSystemElement, featName, featVals);
              }
            }
            this.outOfTypeSystemElement = null;
          }
          else if (currentType != null) {
            if (casBeingFilled.isArrayType(currentType) && !casBeingFilled.isByteArrayType(currentType)) {
              // create the array now. elements may have been provided either as
              // attributes or child elements, but not both.
              // BUT - not byte arrays! They are created immediately, to avoid
              // the overhead of parsing into a String array first
              if (currentArrayElements == null) // were not specified as attributes
              {
                currentArrayElements = this.multiValuedFeatures.get("elements");
                if (currentArrayElements == null) {
                  currentArrayElements = Collections.emptyList();
                }
              }
              createArray(currentType.getCode(), currentArrayElements, currentArrayId, 0);
            } else if (!this.multiValuedFeatures.isEmpty()) {
              for (Map.Entry<String, List<String>> entry : this.multiValuedFeatures.entrySet()) {
                String featName = entry.getKey();
                List<String> featVals = entry.getValue();
                int featcode = handleFeature(currentType, currentAddr, featName, featVals);
                if (featcode != -1 && this.featsSeen != null ) {
                  this.featsSeen.add(featcode);
                }
              }
            }
            //if this is a preexisting FS which is not a Sofa FS,
            //set the features that were not deserialized to null.
            if (sofaTypeCode != currentType.getCode() && this.featsSeen != null) {
              int[] feats = casBeingFilled.getTypeSystemImpl().ll_getAppropriateFeatures(currentType.getCode());
              for (int i=0; i<  feats.length; i++) {
                  if ( !this.featsSeen.contains(feats[i]) ) {
                  casBeingFilled.setFeatureValue(currentAddr, feats[i], CASImpl.NULL);
                }
              }
              this.featsSeen = null;
            }
           
          }
          this.state = FS_STATE;
          break;
        }
        case IGNORING_XMI_ELEMENTS_STATE: {
          ignoreDepth--;
          if (ignoreDepth == 0) {
            this.state = FS_STATE;
          }
          break;
        }
      }
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    public void endDocument() throws SAXException {
      // Resolve ID references, and add FSs to indexes
      for (int i = 0; i < deserializedFsAddrs.size(); i++) {
        finalizeFS(deserializedFsAddrs.get(i));
      }
      for (int i = 0; i < fsListNodesFromMultivaluedProperties.size(); i++) {
        remapFSListHeads(fsListNodesFromMultivaluedProperties.get(i));
      }
      // time = System.currentTimeMillis() - time;
      // System.out.println("Done in " + new TimeSpan(time));

      for (int i = 0; i < views.size(); i++) {
        ((CASImpl) views.get(i)).updateDocumentAnnotation();
      }
     
      //check if disallowed fs was encoutered]
      if (this.disallowedViewMemberEncountered) {
        CASRuntimeException e = new CASRuntimeException(
                CASRuntimeException.DELTA_CAS_PREEXISTING_FS_DISALLOWED,
                new String[] {"Preexisting FS view member encountered." });
        throw e;
      }
    }

    /**
     * Adds this FS to the appropriate index, and applies ID remappings. For each nonprimitive,
     * non-multivalued-property feature, we need to update the feature value to point to the correct
     * heap address of the target FS.
     *
     * @param fsInfo
     */
    private void finalizeFS(int addr) throws SAXParseException {
      final int type = casBeingFilled.getHeapValue(addr);
      if (casBeingFilled.isArrayType(type)) {
        finalizeArray(type, addr);
        return;
      }
      // remap IDs for all nonprimtive, non-multivalued-property features
      int[] feats = casBeingFilled.getTypeSystemImpl().ll_getAppropriateFeatures(type);
      Feature feat;
      for (int i = 0; i < feats.length; i++) {
        feat = ts.ll_getFeatureForCode(feats[i]);
        int typeCode = ts.ll_getRangeType(feats[i]);
        if (casBeingFilled.ll_isRefType(typeCode)
                && (featureType[feats[i]] == LowLevelCAS.TYPE_CLASS_FS || feat
                        .isMultipleReferencesAllowed())) {
          int featVal = casBeingFilled.getFeatureValue(addr, feats[i]);
          if (featVal != CASImpl.NULL ) {
            int fsValAddr = CASImpl.NULL;
            try {
              fsValAddr = getFsAddrForXmiId(featVal);
            } catch (NoSuchElementException e) {
              if (!lenient) {
                throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(featVal));
              }
              else {
                // we may not have deserialized the value of this feature because it
                // was of unknown type.  We set it to null, and record in the
                // out-of-typesystem data.
                this.sharedData.addOutOfTypeSystemAttribute(
                        addr, feat.getShortName(), Integer.toString(featVal));
              }
            }
            casBeingFilled.setFeatureValue(addr, feats[i], fsValAddr);
          }
        }
      }
    }

    /**
     * Rempas ID for the "head" feature of NonEmptyFSList, but not the "tail" feature. Used for
     * FSList nodes deserialized from multi-valued properties, which already have their tail set
     * correctly.
     *
     * @param i
     */
    private void remapFSListHeads(int addr) throws SAXParseException {
      final int type = casBeingFilled.getHeapValue(addr);
      if (!listUtils.isFsListType(type))
        return;
      int[] feats = casBeingFilled.getTypeSystemImpl().ll_getAppropriateFeatures(type);
      if (feats.length == 0)
        return;
      int headFeat = feats[0];
      int featVal = casBeingFilled.getFeatureValue(addr, headFeat);
      if (featVal != CASImpl.NULL) {
        int fsValAddr = CASImpl.NULL;
        try {
          fsValAddr = getFsAddrForXmiId(featVal);
        } catch (NoSuchElementException e) {
          if (!lenient) {
            throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(featVal));
          }
          else {
            //this may be a reference to an out-of-typesystem FS
            this.sharedData.addOutOfTypeSystemAttribute(addr, CAS.FEATURE_BASE_NAME_HEAD, Integer.toString(featVal));
          }
        }
        casBeingFilled.setFeatureValue(addr, headFeat, fsValAddr);
      }
    }

    /**
     * Walk an array, remapping IDs. If called on a primitive array,this method does nothing.
     *
     * @param type
     *          CAS type code for the array
     * @param addr
     *          address of the array
     */
    private void finalizeArray(int type, int addr) throws SAXParseException {
      if (!casBeingFilled.isFSArrayType(type)) {
        // Nothing to do.
        return;
      }
      final int size = casBeingFilled.ll_getArraySize(addr);
      for (int i = 0; i < size; i++) {
        int arrayVal = casBeingFilled.getArrayValue(addr, i);
        if (arrayVal != CASImpl.NULL) {
          int arrayValAddr = CASImpl.NULL;
          try {
            arrayValAddr = getFsAddrForXmiId(arrayVal);
          } catch (NoSuchElementException e) {
            if (!lenient) {
              throw createException(XCASParsingException.UNKNOWN_ID, Integer.toString(arrayVal));
            }
            else
              // the array element may be out of typesystem.  In that case set it
              // to null, but record the id so we can add it back on next serialization.
              this.sharedData.addOutOfTypeSystemArrayElement(addr, i, arrayVal);
            }
          }
          casBeingFilled.setArrayValue(addr, i, arrayValAddr);
        }
      }
    }

    private XCASParsingException createException(int code) {
      XCASParsingException e = new XCASParsingException(code);
      String source = unknownXMLSource;
      String line = unknownXMLSource;
      String col = unknownXMLSource;
      if (locator != null) {
        source = locator.getSystemId();
        if (source == null) {
          source = locator.getPublicId();
        }
        if (source == null) {
          source = unknownXMLSource;
        }
        line = Integer.toString(locator.getLineNumber());
        col = Integer.toString(locator.getColumnNumber());
      }
      e.addArgument(source);
      e.addArgument(line);
      e.addArgument(col);
      return e;
    }

    private XCASParsingException createException(int code, String arg) {
      XCASParsingException e = createException(code);
      e.addArgument(arg);
      return e;
    }

    /*
     * (non-Javadoc)
     *
     * Parser errors are not handled here: the exception received from the parser is rethrown
     * unchanged.
     *
     * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
     */
    public void error(SAXParseException e) throws SAXException {
      throw e;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
     */
    public void fatalError(SAXParseException e) throws SAXException {
      // Fatal parse errors are rethrown unchanged.
      throw e;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
     */
    public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException {
      // Intentionally empty.
      // Since we're not validating, we don't need to do anything; this won't
      // be called.
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
     */
    public void setDocumentLocator(Locator loc) {
      // Keep the locator so that createException(int) can report source, line and column.
      // System.out.println("Setting document locator.");
      this.locator = loc;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
     */
    public void warning(SAXParseException e) throws SAXException {
      // Even warnings are rethrown unchanged rather than being ignored.
      throw e;
    }

    /**
     * Classifies a type. This returns an integer code identifying the type as one of the primitive
     * types, one of the array types, one of the list types, or a generic FS type (anything else).
     * <p>
     * The {@link LowLevelCAS#ll_getTypeClass(int)} method classifies primitives and array types,
     * but does not have a special classification for list types, which we need for XMI
     * serialization. Therefore, in addition to the type codes defined on {@link LowLevelCAS}, this
     * method can return one of the type codes TYPE_CLASS_INTLIST, TYPE_CLASS_FLOATLIST,
     * TYPE_CLASS_STRINGLIST, or TYPE_CLASS_FSLIST defined on {@link XmiCasSerializer} interface.
     *
     * @param type
     *          the type to classify
     * @return one of the TYPE_CLASS codes defined on {@link LowLevelCAS} or on the
     *         {@link XmiCasSerializer} interface.
     */
    private final int classifyType(int type) {
      // For most most types
      if (listUtils.isIntListType(type)) {
        return XmiCasSerializer.TYPE_CLASS_INTLIST;
      }
      if (listUtils.isFloatListType(type)) {
        return XmiCasSerializer.TYPE_CLASS_FLOATLIST;
      }
      if (listUtils.isStringListType(type)) {
        return XmiCasSerializer.TYPE_CLASS_STRINGLIST;
      }
      if (listUtils.isFsListType(type)) {
        return XmiCasSerializer.TYPE_CLASS_FSLIST;
      }
      return casBeingFilled.ll_getTypeClass(type);
    }
   
    private void addFsAddrXmiIdMapping(int fsAddr, int xmiId) {
      if (xmiId > 0) {
        if (mergePoint < 0) {
          //if we are not doing a merge, update the map in the XmiSerializationSharedData
          sharedData.addIdMapping(fsAddr, xmiId);
        } else {
          //if we're doing a merge, we can't update the shared map because we could
          //have duplicate xmi:id values in the different parts of the merge.
          //instead we keep a local mapping used only within this deserialization.
          localXmiIdToFsAddrMap.put(xmiId, Integer.valueOf(fsAddr));
        }
      }
    }
   
    /**
     * Gets the FS address into which the XMI element with the given ID
     * was deserialized.  This method supports merging multiple XMI documents
     * into a single CAS, by checking the XmiSerializationSharedData
     * structure to get the address of elements that are below the mergePoint
     * and are expected to already be present in the CAS.
     *
     * @param xmiId
     * @return
     */
    private int getFsAddrForXmiId(int xmiId) {
      //first check shared data (but if we're doing a merge, do so only
      //for xmi:ids below the merge point)
      if (mergePoint < 0 || !isNewFS(xmiId) ) {
        int addr = sharedData.getFsAddrForXmiId(xmiId);
        if (addr > 0) {
          return addr;
        } else {
          throw new java.util.NoSuchElementException();
        }
      } else {
        //if we're merging, then we use a local id map for FSs above the
        //merge point, since each of the different XMI CASes being merged
        //can use these same ids for different FSs.
        Integer localAddr = (Integer)localXmiIdToFsAddrMap.get(xmiId);
        if (localAddr != null) {
          return localAddr.intValue();
        } else {
          throw new java.util.NoSuchElementException();
        }
      }
    }
   
    /**
     * Adds a feature sturcture to the out-of-typesystem data.  Also sets the
     * this.outOfTypeSystemElement field, which is referred to later if we have to
     * handle features recorded as child elements.
     */
    private void addToOutOfTypeSystemData(XmlElementName xmlElementName, Attributes attrs)
            throws XCASParsingException {
      this.outOfTypeSystemElement = new OotsElementData();
      this.outOfTypeSystemElement.elementName = xmlElementName;
      String attrName, attrValue;
      for (int i = 0; i < attrs.getLength(); i++) {
        attrName = attrs.getQName(i);
        attrValue = attrs.getValue(i);
        if (attrName.equals(ID_ATTR_NAME)) {
          this.outOfTypeSystemElement.xmiId = attrValue;
        }
        else {
          this.outOfTypeSystemElement.attributes.add(
                  new XmlAttribute(attrName, attrValue));
        }
      }
      this.sharedData.addOutOfTypeSystemElement(this.outOfTypeSystemElement);
    }   

    /**
     * Adds a feature to the out-of-typesystem features list.
     * @param ootsElem object to which to add the feature
     * @param featName name of feature
     * @param featVals feature values, as a list of strings
     */
    private void addOutOfTypeSystemFeature(OotsElementData ootsElem, String featName, List<String> featVals) {
      Iterator<String> iter = featVals.iterator();
      XmlElementName elemName = new XmlElementName(null,featName,featName);
      while (iter.hasNext()) {
        ootsElem.childElements.add(new XmlElementNameAndContents(elemName, (String)iter.next()));
      }
    }
   
    private boolean isNewFS(int id) {
      return (id > this.mergePoint);
   

    /**
     * Forwards the pair (nonsharedFS, encompassingFS) to the shared data via
     * addNonsharedRefToFSMapping.
     *
     * @param nonsharedFS
     *          first value handed to the shared data
     * @param encompassingFS
     *          second value handed to the shared data
     */
    private void addNonsharedFSToEncompassingFSMapping(int nonsharedFS, int encompassingFS ) {
      // Debug aid, left disabled:
      //System.out.println("addNonsharedFSToEncompassingFSMapping" + nonsharedFS + " " + encompassingFS);
      this.sharedData.addNonsharedRefToFSMapping(nonsharedFS, encompassingFS);
    }
  }
 
  // Type system supplied to the constructor, held as a TypeSystemImpl.
  private TypeSystemImpl ts;

  // Cache used by xmiElementName2uimaTypeName: XMI namespace URI -> UIMA namespace prefix
  // (either empty or ending in a dot).
  private Map<String, String> xmiNamespaceToUimaNamespaceMap = new HashMap<String, String>();
 
  /**
   * Create a new deserializer from a type system.
   * <p>
   * Note: all CAS arguments later supplied to
   * <code>getXCASHandler()</code> must have this type system as their type system.
   *
   * @param ts
   *          The type system of the CASes to be deserialized.
   */
  public XmiCasDeserializer(TypeSystem ts, UimaContext uimaContext) {
    super();
    this.ts = (TypeSystemImpl) ts;
  }

  /**
   * Create a new deserializer from a type system, passing <code>null</code> as the UimaContext
   * to the two-argument constructor.
   *
   * @param ts
   *          The type system of the CASes to be deserialized.
   */
  public XmiCasDeserializer(TypeSystem ts) {
    this(ts, null);
  }

  /**
   * Create a default handler for deserializing a CAS from XMI.
   *
   * @param cas
   *          This CAS will be used to hold the data deserialized from the XMI
   *
   * @return The <code>DefaultHandler</code> to pass to the SAX parser.
   */
  public DefaultHandler getXmiCasHandler(CAS cas) {
    return getXmiCasHandler(cas, false);
  }

  /**
   * Create a default handler for deserializing a CAS from XMI. By default this is not lenient,
   * meaning that if the XMI references Types that are not in the Type System, an Exception will be
   * thrown. Use {@link XmiCasDeserializer#getXmiCasHandler(CAS,boolean)} to turn on lenient mode
   * and ignore any unknown types.
   *
   * @param cas
   *          This CAS will be used to hold the data deserialized from the XMI
   * @param lenient
   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
   *          exception. The default is false.
   *
   * @return The <code>DefaultHandler</code> to pass to the SAX parser.
   */
  public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient) {
    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, null, -1, AllowPreexistingFS.ignore);
  }

  /**
   * Create a default handler for deserializing a CAS from XMI. By default this is not lenient,
   * meaning that if the XMI references Types that are not in the Type System, an Exception will be
   * thrown. Use {@link XmiCasDeserializer#getXmiCasHandler(CAS,boolean)} to turn on lenient mode
   * and ignore any unknown types.
   *
   * @param cas
   *          This CAS will be used to hold the data deserialized from the XMI
   * @param lenient
   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
   *          exception. The default is false.
   * @param sharedData
   *          data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
   *          information.
   *
   * @return The <code>DefaultHandler</code> to pass to the SAX parser.
   */
  public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
          XmiSerializationSharedData sharedData) {
    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, -1, AllowPreexistingFS.ignore);
  }
 
  /**
   * Create a default handler for deserializing a CAS from XMI. By default this is not lenient,
   * meaning that if the XMI references Types that are not in the Type System, an Exception will be
   * thrown. Use {@link XmiCasDeserializer#getXmiCasHandler(CAS,boolean)} to turn on lenient mode
   * and ignore any unknown types.
   *
   * @param cas
   *          This CAS will be used to hold the data deserialized from the XMI
   * @param lenient
   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
   *          exception. The default is false.
   * @param sharedData
   *          data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
   *          information.
   * @param mergePoint
   *          used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
   *          deserialization will be done, meaning the target CAS will be reset and the entire XMI
   *          content will be deserialized. If the mergePoint is nonnegative (including 0), the
   *          target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
   *          greater than the mergePoint value will be deserialized.
   * @return The <code>DefaultHandler</code> to pass to the SAX parser.
   */
  public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
          XmiSerializationSharedData sharedData, int mergePoint) {
    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, mergePoint, AllowPreexistingFS.ignore);
 
 
  /**
   * Create a default handler for deserializing a CAS from XMI. By default this is not lenient,
   * meaning that if the XMI references Types that are not in the Type System, an Exception will be
   * thrown. Use {@link XmiCasDeserializer#getXmiCasHandler(CAS,boolean)} to turn on lenient mode
   * and ignore any unknown types.
   *
   * @param cas
   *          This CAS will be used to hold the data deserialized from the XMI
   * @param lenient
   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
   *          exception. The default is false.
   * @param sharedData
   *          data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
   *          information.
   * @param mergePoint
   *          used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
   *          deserialization will be done, meaning the target CAS will be reset and the entire XMI
   *          content will be deserialized. If the mergePoint is nonnegative (including 0), the
   *          target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
   *          greater than the mergePoint value will be deserialized.
   * @return The <code>DefaultHandler</code> to pass to the SAX parser.
   */
  public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
          XmiSerializationSharedData sharedData, int mergePoint, AllowPreexistingFS allow) {
    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, mergePoint, allow);
 

  /**
   * Deserializes a CAS from XMI.
   *
   * @param aStream
   *          input stream from which to read the XMI document
   * @param aCAS
   *          CAS into which to deserialize. This CAS must be set up with a type system that is
   *          compatible with that in the XMI
   *
   * @throws SAXException
   *           if an XML Parsing error occurs
   * @throws IOException
   *           if an I/O failure occurs
   */
  public static void deserialize(InputStream aStream, CAS aCAS) throws SAXException, IOException {
    XmiCasDeserializer.deserialize(aStream, aCAS, false, null, -1);
  }

  /**
   * Deserializes a CAS from XMI.
   *
   * @param aStream
   *          input stream from which to read the XMI document
   * @param aCAS
   *          CAS into which to deserialize. This CAS must be set up with a type system that is
   *          compatible with that in the XMI
   * @param aLenient
   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
   *          exception. The default is false.
   *
   * @throws SAXException
   *           if an XML Parsing error occurs
   * @throws IOException
   *           if an I/O failure occurs
   */
  public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient)
          throws SAXException, IOException {
    deserialize(aStream, aCAS, aLenient, null, -1);
  }

  /**
   * Deserializes a CAS from XMI.
   *
   * @param aStream
   *          input stream from which to read the XMI document
   * @param aCAS
   *          CAS into which to deserialize. This CAS must be set up with a type system that is
   *          compatible with that in the XMI
   * @param aLenient
   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
   *          exception. The default is false.
   * @param aSharedData
   *          an optional container for data that is shared between the {@link XmiCasSerializer} and the
   *          {@link XmiCasDeserializer}.  See the JavaDocs for {@link XmiSerializationSharedData} for details.
   *
   * @throws SAXException
   *           if an XML Parsing error occurs
   * @throws IOException
   *           if an I/O failure occurs
   */
  public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
          XmiSerializationSharedData aSharedData)
          throws SAXException, IOException {
    deserialize(aStream, aCAS, aLenient, aSharedData, -1);
  }
 
  /**
   * Deserializes a CAS from XMI.  This version of this method supports merging multiple XMI documents into a single CAS.
   *
   * @param aStream
   *          input stream from which to read the XMI document
   * @param aCAS
   *          CAS into which to deserialize. This CAS must be set up with a type system that is
   *          compatible with that in the XMI
   * @param aLenient
   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
   *          exception. The default is false.
   * @param aSharedData
   *          a container for data that is shared between the {@link XmiCasSerializer} and the {@link XmiCasDeserializer}.
   *          See the JavaDocs for {@link XmiSerializationSharedData} for details.
   * @param aMergePoint
   *          used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
   *          deserialization will be done, meaning the target CAS will be reset and the entire XMI
   *          content will be deserialized. If the mergePoint is nonnegative (including 0), the
   *          target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
   *          greater than the mergePoint value will be deserialized.
   * @throws SAXException
   *           if an XML Parsing error occurs
   * @throws IOException
   *           if an I/O failure occurs
   */
  public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
          XmiSerializationSharedData aSharedData, int aMergePoint)
          throws SAXException, IOException {
    XMLReader xmlReader = XMLReaderFactory.createXMLReader();
    XmiCasDeserializer deser = new XmiCasDeserializer(aCAS.getTypeSystem());
    ContentHandler handler = deser.getXmiCasHandler(aCAS, aLenient, aSharedData, aMergePoint);
    xmlReader.setContentHandler(handler);
    xmlReader.parse(new InputSource(aStream));
 
 
  /**
   * Deserializes a CAS from XMI. This version of this method supports deserializing
   * XMI document containing only deltas.  The Delta CAS XMI is in the same form
   * as a complete CAS XMI but only consists of new and modified FSs and updates
   * to Views. 
   *
   * This API is for reducing the overhead associated with serialization when calling
   * a remote service.  The service can send back only the deltas which are deserialized
   * into the original outgoing CAS.
   *
   *
   * @param aStream
   *            input stream from which to read the XMI document
   * @param aCAS
   *            CAS into which to deserialize. This CAS must be set up with a
   *            type system that is compatible with that in the XMI
   * @param aLenient
   *            if true, unknown Types will be ignored. If false, unknown
   *            Types will cause an exception. The default is false.
   * @param aSharedData
   *            a container for data that is shared between the
   *            {@link XmiCasSerializer} and the {@link XmiCasDeserializer}.
   *            See the JavaDocs for {@link XmiSerializationSharedData} for
   *            details.
   * @param aMergePoint
   *            used to support merging multiple XMI CASes. If the mergePoint
   *            is negative, "normal" deserialization will be done, meaning
   *            the target CAS will be reset and the entire XMI content will
   *            be deserialized. If the mergePoint is nonnegative (including
   *            0), the target CAS will not be reset, and only Feature
   *            Structures whose xmi:id is strictly greater than the
   *            mergePoint value will be deserialized.
   * @param allowPreexistingFS
   *            used when deserializing delta CAS whether to allow, disallow or
   *            ignore elements representing preexisting FSs or preexisting
   *            FSs updates in View element.
   *            if IGNORE, FSs below the mergePoint are ignored and only new FSs are processed.
   *            if ALLOW,  FSs below the mergePoint are processed as well as new FSs.
   *            if DISALLOW FSs below mergePoint will cause deserialization to fail. FSs below
   *               the mergePoint referenced in View element will be flagged as an error condition
   *               and will not modify the CAS being filled and an exception reporting this will
   *               be thrown at the end of deserialization.
   *             
   *
   * @throws SAXException
   *             if an XML Parsing error occurs
   * @throws IOException
   *             if an I/O failure occurs
   *            
   * NOTES:
   *     It is expected that Delta CAS serialization will serialize
   *     modified preexisting FSs first so that disallowed preexisting
   *     FSs are detected at the start and the CAS being filled is
   *     left untouched.  If a disallowed preexisting FS is encountered in
   *     the View element, the FS is ignored and the deserialization completes
   *     but throws an exception at the end.
   *           
   *     Possible performance issue with StringListFS.
   *     When processing String, StringArrayFS and StringListFS features of a preexisting FS,
   *     the string value in the CAS is updated only if it is not equal to the incoming string value.
   *     Processing of a StringListFS where a new string value has been inserted, all subsequent
   *     strings in the list will be updated with new strings.  
   *    
   */
  public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
      XmiSerializationSharedData aSharedData, int aMergePoint, AllowPreexistingFS allowPreexistingFS)
  throws SAXException, IOException {
    XMLReader xmlReader = XMLReaderFactory.createXMLReader();
    XmiCasDeserializer deser = new XmiCasDeserializer(aCAS.getTypeSystem());
    ContentHandler handler = deser.getXmiCasHandler(aCAS, aLenient, aSharedData, aMergePoint, allowPreexistingFS);
    xmlReader.setContentHandler(handler);
    xmlReader.parse(new InputSource(aStream));
 
 
  /**
   * Converts an XMI element name to a UIMA-style dotted type name.
   *
   * @param nsUri
   *          the namespace URI of the XMI element
   * @param localName
   *          the local name of the XMI element
   *
   * @return the UIMA type name corresponding to the XMI element name
   */
  private String xmiElementName2uimaTypeName(String nsUri, String localName) throws SAXException {
    // check map first to see if we've already computed the namespace mapping
    String uimaNamespace = (String) xmiNamespaceToUimaNamespaceMap.get(nsUri);
    if (uimaNamespace == null) {
      // check for the special "no-namespace" URI, which is used for UIMA types with no namespace
      if (XmiCasSerializer.DEFAULT_NAMESPACE_URI.equals(nsUri)) {
        uimaNamespace = "";
      } else {
        // Our convention is that the UIMA namespace is the URI path, with leading slashes
        // removed, trailing ".ecore" removed, and internal slashes converted to dots
        java.net.URI uri;
        try {
          uri = new URI(nsUri);
        } catch (URISyntaxException e) {
          throw new SAXException(e);
        }
        String path = uri.getPath();
        while (path.startsWith("/")) {
          path = path.substring(1);
        }
        if (path.endsWith(".ecore")) {
          path = path.substring(0, path.length() - 6);
        }
        uimaNamespace = path.replace('/', '.') + '.'; // include trailing dot for convenience
      }
      xmiNamespaceToUimaNamespaceMap.put(nsUri, uimaNamespace);
    }
    return uimaNamespace + localName;
  }
}
TOP

Related Classes of org.apache.uima.cas.impl.XmiCasDeserializer$XmiCasDeserializerHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.