Package org.fcrepo.server.storage.translation

Source Code of org.fcrepo.server.storage.translation.METSFedoraExtDODeserializer

/* The contents of this file are subject to the license and copyright terms
* detailed in the license directory at the root of the source tree (also
* available online at http://fedora-commons.org/license/).
*/

package org.fcrepo.server.storage.translation;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;

import java.text.ParseException;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import org.fcrepo.common.Constants;
import org.fcrepo.common.xml.format.XMLFormat;
import org.fcrepo.common.xml.namespace.XMLNamespace;
import org.fcrepo.server.errors.ObjectIntegrityException;
import org.fcrepo.server.errors.StreamIOException;
import org.fcrepo.server.errors.ValidationException;
import org.fcrepo.server.storage.types.AuditRecord;
import org.fcrepo.server.storage.types.DSBinding;
import org.fcrepo.server.storage.types.DSBindingMap;
import org.fcrepo.server.storage.types.Datastream;
import org.fcrepo.server.storage.types.DatastreamManagedContent;
import org.fcrepo.server.storage.types.DatastreamReferencedContent;
import org.fcrepo.server.storage.types.DatastreamXMLMetadata;
import org.fcrepo.server.storage.types.DigitalObject;
import org.fcrepo.server.storage.types.Disseminator;
import org.fcrepo.server.utilities.StreamUtility;
import org.fcrepo.server.validation.ValidationUtility;
import org.fcrepo.utilities.Base64;
import org.fcrepo.utilities.DateUtility;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;




/**
* Deserializes objects in the constructor-provided version of the METS Fedora
* Extension format.
*
* @author Sandy Payette
* @author Chris Wilper
*/
@SuppressWarnings("deprecation")
public class METSFedoraExtDODeserializer
        extends DefaultHandler
        implements Constants, DODeserializer {

    /**
     * The format this deserializer will read if unspecified at construction.
     * This defaults to the METS Fedora Extension 1.1 format
     * ({@code METS_EXT1_1}).
     */
    public static final XMLFormat DEFAULT_FORMAT = METS_EXT1_1;

    private static final Logger logger =
            LoggerFactory.getLogger(METSFedoraExtDODeserializer.class);

    private static final SAXParserFactory spf = SAXParserFactory.newInstance();
    static {
        spf.setValidating(false);
        spf.setNamespaceAware(true);
    }
   
    /** The format this deserializer reads. */
    private final XMLFormat m_format;

    /** The xlink namespace this deserializer understands; depends on format. */
    private final XMLNamespace m_xlink;

    /** The current translation context. */
    private int m_transContext;

    /** The object to deserialize to. */
    private DigitalObject m_obj;

    /** Buffer to build RDF expression of ADMID and DMDID relationships * */
    private StringBuffer m_relsBuffer;

    private boolean hasRels = false;

    /**
     * DMDID references found on <code>file</code> elements: each entry maps
     * the file's ID (the datastream version id) to the space-separated ids
     * listed in its DMDID attribute.
     */
    private HashMap<String, List<String>> m_dsDMDIDs; // key=dsVersionID, value=list of referenced DMDIDs

    /**
     * ADMID references found on <code>file</code> elements: each entry maps
     * the file's ID (the datastream version id) to the space-separated ids
     * listed in its ADMID attribute.
     */
    private HashMap<String, List<String>> m_dsADMIDs; // key=dsVersionID, value=list of referenced ADMIDs

    /** Hashtables to correlate audit record ids to datastreams */
    private HashMap<String, String> m_AuditIdToComponentId;

    private SAXParser m_parser;

    private String m_characterEncoding;

    /** Namespace prefix-to-URI mapping info from SAX2 startPrefixMapping events. */
    private HashMap<String, String> m_prefixMap;

    private HashMap<String, String> m_localPrefixMap;

    private ArrayList<String> m_prefixList;

    /** Variables to parse into */
    private boolean m_rootElementFound;

    private String m_agentRole;

    private String m_dsId;

    private String m_dsVersId;

    private Date m_dsCreateDate;

    private String m_dissemId;

    private String m_dissemState;

    private String m_dsState;

    private String m_dsInfoType;

    private String m_dsOtherInfoType;

    private String m_dsLabel;

    private int m_dsMDClass;

    private long m_dsSize;

    private String m_dsLocation;

    private String m_dsLocationType;

    private String m_dsMimeType;

    private String m_dsControlGrp;

    private boolean m_dsVersionable;

    private String m_dsFormatURI;

    private String[] m_dsAltIDs;

    private String m_dsChecksum;

    private String m_dsChecksumType;

    private StringBuffer m_dsXMLBuffer;

    // True while character data of the current element is being collected
    // into m_elementContent (set for the IPOWNER agent's name element and
    // for FContent elements).
    private boolean m_readingContent; // indicates reading element content

    // True while inside an FContent element of a datastream whose control
    // group is "M"; its text is base64-encoded and is decoded into
    // m_binaryContentTempFile when the element ends.
    private boolean m_readingBinaryContent; // indicates reading binary element content

    // Temporary file receiving the decoded FContent bytes; null when no
    // binary content is being read.
    private File m_binaryContentTempFile;

    // Text accumulated for the element currently being read; null when no
    // element content is being collected.
    private StringBuffer m_elementContent; // single element

    /** While parsing, are we inside XML metadata? */
    private boolean m_inXMLMetadata;

    /**
     * Used to differentiate between a metadata section in this object and a
     * metadata section in an inline XML datastream that happens to be a METS
     * document.
     */
    private int m_xmlDataLevel;

    /** String buffer for audit element contents */
    private StringBuffer m_auditBuffer;

    private String m_auditId;

    private String m_auditProcessType;

    private String m_auditAction;

    private String m_auditComponentID;

    private String m_auditResponsibility;

    private String m_auditDate;

    private String m_auditJustification;

    /**
     * Hashmap for holding disseminators during parsing, keyed by structMapId
     */
    private HashMap<String, Disseminator> m_dissems;

    /**
     * Currently-being-initialized disseminator, during structmap parsing.
     */
    private Disseminator m_diss;

    /**
     * Whether, while in structmap, we've already seen a div
     */
    private boolean m_indiv;

    /** The structMapId of the dissem currently being parsed. */
    private String m_structId;

    /**
     * Creates a deserializer that reads the default Fedora METS Extension
     * format. Equivalent to calling
     * {@link #METSFedoraExtDODeserializer(XMLFormat)} with
     * {@link #DEFAULT_FORMAT}.
     */
    public METSFedoraExtDODeserializer() {
        this(DEFAULT_FORMAT);
    }

    /**
     * Creates a deserializer that reads the given Fedora METS Extension format.
     *
     * @param format
     *        the version-specific Fedora METS Extension format.
     * @throws IllegalArgumentException
     *         if format is not a known Fedora METS Extension format.
     */
    public METSFedoraExtDODeserializer(XMLFormat format) {
        if (format.equals(METS_EXT1_0)) {
            m_xlink = OLD_XLINK;
        } else if (format.equals(METS_EXT1_1)) {
            m_xlink = XLINK;
        } else {
            throw new IllegalArgumentException("Not a METSFedoraExt format: "
                    + format.uri);
        }
        m_format = format;
    }

    //---
    // DODeserializer implementation
    //---

    /**
     * {@inheritDoc}
     * <p>
     * The returned deserializer is a new instance that reads the same format
     * as this one.
     * </p>
     */
    public DODeserializer getInstance() {
        return new METSFedoraExtDODeserializer(m_format);
    }

    /**
     * {@inheritDoc}
     */
    public void deserialize(InputStream in,
                            DigitalObject obj,
                            String encoding,
                            int transContext) throws ObjectIntegrityException,
            StreamIOException, UnsupportedEncodingException {
        logger.debug("Deserializing " + m_format.uri + " for transContext: "
                + transContext);

        // initialize sax for this parse
        try {
            m_parser = spf.newSAXParser();
        } catch (Exception e) {
            throw new RuntimeException("Error initializing SAX parser", e);
        }

        m_obj = obj;
        m_obj.setOwnerId("");
        m_obj.setLabel("");
        m_characterEncoding = encoding;
        m_transContext = transContext;
        initialize();
        try {
            m_parser.parse(in, this);
        } catch (IOException ioe) {
            throw new StreamIOException("Low-level stream IO problem occurred "
                    + "while SAX parsing this object.");
        } catch (SAXException se) {
            throw new ObjectIntegrityException("METS stream was bad : "
                    + se.getMessage());
        }
        if (!m_rootElementFound) {
            throw new ObjectIntegrityException("METS root element not found");
        }

        // POST-PROCESSING...
        // convert audit records to contain component ids
        convertAudits();
        // preserve ADMID and DMDID relationships in a RELS-INT
        // datastream, if one does not already exist.
        createRelsInt();

        DOTranslationUtility.normalizeDatastreams(m_obj,
                                                  m_transContext,
                                                  m_characterEncoding);

        if (m_format.equals(METS_EXT1_0)) {
            // DISSEMINATORS... put disseminators in the instantiated digital
            // object
            Iterator<Disseminator> dissemIter = m_dissems.values().iterator();
            while (dissemIter.hasNext()) {
                Disseminator diss = dissemIter.next();
                m_obj.disseminators(diss.dissID).add(diss);
            }
        }

    }

    //---
    // DefaultHandler overrides
    //---

    /**
     * {@inheritDoc}
     */
    @Override
    public void startPrefixMapping(String prefix, String uri) {
        // Keep the prefix map up-to-date throughout the entire parse,
        // and maintain a list of newly mapped prefixes on a per-element basis.
        m_prefixMap.put(prefix, uri);
        if (m_inXMLMetadata) {
            m_localPrefixMap.put(prefix, uri);
            m_prefixList.add(prefix);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void endPrefixMapping(String prefix) {
        m_prefixMap.remove(prefix);
        if (m_inXMLMetadata) {
            m_localPrefixMap.remove(prefix);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void startElement(String uri,
                             String localName,
                             String qName,
                             Attributes a) throws SAXException {
        if (uri.equals(METS.uri) && !m_inXMLMetadata) {
            // a new mets element is starting
            if (localName.equals("mets")) {
                m_rootElementFound = true;
                m_obj.setPid(grab(a, METS.uri, "OBJID"));
                m_obj.setLabel(grab(a, METS.uri, "LABEL"));
                if (m_format.equals(METS_EXT1_0)) {
                    // In METS_EXT 1.0, the PROFILE attribute mapped to an
                    // object property, fedora-model:contentModel.  This will be
                    // retained as an extended property in the DigitalObject.
                    m_obj.setExtProperty(MODEL.CONTENT_MODEL.uri,
                                         grab(a, METS.uri, "PROFILE"));
                    // Similarly, the TYPE attribute mapped to rdf:type, and
                    // will also be retained as an external property.
                    m_obj.setExtProperty(RDF.TYPE.uri,
                                         grab(a, METS.uri, "TYPE"));
                }
            } else if (localName.equals("metsHdr")) {
                m_obj.setCreateDate(DateUtility
                        .convertStringToDate(grab(a, METS.uri, "CREATEDATE")));
                m_obj.setLastModDate(DateUtility
                        .convertStringToDate(grab(a, METS.uri, "LASTMODDATE")));
                try {
                    m_obj.setState(DOTranslationUtility
                        .readStateAttribute(grab(a, METS.uri, "RECORDSTATUS")));
                } catch (ParseException e) {
                    throw new SAXException("Could not read object state", e);
                }
            } else if (localName.equals("agent")) {
                m_agentRole = grab(a, METS.uri, "ROLE");
            } else if (localName.equals("name")
                    && m_agentRole.equals("IPOWNER")) {
                m_readingContent = true;
                m_elementContent = new StringBuffer();
            } else if (localName.equals("amdSec")) {
                m_dsId = grab(a, METS.uri, "ID");
                m_dsState = grab(a, METS.uri, "STATUS");
                String dsVersionable = grab(a, METS.uri, "VERSIONABLE");
                if (dsVersionable != null && !dsVersionable.equals("")) {
                    m_dsVersionable =
                            new Boolean(grab(a, METS.uri, "VERSIONABLE"))
                                    .booleanValue();
                } else {
                    m_dsVersionable = true;
                }
            } else if (localName.equals("dmdSecFedora")) {
                m_dsId = grab(a, METS.uri, "ID");
                m_dsState = grab(a, METS.uri, "STATUS");
                String dsVersionable = grab(a, METS.uri, "VERSIONABLE");
                if (dsVersionable != null && !dsVersionable.equals("")) {
                    m_dsVersionable =
                            new Boolean(grab(a, METS.uri, "VERSIONABLE"))
                                    .booleanValue();
                } else {
                    m_dsVersionable = true;
                }
            } else if (localName.equals("techMD") || localName.equals("descMD")
                    || localName.equals("sourceMD")
                    || localName.equals("rightsMD")
                    || localName.equals("digiprovMD")) {
                m_dsVersId = grab(a, METS.uri, "ID");
                if (localName.equals("techMD")) {
                    m_dsMDClass = DatastreamXMLMetadata.TECHNICAL;
                }
                if (localName.equals("sourceMD")) {
                    m_dsMDClass = DatastreamXMLMetadata.SOURCE;
                }
                if (localName.equals("rightsMD")) {
                    m_dsMDClass = DatastreamXMLMetadata.RIGHTS;
                }
                if (localName.equals("digiprovMD")) {
                    m_dsMDClass = DatastreamXMLMetadata.DIGIPROV;
                }
                if (localName.equals("descMD")) {
                    m_dsMDClass = DatastreamXMLMetadata.DESCRIPTIVE;
                }
                String dateString = grab(a, METS.uri, "CREATED");
                if (dateString != null && !dateString.equals("")) {
                    m_dsCreateDate =
                            DateUtility.convertStringToDate(dateString);
                }
            } else if (localName.equals("mdWrap")) {
                m_dsInfoType = grab(a, METS.uri, "MDTYPE");
                m_dsOtherInfoType = grab(a, METS.uri, "OTHERMDTYPE");
                m_dsLabel = grab(a, METS.uri, "LABEL");
                m_dsMimeType = grab(a, METS.uri, "MIMETYPE");
                m_dsFormatURI = grab(a, METS.uri, "FORMAT_URI");
                String altIDs = grab(a, METS.uri, "ALT_IDS");
                if (altIDs.length() == 0) {
                    m_dsAltIDs = new String[0];
                } else {
                    m_dsAltIDs = altIDs.split(" ");
                }
                m_dsChecksum = grab(a, METS.uri, "CHECKSUM");
                m_dsChecksumType = grab(a, METS.uri, "CHECKSUMTYPE");
            } else if (localName.equals("xmlData")) {
                m_dsXMLBuffer = new StringBuffer();
                m_xmlDataLevel = 0;
                m_inXMLMetadata = true;
            } else if (localName.equals("fileGrp")) {
                m_dsId = grab(a, METS.uri, "ID");
                String dsVersionable = grab(a, METS.uri, "VERSIONABLE");
                if (dsVersionable != null && !dsVersionable.equals("")) {
                    m_dsVersionable =
                            new Boolean(grab(a, METS.uri, "VERSIONABLE"))
                                    .booleanValue();
                } else {
                    m_dsVersionable = true;
                }
                // reset the values for the next file
                m_dsVersId = "";
                m_dsCreateDate = null;
                m_dsMimeType = "";
                m_dsControlGrp = "";
                m_dsFormatURI = "";
                m_dsAltIDs = new String[0];
                m_dsState = grab(a, METS.uri, "STATUS");
                m_dsSize = -1;
                m_dsChecksum = "";
                m_dsChecksumType = "";
            } else if (localName.equals("file")) {
                m_dsVersId = grab(a, METS.uri, "ID");
                String dateString = grab(a, METS.uri, "CREATED");
                if (dateString != null && !dateString.equals("")) {
                    m_dsCreateDate =
                            DateUtility.convertStringToDate(dateString);
                }
                m_dsMimeType = grab(a, METS.uri, "MIMETYPE");
                m_dsControlGrp = grab(a, METS.uri, "OWNERID");
                String ADMID = grab(a, METS.uri, "ADMID");
                if (ADMID != null && !"".equals(ADMID)) {
                    ArrayList<String> al = new ArrayList<String>();
                    if (ADMID.indexOf(" ") != -1) {
                        String[] admIds = ADMID.split(" ");
                        for (String element : admIds) {
                            al.add(element);
                        }
                    } else {
                        al.add(ADMID);
                    }
                    m_dsADMIDs.put(m_dsVersId, al);
                }
                String DMDID = grab(a, METS.uri, "DMDID");
                if (DMDID != null && !"".equals(DMDID)) {
                    ArrayList<String> al = new ArrayList<String>();
                    if (DMDID.indexOf(" ") != -1) {
                        String[] dmdIds = DMDID.split(" ");
                        for (String element : dmdIds) {
                            al.add(element);
                        }
                    } else {
                        al.add(DMDID);
                    }
                    m_dsDMDIDs.put(m_dsVersId, al);
                }
                String sizeString = grab(a, METS.uri, "SIZE");
                if (sizeString != null && !sizeString.equals("")) {
                    try {
                        m_dsSize = Long.parseLong(sizeString);
                    } catch (NumberFormatException nfe) {
                        throw new SAXException("If specified, a datastream's "
                                + "SIZE attribute must be an xsd:long.");
                    }
                }
                String formatURI = grab(a, METS.uri, "FORMAT_URI");
                if (formatURI != null && !formatURI.equals("")) {
                    m_dsFormatURI = formatURI;
                }
                String altIDs = grab(a, METS.uri, "ALT_IDS");
                if (altIDs.length() == 0) {
                    m_dsAltIDs = new String[0];
                } else {
                    m_dsAltIDs = altIDs.split(" ");
                }
                m_dsChecksum = grab(a, METS.uri, "CHECKSUM");
                m_dsChecksumType = grab(a, METS.uri, "CHECKSUMTYPE");
                // inside a "file" element, it's either going to be
                // FLocat (a reference) or FContent (inline)
            } else if (localName.equals("FLocat")) {
                m_dsLabel = grab(a, m_xlink.uri, "title");
                String dsLocation = grab(a, m_xlink.uri, "href");
                if (dsLocation == null || dsLocation.equals("")) {
                    throw new SAXException("xlink:href must be specified in FLocat element");
                }

                if (m_dsControlGrp.equalsIgnoreCase("E")
                        || m_dsControlGrp.equalsIgnoreCase("R")) {

                    // URL FORMAT VALIDATION for dsLocation:
                    // make sure we have a properly formed URL (must have protocol)
                    try {
                        ValidationUtility.validateURL(dsLocation, m_dsControlGrp);
                    } catch (ValidationException ve) {
                        throw new SAXException(ve.getMessage());
                    }
                    // system will set dsLocationType for E and R datastreams...
                    m_dsLocationType = Datastream.DS_LOCATION_TYPE_URL;
                    m_dsInfoType = "DATA";
                    m_dsLocation = dsLocation;
                    instantiateDatastream(new DatastreamReferencedContent());
                } else if (m_dsControlGrp.equalsIgnoreCase("M")) {
                    // URL FORMAT VALIDATION for dsLocation:
                    // For Managed Content the URL is only checked when we are parsing a
                    // a NEW ingest file because the URL is replaced with an internal identifier
                    // once the repository has sucked in the content for storage.
                    if (m_obj.isNew()) {
                        try {
                            ValidationUtility.validateURL(dsLocation, m_dsControlGrp);
                            m_dsLocationType = Datastream.DS_LOCATION_TYPE_URL;
                        } catch (ValidationException ve) {
                            throw new SAXException(ve.getMessage());
                        }
                    } else {
                        m_dsLocationType = Datastream.DS_LOCATION_TYPE_INTERNAL;
                    }
                    m_dsInfoType = "DATA";
                    m_dsLocation = dsLocation;
                    instantiateDatastream(new DatastreamManagedContent());
                }
            } else if (localName.equals("FContent")) {
                // In METS_EXT, the FContent element contains base64-encoded
                // data.
                m_readingContent = true;
                m_elementContent = new StringBuffer();
                if (m_dsControlGrp.equalsIgnoreCase("M")) {
                    m_readingBinaryContent = true;
                    m_binaryContentTempFile = null;
                    try {
                        m_binaryContentTempFile =
                                File.createTempFile("binary-datastream", null);
                    } catch (IOException ioe) {
                        throw new SAXException(new StreamIOException("Unable to create temporary file for binary content"));
                    }
                }

            } else if (m_format.equals(METS_EXT1_0)) {
                startDisseminators(localName, a);
            }
        } else {
            if (m_inXMLMetadata) {
                // must be in xmlData... just output it, remembering the number
                // of METS:xmlData elements we see
                appendElementStart(uri, localName, qName, a, m_dsXMLBuffer);

                // METS INSIDE METS! we have an inline XML datastream
                // that is itself METS.  We do not want to parse this!
                if (uri.equals(METS.uri) && localName.equals("xmlData")) {
                    m_xmlDataLevel++;
                }
                // remember this stuff... (we don't have to look at level
                // because the audit schema doesn't allow for xml elements inside
                // these, so they're never set incorrectly)
                // signaling that we're interested in sending char data to
                // the m_auditBuffer by making it non-null, and getting
                // ready to accept data by allocating a new StringBuffer
                if (m_dsId.equals("FEDORA-AUDITTRAIL")
                        || m_dsId.equals("AUDIT")) {
                    if (localName.equals("record")) {
                        m_auditId = grab(a, uri, "ID");
                    } else if (localName.equals("process")) {
                        m_auditProcessType = grab(a, uri, "type");
                    } else if (localName.equals("action")
                            || localName.equals("componentID")
                            || localName.equals("responsibility")
                            || localName.equals("date")
                            || localName.equals("justification")) {
                        m_auditBuffer = new StringBuffer();
                    }
                }
            } else {
                // ignore all else
            }
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        if (m_inXMLMetadata) {
            if (m_auditBuffer != null) {
                m_auditBuffer.append(ch, start, length);
            } else {
                // since this data is encoded straight back to xml,
                // we need to make sure special characters &, <, >, ", and '
                // are re-converted to the xml-acceptable equivalents.
                StreamUtility.enc(ch, start, length, m_dsXMLBuffer);
            }
        } else if (m_readingContent) {
            // read normal element content into a string buffer
            if (m_elementContent != null) {
                m_elementContent.append(ch, start, length);
            }
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        // first, deal with the situation when we are processing a block of inline XML
        if (m_inXMLMetadata) {
            if (uri.equals(METS.uri) && localName.equals("xmlData")
                    && m_xmlDataLevel == 0) {
                // finished all xml metadata for this datastream
                if (m_dsId.equals("FEDORA-AUDITTRAIL")
                        || m_dsId.equals("AUDIT")) {
                    // we've been looking at an audit trail... set audit record
                    AuditRecord a = new AuditRecord();
                    // In METS each audit record is in its own <digiprovMD>
                    // element within an <amdSec>.  So, pick up the XML ID
                    // of the <digiprovMD> element for the audit record id.
                    // This amdSec is treated like a datastream, and each
                    // digiprovMD is a version, so id was parsed into dsVersId.
                    a.id = m_auditId; //m_dsVersId;
                    a.processType = m_auditProcessType;
                    a.action = m_auditAction;
                    a.componentID = m_auditComponentID;
                    a.responsibility = m_auditResponsibility;
                    a.date = DateUtility.convertStringToDate(m_auditDate);
                    a.justification = m_auditJustification;
                    m_obj.getAuditRecords().add(a);
                    m_inXMLMetadata = false; // other stuff is re-initted upon
                    // startElement for next xml metadata
                    // element
                } else {
                    // Create the right kind of datastream and add to the object
                    DatastreamXMLMetadata ds = new DatastreamXMLMetadata();
                    instantiateXMLDatastream(ds);
                    m_inXMLMetadata = false;
                    m_localPrefixMap.clear();
                }
            } else {
                // finished an element within inline xml metadata
                m_dsXMLBuffer.append("</" + qName + ">");
                // make sure we know when to pay attention to METS again
                if (uri.equals(METS.uri) && localName.equals("xmlData")) {
                    m_xmlDataLevel--;
                }
                if (m_dsId.equals("FEDORA-AUDITTRAIL")
                        || m_dsId.equals("AUDIT")) {
                    if (localName.equals("action")) {
                        m_auditAction = m_auditBuffer.toString();
                        m_auditBuffer = null;
                    } else if (localName.equals("componentID")) {
                        m_auditComponentID = m_auditBuffer.toString();
                        m_auditBuffer = null;
                    } else if (localName.equals("responsibility")) {
                        m_auditResponsibility = m_auditBuffer.toString();
                        m_auditBuffer = null;
                    } else if (localName.equals("date")) {
                        m_auditDate = m_auditBuffer.toString();
                        m_auditBuffer = null;
                    } else if (localName.equals("justification")) {
                        m_auditJustification = m_auditBuffer.toString();
                        m_auditBuffer = null;
                    }
                }
            }
            // ALL OTHER ELEMENT CASES: we are NOT processing a block of inline XML metadata
        } else {
            if (m_readingBinaryContent) {
                // In the version of METS Fedora uses, FContent assumes base64-encoded content
                if (uri.equals(METS.uri) && localName.equals("FContent")) {
                    if (m_binaryContentTempFile != null) {
                        try {
                            FileOutputStream os =
                                    new FileOutputStream(m_binaryContentTempFile);
                            // remove all spaces and newlines, this might not be necessary.
                            String elementStr =
                                    m_elementContent.toString()
                                            .replaceAll("\\s", "");
                            byte elementBytes[] = Base64.decode(elementStr);
                            os.write(elementBytes);
                            os.close();
                            m_dsLocationType = Datastream.DS_LOCATION_TYPE_INTERNAL;
                            m_dsLocation =
                                DatastreamManagedContent.TEMP_SCHEME
                                            + m_binaryContentTempFile
                                                    .getAbsolutePath();
                            instantiateDatastream(new DatastreamManagedContent());
                        } catch (FileNotFoundException fnfe) {
                            throw new SAXException(new StreamIOException("Unable to open temporary file created for binary content"));
                        } catch (IOException fnfe) {
                            throw new SAXException(new StreamIOException("Error writing to temporary file created for binary content"));
                        }
                    }
                }
                m_binaryContentTempFile = null;
                m_readingBinaryContent = false;
                m_elementContent = null;
                // all other cases...
            } else {
                if (m_readingContent) {
                    // elements for which we were reading regular content
                    if (uri.equals(METS.uri) && localName.equals("name")
                            && m_agentRole.equals("IPOWNER")) {
                        m_obj.setOwnerId(m_elementContent.toString());
                    } else if (uri.equals(METS.uri)
                            && localName.equals("agent")) {
                        m_agentRole = null;
                    }
                    m_readingContent = false;
                    m_elementContent = null;
                } else {
                    // no other processing requirements at this time
                }
            }
        }
    }

    //---
    // Instance helpers
    //---

    private void startDisseminators(String localName, Attributes a)
            throws SAXException {
        if (localName.equals("structMap")) {
            // this is a component of a disseminator.  here we assume the rest
            // of the disseminator's information will be seen later, so we
            // construct a new Disseminator object to hold the structMap...
            // and later, the other info
            //
            // Building up a global map of Disseminators, m_dissems,
            // keyed by bindingmap ID.
            //
            if (grab(a, METS.uri, "TYPE").equals("fedora:dsBindingMap")) {
                String bmId = grab(a, METS.uri, "ID");
                if (bmId == null || bmId.equals("")) {
                    throw new SAXException("structMap with TYPE "
                            + "fedora:dsBindingMap must specify a non-empty "
                            + "ID attribute.");
                } else {
                    Disseminator diss = new Disseminator();
                    diss.dsBindMapID = bmId;
                    m_dissems.put(bmId, diss);
                    m_diss = diss;
                    m_diss.dsBindMap = new DSBindingMap();
                    m_diss.dsBindMap.dsBindMapID = bmId;
                    m_indiv = false; // flag we're not looking at inner part yet
                }
            } else {
                throw new SAXException("StructMap must have TYPE fedora:dsBindingMap");
            }
        } else if (localName.equals("div")) {
            if (m_indiv) {
                // inner part of structmap
                DSBinding binding = new DSBinding();
                if (m_diss.dsBindMap.dsBindings == null) {
                    // none yet.. create array of size one
                    DSBinding[] bindings = new DSBinding[1];
                    m_diss.dsBindMap.dsBindings = bindings;
                    m_diss.dsBindMap.dsBindings[0] = binding;
                } else {
                    // need to expand the array size by one,
                    // and do an array copy.
                    int curSize = m_diss.dsBindMap.dsBindings.length;
                    DSBinding[] oldArray = m_diss.dsBindMap.dsBindings;
                    DSBinding[] newArray = new DSBinding[curSize + 1];
                    for (int i = 0; i < curSize; i++) {
                        newArray[i] = oldArray[i];
                    }
                    newArray[curSize] = binding;
                    m_diss.dsBindMap.dsBindings = newArray;
                }
                // now populate 'binding' values...we'll have
                // everything at this point except datastreamID...
                // that comes as a child: <fptr FILEID="DS2"/>
                binding.bindKeyName = grab(a, METS.uri, "TYPE");
                binding.bindLabel = grab(a, METS.uri, "LABEL");
                binding.seqNo = grab(a, METS.uri, "ORDER");
            } else {
                m_indiv = true;
                // first (outer div) part of structmap
                m_diss.dsBindMap.dsBindMechanismPID = grab(a, METS.uri, "TYPE");
                m_diss.dsBindMap.dsBindMapLabel = grab(a, METS.uri, "LABEL");
            }
        } else if (localName.equals("fptr")) {
            // assume we're inside the inner div... that's the
            // only place the fptr element is valid.
            DSBinding binding =
                    m_diss.dsBindMap.dsBindings[m_diss.dsBindMap.dsBindings.length - 1];
            binding.datastreamID = grab(a, METS.uri, "FILEID");
        } else if (localName.equals("behaviorSec")) {
            // looks like we're in a disseminator... it should be in the
            // hash by now because we've already gone through structmaps
            // ...keyed by structmap id... remember the id (group id)
            // so we can put it in when parsing serviceBinding
            m_dissemId = grab(a, METS.uri, "ID");
            m_dissemState = grab(a, METS.uri, "STATUS");
        } else if (localName.equals("serviceBinding")) {
            // remember the structId so we can grab the right dissem
            // when parsing children
            m_structId = grab(a, METS.uri, "STRUCTID");
            // grab the disseminator associated with the provided structId
            Disseminator dissem = m_dissems.get(m_structId);
            // plug known items in..
            dissem.dissID = m_dissemId;
            dissem.dissState = m_dissemState;
            // then grab the new stuff for the dissem for this element, and
            // put it in.
            dissem.dissVersionID = grab(a, METS.uri, "ID");
            dissem.bDefID = grab(a, METS.uri, "BTYPE");
            dissem.dissCreateDT =
                    DateUtility
                            .convertStringToDate(grab(a, METS.uri, "CREATED"));
            dissem.dissLabel = grab(a, METS.uri, "LABEL");
        } else if (localName.equals("interfaceMD")) {
            Disseminator dissem = m_dissems.get(m_structId);
        } else if (localName.equals("serviceBindMD")) {
            Disseminator dissem = m_dissems.get(m_structId);
            dissem.sDepID = grab(a, m_xlink.uri, "href");
        }
    }

    private void appendElementStart(String uri,
                                    String localName,
                                    String qName,
                                    Attributes a,
                                    StringBuffer out) {
        out.append("<" + qName);
        // add the current qName's namespace to m_localPrefixMap
        // and m_prefixList if it's not already in m_localPrefixMap
        // This ensures that all namespaces used in inline XML are declared within,
        // since it's supposed to be a standalone chunk.
        String[] parts = qName.split(":");
        if (parts.length == 2) {
            String nsuri = m_localPrefixMap.get(parts[0]);
            if (nsuri == null) {
                m_localPrefixMap.put(parts[0], parts[1]);
                m_prefixList.add(parts[0]);
            }
        }
        // do we have any newly-mapped namespaces?
        while (m_prefixList.size() > 0) {
            String prefix = m_prefixList.remove(0);
            out.append(" xmlns");
            if (prefix.length() > 0) {
                out.append(":");
            }
            out.append(prefix + "=\""
                    + StreamUtility.enc(m_prefixMap.get(prefix))
                    + "\"");
        }
        for (int i = 0; i < a.getLength(); i++) {
            out.append(" " + a.getQName(i) + "=\""
                    + StreamUtility.enc(a.getValue(i)) + "\"");
        }
        out.append(">");
    }

    private void instantiateDatastream(Datastream ds) throws SAXException {

        // set datastream variables with values grabbed from the SAX parse
        ds.DatastreamID = m_dsId;
        ds.DSVersionable = m_dsVersionable;
        ds.DSFormatURI = m_dsFormatURI;
        ds.DatastreamAltIDs = m_dsAltIDs;
        ds.DSVersionID = m_dsVersId;
        ds.DSLabel = m_dsLabel;
        ds.DSCreateDT = m_dsCreateDate;
        ds.DSMIME = m_dsMimeType;
        ds.DSControlGrp = m_dsControlGrp;
        ds.DSState = m_dsState;
        ds.DSLocation = m_dsLocation;
        ds.DSLocationType = m_dsLocationType;
        ds.DSInfoType = m_dsInfoType;

        if (m_dsChecksumType == null || "".equals(m_dsChecksumType)){
            ds.DSChecksumType = (Datastream.autoChecksum)
                    ? Datastream.getDefaultChecksumType()
                    : Datastream.CHECKSUMTYPE_DISABLED;
        }
        logger.debug("instantiate datastream: dsid = " + m_dsId
                + "checksumType = " + m_dsChecksumType + "checksum = "
                + m_dsChecksum);
        if (m_obj.isNew()) {
            if (m_dsChecksum != null && !m_dsChecksum.equals("")
                    && !m_dsChecksum.equals(Datastream.CHECKSUM_NONE)) {
                String tmpChecksum = ds.getChecksum();
                logger.debug("checksum = " + tmpChecksum);
                if (!m_dsChecksum.equals(tmpChecksum)) {
                    throw new SAXException(new ValidationException("Checksum Mismatch: "
                            + tmpChecksum));
                }
            }
            ds.DSChecksumType = ds.getChecksumType();
        } else {
            ds.DSChecksum = m_dsChecksum;
        }

        // Normalize the dsLocation for the deserialization context
        ds.DSLocation =
                (DOTranslationUtility.normalizeDSLocationURLs(m_obj.getPid(),
                                                              ds,
                                                              m_transContext)).DSLocation;

        // FINALLY! add the datastream to the digital object instantiation
        m_obj.addDatastreamVersion(ds, true);
    }

    private void instantiateXMLDatastream(DatastreamXMLMetadata ds)
            throws SAXException {

        // set the attrs common to all datastream versions
        ds.DatastreamID = m_dsId;
        ds.DSVersionable = m_dsVersionable;
        ds.DSFormatURI = m_dsFormatURI;
        ds.DatastreamAltIDs = m_dsAltIDs;
        ds.DSVersionID = m_dsVersId;
        ds.DSLabel = m_dsLabel;
        ds.DSCreateDT = m_dsCreateDate;
        if (m_dsMimeType == null || m_dsMimeType.equals("")) {
            ds.DSMIME = "text/xml";
        } else {
            ds.DSMIME = m_dsMimeType;
        }
        // set the attrs specific to datastream version
        ds.DSControlGrp = "X";
        ds.DSState = m_dsState;
        ds.DSLocation = m_obj.getPid() + "+" + m_dsId + "+" + m_dsVersId;
        ds.DSLocationType = m_dsLocationType;
        ds.DSInfoType = m_dsInfoType; // METS only
        ds.DSMDClass = m_dsMDClass; // METS only
        ds.DSChecksumType = m_dsChecksumType;

        // now set the xml content stream itself...
        try {
            String xmlString = m_dsXMLBuffer.toString();
            ds.xmlContent = xmlString.getBytes(m_characterEncoding);
            //LOOK! this sets bytes, not characters.  Do we want to set this?
            ds.DSSize = ds.xmlContent.length;
        } catch (Exception uee) {
            logger.debug("Error processing inline xml content in SAX parse: "
                    + uee.getMessage());
        }

        logger.debug("instantiate datastream: dsid = " + m_dsId
                + "checksumType = " + m_dsChecksumType + "checksum = "
                + m_dsChecksum);
        if (m_obj.isNew()) {
            if (m_dsChecksum != null && !m_dsChecksum.equals("")
                    && !m_dsChecksum.equals(Datastream.CHECKSUM_NONE)) {
                String tmpChecksum = ds.getChecksum();
                logger.debug("checksum = " + tmpChecksum);
                if (!m_dsChecksum.equals(tmpChecksum)) {
                    throw new SAXException(new ValidationException("Checksum Mismatch: "
                            + tmpChecksum));
                }
            }
            ds.DSChecksumType = ds.getChecksumType();
            ds.DSChecksum = m_dsChecksum;
        } else {
            ds.DSChecksum = m_dsChecksum;
        }
        // FINALLY! add the xml datastream to the digitalObject
        m_obj.addDatastreamVersion(ds, true);
    }

    /**
     * convertAudits: In Fedora 2.0 and beyond, we want self-standing audit
     * records. Make sure audit records are converted to new format that
     * contains a componentID to show what component in the object the audit
     * record is about.
     */
    private void convertAudits() {
        // Only do this if ADMID values were found in the object.
        if (m_dsADMIDs.size() > 0) {
            // Look at datastreams to see if there are audit records for them.
            // NOTE:  we do not look at disseminators because in pre-2.0
            // the disseminators did not point to their audit records as
            // did the datastreams.
            Iterator<String> dsIdIter = m_obj.datastreamIdIterator();
            while (dsIdIter.hasNext()) {
                for (Datastream ds : m_obj.datastreams(dsIdIter.next())) {
                    // ADMID processing...
                    // get list of ADMIDs that go with a datastream version
                    List<String> admIdList = m_dsADMIDs.get(ds.DSVersionID);
                    List<String> cleanAdmIdList = new ArrayList<String>();
                    if (admIdList != null) {
                        Iterator<String> admIdIter = admIdList.iterator();
                        while (admIdIter.hasNext()) {
                            String admId = admIdIter.next();
                            // Detect ADMIDs that reference audit records
                            // vs. regular admin metadata. Drop audits from
                            // the list. We know we have an audit if the ADMID
                            // is not a regular datatream in the object.
                            Iterator<Datastream> matchedDatastreams =
                                    m_obj.datastreams(admId).iterator();
                            if (matchedDatastreams.hasNext()) {

                                // Keep track of audit metadata correlated with the
                                // datastream version it's about (for later use).
                                m_AuditIdToComponentId.put(admId,
                                                           ds.DSVersionID);
                            } else {
                                // Keep track of non-audit metadata in a new list.
                                cleanAdmIdList.add(admId);
                            }
                        }
                    }
                    if (cleanAdmIdList.size() <= 0) {
                        // we keep track of admin metadata references
                        // for each datastream, but we exclude the audit
                        // records from this list.  If there are no
                        // non-audit metadata references, remove the
                        // datastream entry from the master hashmap.
                        m_dsADMIDs.remove(ds.DSVersionID);
                    } else {
                        // otherwise, update the master hashmap with the
                        // clean list of non-audit metadata
                        m_dsADMIDs.put(ds.DSVersionID, cleanAdmIdList);
                    }
                }
            }
            // Now, put component ids on audit records.  Pre-Fedora 2.0
            // datastream versions pointed to their audit records.
            Iterator<AuditRecord> iter = m_obj.getAuditRecords().iterator();
            while (iter.hasNext()) {
                AuditRecord au = iter.next();
                if (au.componentID == null || au.componentID.equals("")) {
                    // Before Fedora 2.0 audit records were associated with
                    // datastream version ids.  From now on, the datastream id
                    // will be posted as the component id in the audit record,
                    // and associations to particular datastream versions can
                    // be derived via the datastream version dates and the audit
                    // record dates.
                    String dsVersId = m_AuditIdToComponentId.get(au.id);
                    if (dsVersId != null && !dsVersId.equals("")) {
                        au.componentID =
                                dsVersId.substring(0, dsVersId.indexOf("."));
                    }

                }
            }
        }
    }

    /**
     * addRelsInt: Build an RDF relationship datastream to preserve DMDID and
     * ADMID references in the digital object when METS is converted to FOXML
     * (or other formats in the future). If there is no pre-existing RELS-INT,
     * look for DMDID and ADMID attributes to create new RELS-INT datastream.
     */
    private void createRelsInt() {

        // create a new RELS-INT datastream only if one does not already exist.
        Iterator<Datastream> metsrels =
                m_obj.datastreams("RELS-INT").iterator();
        if (metsrels.hasNext()) {
            m_relsBuffer = new StringBuffer();
            appendRDFStart(m_relsBuffer);
            Iterator<String> dsIds = m_obj.datastreamIdIterator();
            while (dsIds.hasNext()) {
                // initialize hash sets to keep a list of
                // unique DMDIDs or ADMIDs at the datatream id level.
                HashSet<String> uniqueDMDIDs = new HashSet<String>();
                HashSet<String> uniqueADMIDs = new HashSet<String>();
                // get list of datastream *versions*
                for (Datastream dsVersion : m_obj.datastreams(dsIds
                        .next())) {
                    // DMDID processing...
                    List<String> dmdIdList =
                            m_dsDMDIDs.get(dsVersion.DSVersionID);
                    if (dmdIdList != null) {
                        hasRels = true;
                        Iterator<String> dmdIdIter = dmdIdList.iterator();
                        while (dmdIdIter.hasNext()) {
                            String dmdId = dmdIdIter.next();
                            // APPEND TO RDF: record the DMDID relationship.
                            // Relationships will now be recorded at the
                            // datastream level, not the datastream version level.
                            // So, is the relationship existed on more than one
                            // datastream version, only write it once to the RDF.
                            if (!uniqueDMDIDs.contains(dmdId)) {
                                appendRDFRel(m_relsBuffer,
                                             m_obj.getPid(),
                                             dsVersion.DatastreamID,
                                             "hasDescMetadata",
                                             dmdId);
                            }
                            uniqueDMDIDs.add(dmdId);
                        }
                    }
                    // ADMID processing (already cleansed of audit refs)...
                    List<String> cleanAdmIdList =
                            m_dsADMIDs.get(dsVersion.DSVersionID);
                    if (cleanAdmIdList != null) {
                        hasRels = true;
                        Iterator<String> admIdIter = cleanAdmIdList.iterator();
                        while (admIdIter.hasNext()) {
                            String admId = admIdIter.next();
                            // APPEND TO RDF: record the ADMID relationship.
                            // Relationships will now be recorded at the
                            // datastream level, not the datastream version level.
                            // So, is the relationship existed on more than one
                            // datastream version, only write it once to the RDF.
                            if (!uniqueADMIDs.contains(admId)) {
                                appendRDFRel(m_relsBuffer,
                                             m_obj.getPid(),
                                             dsVersion.DatastreamID,
                                             "hasAdminMetadata",
                                             admId);
                            }
                            uniqueADMIDs.add(admId);
                        }
                    }
                }
            }
            // APPEND RDF: finish up and add RDF as a system-generated datastream
            if (hasRels) {
                appendRDFEnd(m_relsBuffer);
                setRDFAsDatastream(m_relsBuffer);
            } else {
                m_relsBuffer = null;
            }
        }
    }

    // Create a system-generated datastream from the RDF expression of the
    // DMDID and ADMID relationships found in the METS file.
    private void setRDFAsDatastream(StringBuffer buf) {

        DatastreamXMLMetadata ds = new DatastreamXMLMetadata();
        // set the attrs common to all datastream versions
        ds.DatastreamID = "RELS-INT";
        ds.DSVersionable = false;
        ds.DSFormatURI = m_dsFormatURI;
        ds.DatastreamAltIDs = m_dsAltIDs;
        ds.DSVersionID = "RELS-INT.0";
        ds.DSLabel =
                "DO NOT EDIT: System-generated datastream to preserve METS DMDID/ADMID relationships.";
        ds.DSCreateDT = new Date();
        ds.DSMIME = "application/rdf+xml";
        // set the attrs specific to datastream version
        ds.DSControlGrp = "X";
        ds.DSState = "A";
        ds.DSLocation =
                m_obj.getPid() + "+" + ds.DatastreamID + "+" + ds.DSVersionID;
        ds.DSLocationType = Datastream.DS_LOCATION_TYPE_INTERNAL;
        ds.DSInfoType = "DATA";
        ds.DSMDClass = DatastreamXMLMetadata.TECHNICAL;

        // now set the xml content stream itself...
        try {
            ds.xmlContent = buf.toString().getBytes(m_characterEncoding);
            ds.DSSize = ds.xmlContent.length;
        } catch (UnsupportedEncodingException uee) {
            logger.error("Encoding error when creating RELS-INT datastream", uee);
        }
        // FINALLY! add the RDF and an inline xml datastream in the digital object
        m_obj.addDatastreamVersion(ds, true);
    }

    private StringBuffer appendRDFStart(StringBuffer buf) {

        buf.append("<" + RDF.prefix + ":RDF" + " xmlns:" + RDF.prefix + "=\""
                + RDF.uri + "\"" + " xmlns:" + RELS_EXT.prefix + "=\""
                + RELS_EXT.uri + "\">\n");
        return buf;
    }

    private StringBuffer appendRDFRel(StringBuffer buf,
                                      String pid,
                                      String subjectNodeId,
                                      String relType,
                                      String objectNodeId) {

        // RDF subject node
        buf.append("    <" + RDF.prefix + ":Description " + RDF.prefix
                + ":about=\"" + "info:fedora/" + pid + "/" + subjectNodeId
                + "\">\n");
        // RDF relationship property and object node
        buf.append("        <" + RELS_EXT.prefix + ":" + relType + " "
                + RDF.prefix + ":resource=\"" + "info:fedora/" + pid + "/"
                + objectNodeId + "\"/>\n");
        buf.append("    </" + RDF.prefix + ":Description" + ">\n");
        return buf;
    }

    private void initialize() {
        // temporary variables and state variables
        m_rootElementFound = false;
        m_inXMLMetadata = false;
        m_prefixMap = new HashMap<String, String>();
        m_localPrefixMap = new HashMap<String, String>();
        m_prefixList = new ArrayList<String>();

        // temporary variables for processing datastreams
        m_dsId = "";
        m_dsVersionable = true;
        m_dsVersId = "";
        m_dsCreateDate = null;
        m_dsState = "";
        m_dsFormatURI = "";
        m_dsAltIDs = new String[0];
        m_dsSize = -1;
        m_dsLocationType = "";
        m_dsLocation = "";
        m_dsMimeType = "";
        m_dsControlGrp = "";
        m_dsInfoType = "";
        m_dsOtherInfoType = "";
        m_dsMDClass = 0;
        m_dsLabel = "";
        m_dsXMLBuffer = null;
        m_dsADMIDs = new HashMap<String, List<String>>();
        m_dsDMDIDs = new HashMap<String, List<String>>();
        m_dsChecksum = "";
        m_dsChecksumType = "";

        // temporary variables for processing disseminators
        m_dissems = new HashMap<String, Disseminator>();

        // temporary variables for processing audit records
        m_auditBuffer = null;
        m_auditId = "";
        m_auditComponentID = "";
        m_auditProcessType = "";
        m_auditAction = "";
        m_auditResponsibility = "";
        m_auditDate = "";
        m_auditJustification = "";

        m_AuditIdToComponentId = new HashMap<String, String>();
        m_relsBuffer = null;
    }

    //---
    // Static helpers
    //---

    private static StringBuffer appendRDFEnd(StringBuffer buf) {
        buf.append("</" + RDF.prefix + ":RDF>\n");
        return buf;
    }

    private static String grab(Attributes a,
                               String namespace,
                               String elementName) {
        String ret = a.getValue(namespace, elementName);
        if (ret == null) {
            ret = a.getValue(elementName);
        }
        // set null attribute value to empty string since it's
        // generally helpful in the code to avoid null pointer exception
        // when operations are performed on attributes values.
        if (ret == null) {
            ret = "";
        }
        return ret;
    }

}
TOP

Related Classes of org.fcrepo.server.storage.translation.METSFedoraExtDODeserializer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.