Package org.dspace.content.crosswalk

Source Code of org.dspace.content.crosswalk.AIPTechMDCrosswalk

/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.content.crosswalk;

import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;

import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.Metadatum;
import org.dspace.content.Site;
import org.dspace.content.packager.DSpaceAIPIngester;
import org.dspace.content.packager.METSManifest;
import org.dspace.content.packager.PackageUtils;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.handle.HandleManager;

import org.jdom.Element;
import org.jdom.Namespace;

/**
 * Crosswalk of technical metadata for DSpace AIP.  This is
 * only intended for use by the METS AIP packager.   It borrows the
 * DIM XML format and DC field names, although it abuses the meaning
 * of Dublin Core terms and qualifiers because this format is
 * a complete and accurate image of all of the attributes an object
 * has in the RDBMS.
 * <p>
 * It encodes the following common properties of all archival objects:
 * <pre>
 *   identifier.uri -- persistent identifier of object in URI form (e.g. Handle URN)
 *   relation.isPartOf -- persistent identifier of object's parent in URI form (e.g. Handle URN)
 *   relation.isReferencedBy -- if relevant, persistent identifier of other objects that map this one as a child.  May repeat.
 * </pre>
 * There may also be other fields, depending on the type of object,
 * which encode attributes that are not part of the descriptive metadata and
 * are not adequately covered by other technical MD formats (i.e. PREMIS).
 * <p>
 * Configuration entries:
 * <pre>
 *    mets.dspaceAIP.ingest.createSubmitter -- boolean, create EPerson for
 *              Submitter automatically, on ingest, if it doesn't exist.
 * </pre>
 *
 * @author Larry Stone
 * @version $Revision: 1.2 $
 */
public class AIPTechMDCrosswalk
    implements DisseminationCrosswalk, IngestionCrosswalk
    /** log4j category */
    private static Logger log = Logger.getLogger(AIPTechMDCrosswalk.class);

    /**
     * Get XML namespaces of the elements this crosswalk may return.
     * Returns the XML namespaces (as JDOM objects) of the root element.
     *
     * @return array of namespaces, which may be empty.
     */
    public Namespace[] getNamespaces()
        Namespace result[] = new Namespace[1];
        result[0] = XSLTCrosswalk.DIM_NS;
        return result;

    /**
     * Get the XML Schema location(s) of the target metadata format.
     * Returns the string value of the <code>xsi:schemaLocation</code>
     * attribute that should be applied to the generated XML.
     * <p>
     * It may return the empty string if no schema is known, but crosswalk
     * authors are strongly encouraged to implement this call so their output
     * XML can be validated correctly.
     *
     * @return SchemaLocation string, including URI namespace, followed by
     *  whitespace and URI of XML schema document, or empty string if unknown.
     */
    public String getSchemaLocation()
        return "";

    /**
     * Predicate: Can this disseminator crosswalk the given object?
     * Needed by OAI-PMH server implementation.
     *
     * @param dso  dspace object, e.g. an <code>Item</code>.
     * @return true when disseminator is capable of producing metadata.
     */
    public boolean canDisseminate(DSpaceObject dso)
        //can only Disseminate SITE, COMMUNITY, COLLECTION, ITEM, BITSTREAM
        return (dso.getType()==Constants.SITE
                || dso.getType()==Constants.COMMUNITY
                || dso.getType()==Constants.COLLECTION
                || dso.getType()==Constants.ITEM
                || dso.getType()==Constants.BITSTREAM);

    /**
     * Predicate: Does this disseminator prefer to return a list of Elements,
     * rather than a single root Element?
     * <p>
     * Some metadata formats have an XML schema without a root element,
     * for example, the Dublin Core and Qualified Dublin Core formats.
     * This would be <code>true</code> for a crosswalk into QDC, since
     * it would "prefer" to return a list, since any root element it has
     * to produce would have to be part of a nonstandard schema.  In
     * most cases your implementation will want to return
     * <code>false</code>.
     *
     * @return true when disseminator prefers you call disseminateList().
     */
    public boolean preferList()
        return false;

    /**
     * Execute crosswalk, returning List of XML elements.
     * Returns a <code>List</code> of JDOM <code>Element</code> objects representing
     * the XML produced by the crosswalk.  This is typically called when
     * a list of fields is desired, e.g. for embedding in a METS document
     * <code>xmlData</code> field.
     * <p>
     * When there are no results, an
     * empty list is returned, but never <code>null</code>.
     *
     * @param dso the  DSpace Object whose metadata to export.
     * @return results of crosswalk as list of XML elements.
     * @throws CrosswalkInternalException (<code>CrosswalkException</code>) failure of the crosswalk itself.
     * @throws CrosswalkObjectNotSupported (<code>CrosswalkException</code>) Cannot crosswalk this kind of DSpace object.
     * @throws IOException  I/O failure in services this calls
     * @throws SQLException  Database failure in services this calls
     * @throws AuthorizeException current user not authorized for this operation.
     */
    public List<Element> disseminateList(DSpaceObject dso)
        throws CrosswalkException, IOException, SQLException,
        Element dim = disseminateElement(dso);
        return dim.getChildren();

    /**
     * Execute crosswalk, returning one XML root element as
     * a JDOM <code>Element</code> object.
     * This is typically the root element of a document.
     *
     * @param dso the  DSpace Object whose metadata to export.
     * @return root Element of the target metadata, never <code>null</code>
     * @throws CrosswalkInternalException (<code>CrosswalkException</code>) failure of the crosswalk itself.
     * @throws CrosswalkObjectNotSupported (<code>CrosswalkException</code>) Cannot crosswalk this kind of DSpace object.
     * @throws IOException  I/O failure in services this calls
     * @throws SQLException  Database failure in services this calls
     * @throws AuthorizeException current user not authorized for this operation.
     */
    public Element disseminateElement(DSpaceObject dso)
        throws CrosswalkException, IOException, SQLException,
        List<Metadatum> dc = new ArrayList<Metadatum>();
        if (dso.getType() == Constants.ITEM)
            Item item = (Item)dso;
            EPerson is = item.getSubmitter();
            if (is != null)
                dc.add(makeDC("creator", null, is.getEmail()));
            dc.add(makeDC("identifier", "uri", "hdl:" + item.getHandle()));
            Collection owningColl = item.getOwningCollection();
            String owner = owningColl.getHandle();
            if (owner != null)
                dc.add(makeDC("relation", "isPartOf", "hdl:" + owner));
            Collection inColl[] = item.getCollections();
            for (int i = 0; i < inColl.length; ++i)
                if (inColl[i].getID() != owningColl.getID())
                    String h = inColl[i].getHandle();
                    if (h != null)
                        dc.add(makeDC("relation", "isReferencedBy", "hdl:" + h));
            if (item.isWithdrawn())
                dc.add(makeDC("rights", "accessRights", "WITHDRAWN"));
        else if (dso.getType() == Constants.BITSTREAM)
            Bitstream bitstream = (Bitstream)dso;
            String bsName = bitstream.getName();
            if (bsName != null)
                dc.add(makeDC("title", null, bsName));
            String bsSource = bitstream.getSource();
            if (bsSource != null)
                dc.add(makeDC("title", "alternative", bsSource));
            String bsDesc = bitstream.getDescription();
            if (bsDesc != null)
                dc.add(makeDC("description", null, bsDesc));
            String bsUfmt = bitstream.getUserFormatDescription();
            if (bsUfmt != null)
                dc.add(makeDC("format", null, bsUfmt));
            BitstreamFormat bsf = bitstream.getFormat();
            dc.add(makeDC("format", "medium", bsf.getShortDescription()));
            dc.add(makeDC("format", "mimetype", bsf.getMIMEType()));
            dc.add(makeDC("format", "supportlevel", bsf.getSupportLevelText()));
            dc.add(makeDC("format", "internal", Boolean.toString(bsf.isInternal())));
        else if (dso.getType() == Constants.COLLECTION)
            Collection collection = (Collection)dso;
            dc.add(makeDC("identifier", "uri", "hdl:" + dso.getHandle()));
            Community owners[] = collection.getCommunities();
            String ownerHdl = owners[0].getHandle();
            if (ownerHdl != null)
                dc.add(makeDC("relation", "isPartOf", "hdl:" + ownerHdl));
            for (int i = 1; i < owners.length; ++i)
                String h = owners[i].getHandle();
                if (h != null)
                    dc.add(makeDC("relation", "isReferencedBy", "hdl:" + h));
        else if (dso.getType() == Constants.COMMUNITY)
            Community  community = (Community)dso;
            dc.add(makeDC("identifier", "uri", "hdl:" + dso.getHandle()));
            Community owner = community.getParentCommunity();
            String ownerHdl = null;
            if (owner == null)
                ownerHdl = Site.getSiteHandle();
                ownerHdl = owner.getHandle();

            if (ownerHdl != null)
                dc.add(makeDC("relation", "isPartOf", "hdl:" + ownerHdl));
        else if (dso.getType() == Constants.SITE)
            Site site = (Site) dso;
            //FIXME: adding two URIs for now (site handle and URL), in case site isn't using handles
            dc.add(makeDC("identifier", "uri", "hdl:" + site.getHandle()));
            dc.add(makeDC("identifier", "uri", site.getURL()));

        Metadatum result[] = (Metadatum[])dc.toArray(new Metadatum[dc.size()]);
        return XSLTDisseminationCrosswalk.createDIM(dso, result);

    private static Metadatum makeDC(String element, String qualifier, String value)
        Metadatum dcv = new Metadatum();
        dcv.schema = "dc";
        dcv.language = null;
        dcv.element = element;
        dcv.qualifier = qualifier;
        dcv.value = value;
        return dcv;

    /**
     * Ingest a whole document.  The children of the root element are
     * passed to the list-based <code>ingest</code> method, which
     * interprets them.
     */
    public void ingest(Context context, DSpaceObject dso, Element root)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
        ingest(context, dso, root.getChildren());

    /**
     * Ingest a list of DIM elements.
     * Each DIM "field" element in the "dc" schema is interpreted
     * according to the type of the target object; a nested "dim"
     * element is processed recursively.
     */
    public void ingest(Context context, DSpaceObject dso, List<Element> dimList)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
        // NOTE(review): this copy of the method has lost its braces, its
        // "else" keywords and several statements.  The nesting implied by the
        // indentation below, and the notes marking gaps, are inferences --
        // confirm against the upstream source before relying on them.
        int type = dso.getType();

        // accumulate values for bitstream format in case we have to make one
        String bsfShortName = null;
        String bsfMIMEType = null;
        int bsfSupport = BitstreamFormat.KNOWN;
        boolean bsfInternal = false;

        for (Element field : dimList)

            // if we get <dim> in a list, recurse.
            if (field.getName().equals("dim") && field.getNamespace().equals(XSLTCrosswalk.DIM_NS))
                ingest(context, dso, field.getChildren());
            else if (field.getName().equals("field") && field.getNamespace().equals(XSLTCrosswalk.DIM_NS))
                String schema = field.getAttributeValue("mdschema");
                if (schema.equals("dc"))
                    // Field key is "element" or "element.qualifier".
                    String dcField = field.getAttributeValue("element");
                    String qualifier = field.getAttributeValue("qualifier");
                    if (qualifier != null)
                        dcField += "." + qualifier;
                    String value = field.getText();

                    if (type == Constants.BITSTREAM)
                        Bitstream bitstream = (Bitstream)dso;
                        // NOTE(review): the bodies of the next four branches
                        // are missing.  disseminateElement() writes these
                        // fields from the bitstream's name, source,
                        // description and user format description, so the
                        // matching setters presumably belong here -- TODO
                        // confirm.
                        if (dcField.equals("title"))
                        else if (dcField.equals("title.alternative"))
                        else if (dcField.equals("description"))
                        else if (dcField.equals("format"))
                        else if (dcField.equals("format.medium"))
                            bsfShortName = value;
                        else if (dcField.equals("format.mimetype"))
                            bsfMIMEType = value;
                        else if (dcField.equals("format.supportlevel"))
                            // a negative ID is treated as an unrecognized support level
                            int sl = BitstreamFormat.getSupportLevelID(value);
                            if (sl < 0)
                                throw new MetadataValidationException("Got unrecognized value for bitstream support level: " + value);
                                bsfSupport = sl;
                        else if (dcField.equals("format.internal"))
                            bsfInternal = (Boolean.valueOf(value)).booleanValue();
                            log.warn("Got unrecognized DC field for Bitstream: " + dcField);
                    else if (type == Constants.ITEM)
                        Item item = (Item)dso;

                        // item submitter
                        if (dcField.equals("creator"))
                            EPerson sub = EPerson.findByEmail(context, value);

                            // if eperson doesn't exist yet, optionally create it:
                            if (sub == null)
                                //This class works in conjunction with the DSpaceAIPIngester.
                                // so, we'll use the configuration settings for that ingester
                                String configName = new DSpaceAIPIngester().getConfigurationName();

                                //Create the EPerson if specified and person doesn't already exist
                                if (ConfigurationManager.getBooleanProperty(METSManifest.CONFIG_METS_PREFIX + configName + ".ingest.createSubmitter"))
                                    sub = EPerson.create(context);
                                    // NOTE(review): nothing visible here gives
                                    // the new EPerson its e-mail address or
                                    // saves it; statements were probably lost
                                    // at this point -- TODO confirm.
                                    log.warn("Ignoring unknown Submitter=" + value + " in AIP Tech MD, no matching EPerson and 'mets.dspaceAIP.ingest.createSubmitter' is false in dspace.cfg.");
                            if (sub != null)
                            // NOTE(review): body missing; presumably records
                            // sub as the item's submitter -- TODO confirm.
                        else if (dcField.equals("rights.accessRights"))
                            //check if item is withdrawn
                            if (value.equalsIgnoreCase("WITHDRAWN"))
                            // NOTE(review): body missing; presumably marks the
                            // item as withdrawn -- TODO confirm.
                        else if(dcField.equals("identifier.uri") ||
                            // NOTE(review): the second operand of this
                            // condition is missing; going by the comment below
                            // and the Community/Collection branch it is
                            // presumably a test for "relation.isPartOf".
                            // Ignore identifier.uri (which specifies object handle)
                            // and relation.isPartOf (which specifies primary parent object)
                            // Both of these should already be set on object, as they
                            // are required/generated when a DSpaceObject is created.
                        else if (dcField.equals("relation.isReferencedBy"))
                            // This Item is referenced by other Collections.  This means
                            // it has been mapped into one or more additional collections.

                            // We'll attempt to map it to all referenced collections.
                            // But if this is a recursive ingest, it is possible some of these
                            // collections may not have been created yet. No need to worry,
                            // when each Collection is created it will create any mappings that
                            // we were unable to create now.
                            String parentHandle = value;

                            if(parentHandle!=null && !parentHandle.isEmpty())
                                //Remove 'hdl:' prefix, if it exists
                                if (parentHandle.startsWith("hdl:"))
                                    parentHandle = parentHandle.substring(4);

                                //Get parent object (if it exists)
                                DSpaceObject parentDso = HandleManager.resolveToObject(context, parentHandle);
                                //For Items, this parent *must* be a Collection
                                if(parentDso!=null && parentDso.getType()==Constants.COLLECTION)
                                    Collection collection = (Collection) parentDso;

                                    //If this item is not already mapped into this collection, map it!
                                    if (!item.isIn(collection))
                                    // NOTE(review): body missing; presumably
                                    // adds the item to collection -- TODO
                                    // confirm.
                            log.warn("Got unrecognized DC field for Item: " + dcField);

                    else if (type == Constants.COMMUNITY || type == Constants.COLLECTION)
                        if (dcField.equals("identifier.uri") || dcField.equals("relation.isPartOf"))
                            // Ignore identifier.uri (which specifies object handle)
                            // and relation.isPartOf (which specifies primary parent object)
                            // Both of these should already be set on object, as they
                            // are required/generated when a DSpaceObject is created.
                        else if (dcField.equals("relation.isReferencedBy"))
                            // Ignore relation.isReferencedBy since it only
                            // lists _extra_ mapped parents, not the primary one.
                            // DSpace currently doesn't fully support mapping of Collections/Communities
                            log.warn("Got unrecognized DC field for Collection/Community: " + dcField);
                    // fields whose mdschema is not "dc" are only logged
                    log.warn("Skipping DIM field with mdschema=\"" + schema + "\".");

                // anything that is neither a DIM <dim> nor a DIM <field> is rejected
                log.error("Got unexpected element in DIM list: "+field.toString());
                throw new MetadataValidationException("Got unexpected element in DIM list: "+field.toString());

        // final step: find or create bitstream format since it
        // takes the accumulation of a few values:
        if (type == Constants.BITSTREAM && bsfShortName != null)
            BitstreamFormat bsf = BitstreamFormat.findByShortDescription(context, bsfShortName);
            if (bsf == null && bsfMIMEType != null)
                bsf = PackageUtils.findOrCreateBitstreamFormat(context,
                // NOTE(review): the rest of this call's argument list is cut
                // off in this copy; bsfMIMEType, bsfSupport and bsfInternal
                // are collected above and not used anywhere else that is
                // visible, so they are presumably passed here -- TODO confirm.
            if (bsf != null)
                ((Bitstream) dso).setFormat(bsf);
                log.warn("Failed to find or create bitstream format named \"" + bsfShortName + "\"");

Related Classes of org.dspace.content.crosswalk.AIPTechMDCrosswalk

Copyright © 2018 All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact