Package org.dspace.content.crosswalk

Source Code of org.dspace.content.crosswalk.PREMISCrosswalk

/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.crosswalk;

import java.io.IOException;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.DSpaceObject;
import org.dspace.content.FormatIdentifier;
import org.dspace.content.Item;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.jdom.Element;
import org.jdom.Namespace;

/**
* PREMIS Crosswalk
* <p>
* Translate between DSpace Bitstream properties and PREMIS metadata format
* (see <a href="http://www.oclc.org/research/projects/pmwg/">
* http://www.oclc.org/research/projects/pmwg/</a> for details).
* This is intended to implement the requirements of the DSpace METS SIP
* specification for both ingest and dissemination.
*
* @author Larry Stone
* @version $Revision$
*/
public class PREMISCrosswalk
    implements IngestionCrosswalk, DisseminationCrosswalk
{
    /** log4j category */
    private static Logger log = Logger.getLogger(PREMISCrosswalk.class);

    private static final Namespace PREMIS_NS =
        Namespace.getNamespace("premis", "http://www.loc.gov/standards/premis");

    // XML schemaLocation fragment for this crosswalk, from config.
    private String schemaLocation =
        PREMIS_NS.getURI()+" http://www.loc.gov/standards/premis/PREMIS-v1-0.xsd";

    private static final Namespace namespaces[] = { PREMIS_NS };

    /*----------- Submission functions -------------------*/

    public void ingest(Context context, DSpaceObject dso, Element root)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        if (!(root.getName().equals("premis")))
        {
            throw new MetadataValidationException("Wrong root element for PREMIS: " + root.toString());
        }
        ingest(context, dso, root.getChildren());
    }

    public void ingest(Context context, DSpaceObject dso, List<Element> ml)
        throws CrosswalkException, IOException, SQLException, AuthorizeException
    {
        // we only understand how to crosswalk PREMIS to a Bitstream.
        if (dso.getType() != Constants.BITSTREAM)
        {
            throw new CrosswalkObjectNotSupported("Wrong target object type, PREMISCrosswalk can only crosswalk to a Bitstream.");
        }

        Bitstream bitstream = (Bitstream)dso;
        String MIMEType = null;
        String bsName = null;
        for (Element me : ml)
        {
            if (me.getName().equals("premis"))
            {
                // if we're fed a <premis> wrapper object, recurse on its guts:
                ingest(context, dso, me.getChildren());
            }
            else if (me.getName().equals("object"))
            {
                // "object" section:

                // originalName becomes new bitstream source and (default) name
                Element on = me.getChild("originalName", PREMIS_NS);
                if (on != null)
                {
                    bsName = on.getTextTrim();
                }

                // Reconcile technical metadata with bitstream content;
                // check that length and message digest (checksum) match.
                // XXX FIXME: wait for Checksum Checker code to add better test.
                Element oc = me.getChild("objectCharacteristics", PREMIS_NS);
                if (oc != null)
                {
                    String ssize = oc.getChildTextTrim("size", PREMIS_NS);
                    if (ssize != null)
                    {
                        try
                        {
                            int size = Integer.parseInt(ssize);
                            if (bitstream.getSize() != size)
                            {
                                throw new MetadataValidationException(
                                        "Bitstream size (" + String.valueOf(bitstream.getSize()) +
                                                ") does not match size in PREMIS (" + ssize + "), rejecting it.");
                            }
                        }
                        catch (NumberFormatException ne)
                        {
                            throw new MetadataValidationException("Bad number value in PREMIS object/objectCharacteristics/size: "+ssize, ne);
                        }
                    }
                    Element fixity = oc.getChild("fixity", PREMIS_NS);
                    if (fixity != null)
                    {
                        String alg = fixity.getChildTextTrim("messageDigestAlgorithm", PREMIS_NS);
                        String md = fixity.getChildTextTrim("messageDigest", PREMIS_NS);
                        String b_alg = bitstream.getChecksumAlgorithm();
                        String b_md = bitstream.getChecksum();
                        if (StringUtils.equals(alg, b_alg))
                        {
                            if (StringUtils.equals(md, b_md))
                            {
                                log.debug("Bitstream checksum agrees with PREMIS: " + bitstream.getName());
                            }
                            else
                            {
                                throw new MetadataValidationException("Bitstream " + alg + " Checksum does not match value in PREMIS (" + b_md + " != " + md + "), for bitstream: " + bitstream.getName());
                            }
                        }
                        else
                        {
                            log.warn("Cannot test checksum on bitstream=" + bitstream.getName() +
                                    ", algorithm in PREMIS is different: " + alg);
                        }
                    }

                    // Look for formatDesignation/formatName, which is
                    // MIME Type.  Match with DSpace bitstream format.
                    Element format = oc.getChild("format", PREMIS_NS);
                    if (format != null)
                    {
                        Element fd = format.getChild("formatDesignation", PREMIS_NS);
                        if (fd != null)
                        {
                            MIMEType = fd.getChildTextTrim("formatName", PREMIS_NS);
                        }
                    }
                }

                // Apply new bitstream name if we found it.
                if (bsName != null)
                {
                    bitstream.setName(bsName);
                    log.debug("Changing bitstream id="+String.valueOf(bitstream.getID())+"name and source to: "+bsName);
                }

                // reconcile bitstream format; if there's a MIMEtype,
                // get it from that, otherwise try to divine from file extension
                // (guessFormat() looks at bitstream Name, which we just set)
                BitstreamFormat bf = (MIMEType == null) ? null :
                        BitstreamFormat.findByMIMEType(context, MIMEType);
                if (bf == null)
                {
                    bf = FormatIdentifier.guessFormat(context, bitstream);
                }

                if (bf != null)
                {
                    bitstream.setFormat(bf);
                }
            }
            else
            {
                log.debug("Skipping element: " + me.toString());
            }
        }
        bitstream.update();
    }

    /*----------- Dissemination functions -------------------*/

    public Namespace[] getNamespaces()
    {
        return (Namespace[]) ArrayUtils.clone(namespaces);
    }

    public String getSchemaLocation()
    {
        return schemaLocation;
    }

    public boolean canDisseminate(DSpaceObject dso)
    {
        //PREMISCrosswalk can only crosswalk a Bitstream
        return (dso.getType() == Constants.BITSTREAM);
    }

    public Element disseminateElement(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        if (dso.getType() != Constants.BITSTREAM)
        {
            throw new CrosswalkObjectNotSupported("PREMISCrosswalk can only crosswalk a Bitstream.");
        }
        Bitstream bitstream = (Bitstream)dso;

        Element premis = new Element("premis", PREMIS_NS);
        Element object = new Element("object", PREMIS_NS);
        premis.addContent(object);

        // objectIdentifier is required
        Element oid = new Element("objectIdentifier", PREMIS_NS);
        Element oit = new Element("objectIdentifierType", PREMIS_NS);
        oit.setText("URL");
        oid.addContent(oit);
        Element oiv = new Element("objectIdentifierValue", PREMIS_NS);

        // objectIdentifier value: by preference, if available:
        //  a. DSpace "persistent" URL to bitstream, if components available.
        //  b. name of bitstream, if any
        //  c. made-up name based on sequence ID and extension.
        String sid = String.valueOf(bitstream.getSequenceID());
        String baseUrl = ConfigurationManager.getProperty("dspace.url");
        String handle = null;
        // get handle of parent Item of this bitstream, if there is one:
        Bundle[] bn = bitstream.getBundles();
        if (bn.length > 0)
        {
            Item bi[] = bn[0].getItems();
            if (bi.length > 0)
            {
                handle = bi[0].getHandle();
            }
        }
        // get or make up name for bitstream:
        String bsName = bitstream.getName();
        if (bsName == null)
        {
            String ext[] = bitstream.getFormat().getExtensions();
            bsName = "bitstream_"+sid+ (ext.length > 0 ? ext[0] : "");
        }
        if (handle != null && baseUrl != null)
        {
            oiv.setText(baseUrl
                    + "/bitstream/"
                    + URLEncoder.encode(handle, "UTF-8")
                    + "/"
                    + sid
                    + "/"
                    + URLEncoder.encode(bsName, "UTF-8"));
        }
        else
        {
            oiv.setText(URLEncoder.encode(bsName, "UTF-8"));
        }

        oid.addContent(oiv);
        object.addContent(oid);

        // objectCategory is fixed value, "File".
        Element oc = new Element("objectCategory", PREMIS_NS);
        oc.setText("File");
        object.addContent(oc);

        Element ochar = new Element("objectCharacteristics", PREMIS_NS);
        object.addContent(ochar);

        // checksum if available
        String cks = bitstream.getChecksum();
        String cka = bitstream.getChecksumAlgorithm();
        if (cks != null && cka != null)
        {
            Element fixity = new Element("fixity", PREMIS_NS);
            Element mda = new Element("messageDigestAlgorithm", PREMIS_NS);
            mda.setText(cka);
            fixity.addContent(mda);
            Element md = new Element("messageDigest", PREMIS_NS);
            md.setText(cks);
            fixity.addContent(md);
            ochar.addContent(fixity);
        }

        // size
        Element size = new Element("size", PREMIS_NS);
        size.setText(String.valueOf(bitstream.getSize()));
        ochar.addContent(size);

        //  Punt and set formatName to the MIME type; the best we can
        //  do for now in the absence of any usable global format registries.
        // objectCharacteristics/format/formatDesignation/
        //                                       formatName <- MIME Type
        //
        Element format = new Element("format", PREMIS_NS);
        Element formatDes = new Element("formatDesignation", PREMIS_NS);
        Element formatName = new Element("formatName", PREMIS_NS);
        formatName.setText(bitstream.getFormat().getMIMEType());
        formatDes.addContent(formatName);
        format.addContent(formatDes);
        ochar.addContent(format);

        // originalName <- name (or source if none)
        String oname = bitstream.getName();
        if (oname == null)
        {
            oname = bitstream.getSource();
        }
        if (oname != null)
        {
            Element on = new Element("originalName", PREMIS_NS);
            on.setText(oname);
            object.addContent(on);
        }

        return premis;
    }

    public List<Element> disseminateList(DSpaceObject dso)
        throws CrosswalkException,
               IOException, SQLException, AuthorizeException
    {
        List<Element> result = new ArrayList<Element>(1);
        result.add(disseminateElement(dso));
        return result;
    }

    public boolean preferList()
    {
        return false;
    }
}
TOP

Related Classes of org.dspace.content.crosswalk.PREMISCrosswalk

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.