Package org.dspace.app.mets

Source Code of org.dspace.app.mets.METSExport

/*
* METSExport.java
*
* Version: $Revision: 3739 $
*
* Date: $Date: 2009-04-27 22:26:36 +0000 (Mon, 27 Apr 2009) $
*
* Copyright (c) 2002-2009, The DSpace Foundation.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.app.mets;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URLEncoder;
import java.sql.SQLException;
import java.util.Date;
import java.util.Properties;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.dspace.authorize.AuthorizeException;
import org.dspace.authorize.AuthorizeManager;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.ItemIterator;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.Utils;
import org.dspace.handle.HandleManager;
import org.dspace.app.util.Util;

import edu.harvard.hul.ois.mets.Agent;
import edu.harvard.hul.ois.mets.AmdSec;
import edu.harvard.hul.ois.mets.BinData;
import edu.harvard.hul.ois.mets.Checksumtype;
import edu.harvard.hul.ois.mets.Div;
import edu.harvard.hul.ois.mets.DmdSec;
import edu.harvard.hul.ois.mets.FLocat;
import edu.harvard.hul.ois.mets.FileGrp;
import edu.harvard.hul.ois.mets.FileSec;
import edu.harvard.hul.ois.mets.Loctype;
import edu.harvard.hul.ois.mets.MdWrap;
import edu.harvard.hul.ois.mets.Mdtype;
import edu.harvard.hul.ois.mets.Mets;
import edu.harvard.hul.ois.mets.MetsHdr;
import edu.harvard.hul.ois.mets.Name;
import edu.harvard.hul.ois.mets.RightsMD;
import edu.harvard.hul.ois.mets.Role;
import edu.harvard.hul.ois.mets.StructMap;
import edu.harvard.hul.ois.mets.Type;
import edu.harvard.hul.ois.mets.XmlData;
import edu.harvard.hul.ois.mets.helper.Base64;
import edu.harvard.hul.ois.mets.helper.MetsException;
import edu.harvard.hul.ois.mets.helper.MetsValidator;
import edu.harvard.hul.ois.mets.helper.MetsWriter;
import edu.harvard.hul.ois.mets.helper.PCData;
import edu.harvard.hul.ois.mets.helper.PreformedXML;

/**
* Tool for exporting DSpace AIPs with the metadata serialised in METS format
*
* @author Robert Tansley
* @version $Revision: 3739 $
*/
public class METSExport
{
    /**
     * ID of the bitstream format whose short description is "License".
     * Holds -1 until init() has set it; init() uses that sentinel to decide
     * whether initialisation has already happened.
     */
    private static int licenseFormat = -1;

    /**
     * DC-to-MODS mapping loaded by init() from
     * [dspace.dir]/config/dc2mods.cfg. Keys are Dublin Core property names
     * of the form element[.qualifier]; values are MODS XML templates in
     * which '%s' is replaced by the metadata value.
     */
    private static Properties dcToMODS;

    public static void main(String[] args) throws Exception
    {
        Context context = new Context();

        init(context);

        // create an options object and populate it
        CommandLineParser parser = new PosixParser();

        Options options = new Options();

        options.addOption("c", "collection", true,
                "Handle of collection to export");
        options.addOption("i", "item", true, "Handle of item to export");
        options.addOption("a", "all", false, "Export all items in the archive");
        options.addOption("d", "destination", true, "Destination directory");
        options.addOption("h", "help", false, "Help");

        CommandLine line = parser.parse(options, args);

        if (line.hasOption('h'))
        {
            HelpFormatter myhelp = new HelpFormatter();
            myhelp.printHelp("metsexport", options);
            System.out
                    .println("\nExport a collection:  metsexport -c hdl:123.456/789");
            System.out
                    .println("Export an item:       metsexport -i hdl:123.456/890");
            System.out.println("Export everything:    metsexport -a");

            System.exit(0);
        }

        String dest = "";

        if (line.hasOption('d'))
        {
            dest = line.getOptionValue('d');

            // Make sure it ends with a file separator
            if (!dest.endsWith(File.separator))
            {
                dest = dest + File.separator;
            }
        }

        if (line.hasOption('i'))
        {
            String handle = getHandleArg(line.getOptionValue('i'));

            // Exporting a single item
            DSpaceObject o = HandleManager.resolveToObject(context, handle);

            if ((o != null) && o instanceof Item)
            {
                writeAIP(context, (Item) o, dest);
                System.exit(0);
            }
            else
            {
                System.err.println(line.getOptionValue('i')
                        + " is not a valid item Handle");
                System.exit(1);
            }
        }

        ItemIterator items = null;
        try
        {
            if (line.hasOption('c'))
            {
                String handle = getHandleArg(line.getOptionValue('c'));

                // Exporting a collection's worth of items
                DSpaceObject o = HandleManager.resolveToObject(context, handle);

                if ((o != null) && o instanceof Collection)
                {
                    items = ((Collection) o).getItems();
                }
                else
                {
                    System.err.println(line.getOptionValue('c')
                            + " is not a valid collection Handle");
                    System.exit(1);
                }
            }

            if (line.hasOption('a'))
            {
                items = Item.findAll(context);
            }

            if (items == null)
            {
                System.err.println("Nothing to export specified!");
                System.exit(1);
            }

            while (items.hasNext())
            {
                writeAIP(context, items.next(), dest);
            }
        }
        finally
        {
            if (items != null)
                items.close();
        }
       
        context.abort();
        System.exit(0);
    }

    /**
     * Initialise various variables, read in config etc.
     *
     * @param context
     *            DSpace context
     */
    private static void init(Context context) throws SQLException, IOException
    {
        // Don't init again if initialised already
        if (licenseFormat != -1)
        {
            return;
        }

        // Find the License format
        BitstreamFormat bf = BitstreamFormat.findByShortDescription(context,
                "License");
        licenseFormat = bf.getID();

        // get path to DC->MODS map info file
        String configFile = ConfigurationManager.getProperty("dspace.dir")
                + File.separator + "config" + File.separator + "dc2mods.cfg";

        // Read it in
        InputStream is = null;
        try
        {
            is = new FileInputStream(configFile);
            dcToMODS = new Properties();
            dcToMODS.load(is);
        }
        finally
        {
            if (is != null)
                try { is.close(); } catch (IOException ioe) { }
        }
    }

    /**
     * Write out the AIP for the given item to the given directory. A new
     * directory will be created with the Handle (URL-encoded) as the directory
     * name, and inside, a mets.xml file written, together with the bitstreams.
     *
     * @param context
     *            DSpace context to use
     * @param item
     *            Item to write
     * @param dest
     *            destination directory
     */
    public static void writeAIP(Context context, Item item, String dest)
            throws SQLException, IOException, AuthorizeException, MetsException
    {
        System.out.println("Exporting item hdl:" + item.getHandle());

        // Create aip directory
        java.io.File aipDir = new java.io.File(dest
                + URLEncoder.encode("hdl:" + item.getHandle(), "UTF-8"));

        if (!aipDir.mkdir())
        {
            // Couldn't make the directory for some reason
            throw new IOException("Couldn't create " + aipDir.toString());
        }

        // Write the METS file
        FileOutputStream out = new FileOutputStream(aipDir.toString()
                + java.io.File.separator + "mets.xml");
        writeMETS(context, item, out, false);
        out.close();

        // Write bitstreams
        Bundle[] bundles = item.getBundles();

        for (int i = 0; i < bundles.length; i++)
        {
            Bitstream[] bitstreams = bundles[i].getBitstreams();

            for (int b = 0; b < bitstreams.length; b++)
            {
                // Skip license bitstream and unauthorized resources
                if ((bitstreams[b].getFormat().getID() != licenseFormat)
                        && AuthorizeManager.authorizeActionBoolean(context,
                                bitstreams[b], Constants.READ))
                {
                    out = new FileOutputStream(aipDir.toString()
                            + java.io.File.separator
                            + bitstreams[b].getName());

                    InputStream in = bitstreams[b].retrieve();
                    Utils.bufferedCopy(in, out);
                    out.close();
                    in.close();
                }
            }
        }
    }

    /**
     * Write METS metadata corresponding to the metadata for an item
     *
     * @param context
     *            DSpace context
     * @param item
     *            DSpace item to create METS object for
     * @param os
     *            A stream to write METS package to (UTF-8 encoding will be used)
     * @param fullURL
     *            if <code>true</code>, the &lt;FLocat&gt; values for each
     *            bitstream will be the full URL for that bitstream. Otherwise,
     *            only the filename itself will be used.
     */
    public static void writeMETS(Context context, Item item, OutputStream os, boolean fullURL)
            throws SQLException, IOException, AuthorizeException
    {
        try
        {
            init(context);

            // Create the METS file
            Mets mets = new Mets();

            // Top-level stuff
            mets.setOBJID("hdl:" + item.getHandle());
            mets.setLABEL("DSpace Item");
            mets.setSchema("mods", "http://www.loc.gov/mods/v3",
                    "http://www.loc.gov/standards/mods/v3/mods-3-0.xsd");

            // MetsHdr
            MetsHdr metsHdr = new MetsHdr();
            metsHdr.setCREATEDATE(new Date()); // FIXME: CREATEDATE is now:
                                               // maybe should be item create
                                               // date?

            // Agent
            Agent agent = new Agent();
            agent.setROLE(Role.CUSTODIAN);
            agent.setTYPE(Type.ORGANIZATION);

            Name name = new Name();
            name.getContent()
                    .add(
                            new PCData(ConfigurationManager
                                    .getProperty("dspace.name")));
            agent.getContent().add(name);

            metsHdr.getContent().add(agent);

            mets.getContent().add(metsHdr);

            DmdSec dmdSec = new DmdSec();
            dmdSec.setID("DMD_hdl_" + item.getHandle());

            MdWrap mdWrap = new MdWrap();
            mdWrap.setMDTYPE(Mdtype.MODS);

            XmlData xmlData = new XmlData();
            createMODS(item, xmlData);

            mdWrap.getContent().add(xmlData);
            dmdSec.getContent().add(mdWrap);
            mets.getContent().add(dmdSec);

            // amdSec
            AmdSec amdSec = new AmdSec();
            amdSec.setID("TMD_hdl_" + item.getHandle());

            // FIXME: techMD here
            // License as <rightsMD><mdWrap><binData>base64encoded</binData>...
            InputStream licenseStream = findLicense(context, item);

            if (licenseStream != null)
            {
                RightsMD rightsMD = new RightsMD();
                MdWrap rightsMDWrap = new MdWrap();
                rightsMDWrap.setMIMETYPE("text/plain");
                rightsMDWrap.setMDTYPE(Mdtype.OTHER);
                rightsMDWrap.setOTHERMDTYPE("TEXT");

                BinData binData = new BinData();
                Base64 base64 = new Base64(licenseStream);

                binData.getContent().add(base64);
                rightsMDWrap.getContent().add(binData);
                rightsMD.getContent().add(rightsMDWrap);
                amdSec.getContent().add(rightsMD);
            }

            // FIXME: History data???? Nooooo!!!!
            mets.getContent().add(amdSec);

            // fileSec
            FileSec fileSec = new FileSec();
            boolean fileSecEmpty = true;

            Bundle[] bundles = item.getBundles();

            for (int i = 0; i < bundles.length; i++)
            {
                Bitstream[] bitstreams = bundles[i].getBitstreams();

                // Unusual condition, but if no bitstreams, skip this bundle
                if (bitstreams.length == 0)
                {
                    continue;
                }
                       
                // First: we skip the license bundle, since it's included
                // elsewhere
                if (bitstreams[0].getFormat().getID() == licenseFormat)
                {
                    continue;
                }

                // Create a fileGrp
                FileGrp fileGrp = new FileGrp();

                // Bundle name for USE attribute
                if ((bundles[i].getName() != null)
                        && !bundles[i].getName().equals(""))
                {
                    fileGrp.setUSE(bundles[i].getName());
                }

                for (int bits = 0; bits < bitstreams.length; bits++)
                {
                    // What's the persistent(-ish) ID?
                    String bitstreamPID = ConfigurationManager
                            .getProperty("dspace.url")
                            + "/bitstream/"
                            + item.getHandle()
                            + "/"
                            + bitstreams[bits].getSequenceID()
                            + "/"
                            + Util.encodeBitstreamName(bitstreams[bits].getName(),
                                    "UTF-8");

                    edu.harvard.hul.ois.mets.File file = new edu.harvard.hul.ois.mets.File();

                    /*
                     * ID: we use the unique part of the persistent ID, i.e. the
                     * Handle + sequence number, but with _'s instead of /'s so
                     * it's a legal xsd:ID.
                     */
                    String xmlIDstart = item.getHandle().replaceAll("/", "_")
                            + "_";

                    file.setID(xmlIDstart + bitstreams[bits].getSequenceID());

                    String groupID = "GROUP_" + xmlIDstart
                            + bitstreams[bits].getSequenceID();

                    /*
                     * If we're in THUMBNAIL or TEXT bundles, the bitstream is
                     * extracted text or a thumbnail, so we use the name to work
                     * out which bitstream to be in the same group as
                     */
                    if ((bundles[i].getName() != null)
                            && (bundles[i].getName().equals("THUMBNAIL") || bundles[i]
                                    .getName().equals("TEXT")))
                    {
                        // Try and find the original bitstream, and chuck the
                        // derived
                        // bitstream in the same group
                        Bitstream original = findOriginalBitstream(item,
                                bitstreams[bits]);

                        if (original != null)
                        {
                            groupID = "GROUP_" + xmlIDstart
                                    + original.getSequenceID();
                        }
                    }

                    file.setGROUPID(groupID);
                    file.setOWNERID(bitstreamPID);

                    // FIXME: ADMID should point to appropriate TechMD section
                    // above
                    file
                            .setMIMETYPE(bitstreams[bits].getFormat()
                                    .getMIMEType());

                    // FIXME: CREATED: no date
                    file.setSIZE(bitstreams[bits].getSize());
                    file.setCHECKSUM(bitstreams[bits].getChecksum());
                    file.setCHECKSUMTYPE(Checksumtype.MD5);

                    // FLocat: filename is as in records, or full URL
                    // FIXME: Duplicate filenames and characters illegal to
                    // local OS may cause problems
                    FLocat flocat = new FLocat();
                    flocat.setLOCTYPE(Loctype.URL);
                    if (fullURL)
                    {
                        flocat.setXlinkHref(bitstreamPID);
                    }
                    else
                    {
                        flocat.setXlinkHref(bitstreams[bits].getName());
                    }

                    // Add FLocat to File, and File to FileGrp
                    file.getContent().add(flocat);
                    fileGrp.getContent().add(file);
                }

                // Add fileGrp to fileSec
                fileSec.getContent().add(fileGrp);
                fileSecEmpty = false;
            }

            // Add fileSec to document
            if (!fileSecEmpty)
            {
                mets.getContent().add(fileSec);
            }
           
            // FIXME: Add Structmap here, but it is empty and we won't use it now.
            StructMap structMap = new StructMap();
            Div div = new Div();
            structMap.getContent().add(div);
            mets.getContent().add(structMap);

           
            mets.validate(new MetsValidator());

            mets.write(new MetsWriter(os));
        }
        catch (MetsException e)
        {
            // We don't pass up a MetsException, so callers don't need to
            // know the details of the METS toolkit
            e.printStackTrace();
            throw new IOException(e.getMessage());
        }
    }

    /**
     * Utility to find the license bitstream from an item
     *
     * @param context
     *            DSpace context
     * @param item
     *            the item
     * @return the license as a string
     *
     * @throws IOException
     *             if the license bitstream can't be read
     */
    private static InputStream findLicense(Context context, Item item)
            throws SQLException, IOException, AuthorizeException
    {
        Bundle[] bundles = item.getBundles();

        for (int i = 0; i < bundles.length; i++)
        {
            // Assume license will be in its own bundle
            Bitstream[] bitstreams = bundles[i].getBitstreams();

            if (bitstreams.length > 0)
            {
                if (bitstreams[0].getFormat().getID() == licenseFormat)
                {
                    // Read the license into a string
                    return bitstreams[0].retrieve();
                }
            }
        }

        // Oops! No license!
        return null;
    }

    /**
     * For a bitstream that's a thumbnail or extracted text, find the
     * corresponding bitstream in the ORIGINAL bundle
     *
     * @param item
     *            the item we're dealing with
     * @param derived
     *            the derived bitstream
     *
     * @return the corresponding original bitstream (or null)
     */
    private static Bitstream findOriginalBitstream(Item item, Bitstream derived)
            throws SQLException
    {
        Bundle[] bundles = item.getBundles();

        // Filename of original will be filename of the derived bitstream
        // minus the extension (last 4 chars - .jpg or .txt)
        String originalFilename = derived.getName().substring(0,
                derived.getName().length() - 4);

        // First find "original" bundle
        for (int i = 0; i < bundles.length; i++)
        {
            if ((bundles[i].getName() != null)
                    && bundles[i].getName().equals("ORIGINAL"))
            {
                // Now find the corresponding bitstream
                Bitstream[] bitstreams = bundles[i].getBitstreams();

                for (int bsnum = 0; bsnum < bitstreams.length; bsnum++)
                {
                    if (bitstreams[bsnum].getName().equals(originalFilename))
                    {
                        return bitstreams[bsnum];
                    }
                }
            }
        }

        // Didn't find it
        return null;
    }

    /**
     * Create MODS metadata from the DC in the item, and add to the given
     * XmlData METS object.
     *
     * @param item
     *            the item
     * @param xmlData
     *            xmlData to add MODS to.
     */
    private static void createMODS(Item item, XmlData xmlData)
    {
        DCValue[] dc = item.getDC(Item.ANY, Item.ANY, Item.ANY);

        StringBuffer modsXML = new StringBuffer();

        for (int i = 0; i < dc.length; i++)
        {
            // Get the property name - element[.qualifier]
            String propName = ((dc[i].qualifier == null) ? dc[i].element
                    : (dc[i].element + "." + dc[i].qualifier));

            String modsMapping = dcToMODS.getProperty(propName);

            if (modsMapping == null)
            {
                System.err.println("WARNING: No MODS mapping for " + propName);
            }
            else
            {
                String value = dc[i].value;

                // Replace all $'s with \$ so it doesn't trip up the replaceAll!
                if (value != null && value.length() > 0)
                {
                    // RegExp note: Yes, there really does need to be this many backslashes!
                    // To have \$ inserted in the replacement, both the backslash and the dollar
                    // have to be escaped (backslash) - so the replacemenet string has to be
                    // passed as \\\$. All of those backslashes then have to escaped in the literal
                    // for them to be in string used!!!
                    value = dc[i].value.replaceAll("\\$", "\\\\\\$");
                }

                if (!(("description.provenance".equals(propName)) &&
                    ((ConfigurationManager.getBooleanProperty("oai.mets.hide-provenance", false)))))
                {
                    // Replace '%s' with DC value (with entities encoded)
                    modsXML.append(modsMapping.replaceAll("%s", Utils
                            .addEntities(value)));
                    modsXML.append("\n"); // For readability
                }
            }
        }

        PreformedXML pXML = new PreformedXML(modsXML.toString());
        xmlData.getContent().add(pXML);
    }

    /**
     * Get the handle from the command line in the form 123.456/789. Doesn't
     * matter if incoming handle has 'hdl:' or 'http://hdl....' before it.
     *
     * @param original
     *            Handle as passed in by user
     * @return Handle as can be looked up in our table
     */
    private static String getHandleArg(String original)
    {
        if (original.startsWith("hdl:"))
        {
            return original.substring(4);
        }

        if (original.startsWith("http://hdl.handle.net/"))
        {
            return original.substring(22);
        }

        return original;
    }
}
TOP

Related Classes of org.dspace.app.mets.METSExport

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.