Package org.dspace.content.packager

Source Code of org.dspace.content.packager.DSpaceAIPDisseminator

/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.packager;

import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;

import org.apache.log4j.Logger;
import org.dspace.app.util.Util;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bundle;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.Site;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.core.Constants;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Context;
import org.dspace.license.CreativeCommons;

import edu.harvard.hul.ois.mets.Agent;
import edu.harvard.hul.ois.mets.Loctype;
import edu.harvard.hul.ois.mets.Mets;
import edu.harvard.hul.ois.mets.MetsHdr;
import edu.harvard.hul.ois.mets.Name;
import edu.harvard.hul.ois.mets.Role;
import edu.harvard.hul.ois.mets.Div;
import edu.harvard.hul.ois.mets.Mptr;
import edu.harvard.hul.ois.mets.StructMap;
import edu.harvard.hul.ois.mets.Type;
import edu.harvard.hul.ois.mets.helper.MetsException;
import edu.harvard.hul.ois.mets.helper.PCData;
import java.util.Date;
import org.dspace.core.Utils;

/**
* Subclass of the METS packager framework to disseminate a DSpace
* Archival Information Package (AIP).  The AIP is intended to be, foremost,
* a _complete_ and _accurate_ representation of one object in the DSpace
* object model.  An AIP contains all of the information needed to restore
* the object precisely in another DSpace archive instance.
* <p>
* Configuration keys:
* <p>
* The following take as values a space-and-or-comma-separated list
* of plugin names that name *either* a DisseminationCrosswalk or
* StreamDisseminationCrosswalk plugin.  Shown are the default values.
* The value may be a simple crosswalk name, or a METS MDsec-name followed by
* a colon and the crosswalk name e.g. "DSpaceDepositLicense:DSPACE_DEPLICENSE"
*
*    # MD types to put in the sourceMD section of the object.
*    aip.disseminate.sourceMD = AIP-TECHMD
*
*    # MD types to put in the techMD section of the object (and member Bitstreams if an Item)
*    aip.disseminate.techMD = PREMIS
*
*    # MD types to put in digiprovMD section of the object.
*    #aip.disseminate.digiprovMD =
*
*    # MD types to put in the rightsMD section of the object.
*    aip.disseminate.rightsMD = DSpaceDepositLicense:DSPACE_DEPLICENSE, \
*       CreativeCommonsRDF:DSPACE_CCRDF, CreativeCommonsText:DSPACE_CCTXT, METSRights
*
*    # MD types to put in dmdSec's corresponding  the object.
*    aip.disseminate.dmd = MODS, DIM
*
* @author Larry Stone
* @author Tim Donohue
* @version $Revision: 1.1 $
* @see AbstractMETSDisseminator
* @see AbstractPackageDisseminator
*/
public class DSpaceAIPDisseminator extends AbstractMETSDisseminator
{
    private static final Logger log = Logger.getLogger(DSpaceAIPDisseminator.class);
   
    /**
     * Unique identifier for the profile of the METS document.
     * To ensure uniqueness, it is the URL that the XML schema document would
     * have _if_ there were to be one.  There is no schema at this time.
     */
    public static final String PROFILE_1_0 =
        "http://www.dspace.org/schema/aip/mets_aip_1_0.xsd";

    /** TYPE of the div containing AIP's parent handle in its mptr. */
    public static final String PARENT_DIV_TYPE = "AIP Parent Link";

    // Default MDTYPE value for deposit license -- "magic string"
    // NOTE: format is  <label-for-METS>:<DSpace-crosswalk-name>
    private static final String DSPACE_DEPOSIT_LICENSE_MDTYPE =
        "DSpaceDepositLicense:DSPACE_DEPLICENSE";

    // Default MDTYPE value for CC license in RDF -- "magic string"
    // NOTE: format is  <label-for-METS>:<DSpace-crosswalk-name>
    private static final String CREATIVE_COMMONS_RDF_MDTYPE =
        "CreativeCommonsRDF:DSPACE_CCRDF";

    // Default MDTYPE value for CC license in Text -- "magic string"
    // NOTE: format is  <label-for-METS>:<DSpace-crosswalk-name>
    private static final String CREATIVE_COMMONS_TEXT_MDTYPE =
        "CreativeCommonsText:DSPACE_CCTXT";

    // dissemination parameters passed to the AIP Disseminator
    private PackageParameters disseminateParams = null;
   
    // List of Bundles to filter on, when building AIP
    private List<String> filterBundles = new ArrayList<String>();
    // Whether 'filterBundles' specifies an exclusion list (default) or inclusion list.
    private boolean excludeBundles = true;

    @Override
    public void disseminate(Context context, DSpaceObject dso,
                            PackageParameters params, File pkgFile)
        throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException
    {
        //Before disseminating anything, save the passed in PackageParameters, so they can be used by all methods
        disseminateParams = params;

        boolean disseminate = true; //by default, always disseminate

        //if user specified to only disseminate objects updated *after* a specific date
        // (Note: this only works for Items right now, as DSpace doesn't store a
        //  last modified date for Collections or Communities)
        if(disseminateParams.containsKey("updatedAfter") && dso.getType()==Constants.ITEM)
        {
            Date afterDate = Utils.parseISO8601Date(disseminateParams.getProperty("updatedAfter"));

            //if null is returned, we couldn't parse the date!
            if(afterDate==null)
            {
                 throw new IOException("Invalid date passed in via 'updatedAfter' option. Date must be in ISO-8601 format, and include both a day and time (e.g. 2010-01-01T00:00:00).");
            }

            //check when this item was last modified.
            Item i = (Item) dso;
            if(i.getLastModified().after(afterDate))
            {
                disseminate = true;
            }
            else
            {
                disseminate = false;
            }
        }

        if(disseminate)
        {
            //just do a normal dissemination as specified by AbstractMETSDisseminator
            super.disseminate(context, dso, params, pkgFile);
        }
    }

    /**
     * Return identifier string for the METS profile this produces.
     *
     * @return string name of profile.
     */
    @Override
    public String getProfile()
    {
        return PROFILE_1_0;
    }

    /**
     * Returns name of METS fileGrp corresponding to a DSpace bundle name.
     * For AIP the mapping is direct.
     *
     * @param bname name of DSpace bundle.
     * @return string name of fileGrp
     */
    @Override
    public String bundleToFileGrp(String bname)
    {
        return bname;
    }

    /**
     * Create the metsHdr element for the AIP METS Manifest.
     * <p>
     * CREATEDATE is time at which the package (i.e. this manifest) was created.
     * LASTMODDATE is last-modified time of the target object, if available.
     * Agent describes the archive this belongs to.
     *
     * @param context DSpace Context
     * @param dso current DSpace Object
     * @param params Packager Parameters
     * @return List of crosswalk names to run
     * @throws SQLException
     * @throws IOException
     * @throws AuthorizeException
     */
    @Override
    public MetsHdr makeMetsHdr(Context context, DSpaceObject dso,
                               PackageParameters params)
    {
        MetsHdr metsHdr = new MetsHdr();

        // Note: we specifically do not add a CREATEDATE to <metsHdr>
        // as for AIPs we want md5 checksums to be identical if no content
        // has changed.  Adding a CREATEDATE changes checksum each time.

        // Add a LASTMODDATE for items
        if (dso.getType() == Constants.ITEM)
        {
            metsHdr.setLASTMODDATE(((Item) dso).getLastModified());
        }

        // Agent Custodian - name custodian, the DSpace Archive, by handle.
        Agent agent = new Agent();
        agent.setROLE(Role.CUSTODIAN);
        agent.setTYPE(Type.OTHER);
        agent.setOTHERTYPE("DSpace Archive");
        Name name = new Name();
        name.getContent()
                .add(new PCData(Site.getSiteHandle()));
        agent.getContent().add(name);
        metsHdr.getContent().add(agent);

        // Agent Creator - name creator, which is a specific version of DSpace.
        Agent agentCreator = new Agent();
        agentCreator.setROLE(Role.CREATOR);
        agentCreator.setTYPE(Type.OTHER);
        agentCreator.setOTHERTYPE("DSpace Software");
        Name creatorName = new Name();
        creatorName.getContent()
                .add(new PCData("DSpace " + Util.getSourceVersion()));
        agentCreator.getContent().add(creatorName);
        metsHdr.getContent().add(agentCreator);
       
        return metsHdr;
    }

    /**
     * Return the name of all crosswalks to run for the dmdSec section of
     * the METS Manifest.
     * <p>
     * Default is DIM (DSpace Internal Metadata) and MODS.
     *
     * @param context DSpace Context
     * @param dso current DSpace Object
     * @param params Packager Parameters
     * @return List of crosswalk names to run
     * @throws SQLException
     * @throws IOException
     * @throws AuthorizeException
     */
    @Override
    public String [] getDmdTypes(Context context, DSpaceObject dso, PackageParameters params)
        throws SQLException, IOException, AuthorizeException
    {
        String dmdTypes = ConfigurationManager.getProperty("aip.disseminate.dmd");
        if (dmdTypes == null)
        {
            String result[] = new String[2];
            result[0] = "MODS";
            result[1] = "DIM";
            return result;
        }
        else
        {
            return dmdTypes.split("\\s*,\\s*");
        }
    }

    /**
     * Return the name of all crosswalks to run for the techMD section of
     * the METS Manifest.
     * <p>
     * Default is PREMIS.
     *
     * @param context DSpace Context
     * @param dso current DSpace Object
     * @param params Packager Parameters
     * @return List of crosswalk names to run
     * @throws SQLException
     * @throws IOException
     * @throws AuthorizeException
     */
    @Override
    public String[] getTechMdTypes(Context context, DSpaceObject dso, PackageParameters params)
        throws SQLException, IOException, AuthorizeException
    {
        String techTypes = ConfigurationManager.getProperty("aip.disseminate.techMD");
        if (techTypes == null)
        {
            if (dso.getType() == Constants.BITSTREAM)
            {
                String result[] = new String[1];
                result[0] = "PREMIS";
                return result;
            }
            else
            {
                return new String[0];
            }
        }
        else
        {
            return techTypes.split("\\s*,\\s*");
        }
    }

    /**
     * Return the name of all crosswalks to run for the sourceMD section of
     * the METS Manifest.
     * <p>
     * Default is AIP-TECHMD.
     * <p>
     * In an AIP, the sourceMD element MUST include the original persistent
     * identifier (Handle) of the object, and the original persistent ID
     * (Handle) of its parent in the archive, so that it can be restored.
     *
     * @param context DSpace Context
     * @param dso current DSpace Object
     * @param params Packager Parameters
     * @return List of crosswalk names to run
     * @throws SQLException
     * @throws IOException
     * @throws AuthorizeException
     */
    @Override
    public String[] getSourceMdTypes(Context context, DSpaceObject dso, PackageParameters params)
        throws SQLException, IOException, AuthorizeException
    {
        String sourceTypes = ConfigurationManager.getProperty("aip.disseminate.sourceMD");
        if (sourceTypes == null)
        {
            String result[] = new String[1];
            result[0] = "AIP-TECHMD";
            return result;
        }
        else
        {
            return sourceTypes.split("\\s*,\\s*");
        }
    }

    /**
     * Return the name of all crosswalks to run for the digiprovMD section of
     * the METS Manifest. 
     * <p>
     * By default, none are returned
     *
     * @param context DSpace Context
     * @param dso current DSpace Object
     * @param params Packager Parameters
     * @return List of crosswalk names to run
     * @throws SQLException
     * @throws IOException
     * @throws AuthorizeException
     */
    @Override
    public String[] getDigiprovMdTypes(Context context, DSpaceObject dso, PackageParameters params)
        throws SQLException, IOException, AuthorizeException
    {
        String dpTypes = ConfigurationManager.getProperty("aip.disseminate.digiprovMD");
        if (dpTypes == null)
        {
            return new String[0];
        }
        else
        {
            return dpTypes.split("\\s*,\\s*");
        }
    }

    /**
     * Return the name of all crosswalks to run for the rightsMD section of
     * the METS Manifest.
     * <p>
     * By default, Deposit Licenses and CC Licenses will be added for Items.
     * Also, by default METSRights info will be added for all objects.
     *
     * @param context DSpace Context
     * @param dso current DSpace Object
     * @param params Packager Parameters
     * @return List of crosswalk names to run
     * @throws SQLException
     * @throws IOException
     * @throws AuthorizeException
     */
    @Override
    public String[] getRightsMdTypes(Context context, DSpaceObject dso, PackageParameters params)
        throws SQLException, IOException, AuthorizeException
    {

        List<String> result = new ArrayList<String>();
        String rTypes = ConfigurationManager.getProperty("aip.disseminate.rightsMD");

        //If unspecified in configuration file, add default settings
        if (rTypes == null)
        {
            // Licenses only apply to an Item
            if (dso.getType() == Constants.ITEM)
            {
                //By default, disseminate Deposit License, and any CC Licenses
                // to an item's rightsMD section
                if (PackageUtils.findDepositLicense(context, (Item)dso) != null)
                {
                    result.add(DSPACE_DEPOSIT_LICENSE_MDTYPE);
                }

                if (CreativeCommons.getLicenseRdfBitstream((Item)dso) != null)
                {
                    result.add(CREATIVE_COMMONS_RDF_MDTYPE);
                }
                else if (CreativeCommons.getLicenseTextBitstream((Item)dso) != null)
                {
                    result.add(CREATIVE_COMMONS_TEXT_MDTYPE);
                }
            }
           
            //By default, also add METSRights info to the rightsMD
            result.add("METSRights");
        }
        else
        {
            return rTypes.split("\\s*,\\s*");
        }

        return result.toArray(new String[result.size()]);
    }


    /**
     * Adds another structMap element to contain the "parent link" that
     * is an essential part of every AIP.  This is a structmap of one
     * div, which contains an mptr indicating the Handle of the parent
     * of this object in the archive.  The div has a unique TYPE attribute
     * value, "AIP Parent Link", and the mptr has a LOCTYPE of "HANDLE"
     * and an xlink:href containing the raw Handle value.
     * <p>
     * Note that the parent Handle has to be stored here because the
     * parent is needed to create a DSpace Object when restoring the
     * AIP; it cannot be determined later once the ingester parses it
     * out of the metadata when the crosswalks are run.  So, since the
     * crosswalks require an object to operate on, and creating the
     * object requires a parent, we cannot depend on metadata processed
     * by crosswalks (e.g.  AIP techMd) for the parent, it has to be at
     * a higher level in the AIP manifest.  The structMap is an obvious
     * and standards-compliant location for it.
     *
     * @param context DSpace context
     * @param dso Current DSpace object
     * @param params Packager Parameters
     * @param mets METS manifest
     * @throws SQLException
     * @throws IOException
     * @throws AuthorizeException
     * @throws MetsException
     */
    @Override
    public void addStructMap(Context context, DSpaceObject dso,
                               PackageParameters params, Mets mets)
        throws SQLException, IOException, AuthorizeException, MetsException
    {
        // find parent Handle
        String parentHandle = null;
        switch (dso.getType())
        {
            case Constants.ITEM:
                parentHandle = ((Item)dso).getOwningCollection().getHandle();
                break;

            case Constants.COLLECTION:
                parentHandle = (((Collection)dso).getCommunities())[0].getHandle();
                break;

            case Constants.COMMUNITY:
                Community parent = ((Community)dso).getParentCommunity();
                if (parent == null)
                {
                    parentHandle = Site.getSiteHandle();
                }
                else
                {
                    parentHandle = parent.getHandle();
                }
           case Constants.SITE:
                break;
        }

        // Parent Handle should only be null if we are creating a site-wide AIP
        if(parentHandle!=null)
        {
            // add a structMap to contain div pointing to parent:
            StructMap structMap = new StructMap();
            structMap.setID(gensym("struct"));
            structMap.setTYPE("LOGICAL");
            structMap.setLABEL("Parent");
            Div div0 = new Div();
            div0.setID(gensym("div"));
            div0.setTYPE(PARENT_DIV_TYPE);
            div0.setLABEL("Parent of this DSpace Object");
            Mptr mptr = new Mptr();
            mptr.setID(gensym("mptr"));
            mptr.setLOCTYPE(Loctype.HANDLE);
            mptr.setXlinkHref(parentHandle);
            div0.getContent().add(mptr);
            structMap.getContent().add(div0);
            mets.getContent().add(structMap);
        }
    }

    /**
     * By default, include all bundles in AIP as content.
     * <P>
     * However, if the user specified a comma separated list of bundle names
     * via the "filterBundles" (or "includeBundles")  option, then check if this
     * bundle is in that list.  If it is, return true.  If it is not, return false.
     *
     * @param bundle Bundle to check for
     * @return true if bundle should be disseminated when disseminating Item AIPs
     */
    @Override
    public boolean includeBundle(Bundle bundle)
    {
        List<String> bundleList = getBundleList();

        //Check if we are disseminating all bundles
        if(bundleList.size()==1 && bundleList.get(0).equalsIgnoreCase("all") && !this.excludeBundles)
        {
            return true; //all bundles should be disseminated
        }
        else
        {
            //Check if bundle name is in our list of filtered bundles
            boolean inList = filterBundles.contains(bundle.getName());
            //Based on whether this is an inclusion or exclusion filter,
            //return whether this bundle should be included.
            return this.excludeBundles ? !inList : inList;
        }
    }
   
    /**
     * Get our list of bundles to include/exclude in this AIP,
     * based on the passed in parameters
     * @return List of bundles to filter on
     */
    protected List<String> getBundleList()
    {
        // Check if we already have our list of bundles to filter on, if so, just return it.
        if(this.filterBundles!=null && !this.filterBundles.isEmpty())
            return this.filterBundles;
       
        // Check for 'filterBundles' option, as this allows for inclusion/exclusion of bundles.
        String bundleList = this.disseminateParams.getProperty("filterBundles");
       
        if(bundleList==null || bundleList.isEmpty())
        {
            //For backwards compatibility with DSpace 1.7.x, check the
            //'includeBundles' option to see if a list of bundles was provided
            bundleList = this.disseminateParams.getProperty("includeBundles", "+all");
            //if we are taking the 'includeBundles' value, prepend "+" to specify that this is an inclusion
            bundleList = bundleList.startsWith("+") ? bundleList : "+".concat(bundleList);
        }
        // At this point, 'bundleList' will be *non-null*. If neither option was passed in,
        // then 'bundleList' defaults to "+all" (i.e. include all bundles).
       
        //If our filter list of bundles begins with a '+', then this list
        // specifies all the bundles to *include*. Otherwise all
        // bundles *except* the listed ones are included
        if(bundleList.startsWith("+"))
        {
            this.excludeBundles = false;
            //remove the preceding '+' from our bundle list
            bundleList = bundleList.substring(1);
        }
        //Split our list of bundles to filter on commas
        this.filterBundles = Arrays.asList(bundleList.split(","));
       
        return this.filterBundles;
    }
   
   
    /**
     * Returns a user help string which should describe the
     * additional valid command-line options that this packager
     * implementation will accept when using the <code>-o</code> or
     * <code>--option</code> flags with the Packager script.
     *
     * @return a string describing additional command-line options available
     * with this packager
     */
    @Override
    public String getParameterHelp()
    {
        String parentHelp = super.getParameterHelp();

        //Return superclass help info, plus the extra parameter/option that this class supports
        return parentHelp +
                "\n\n" +
                "* filterBundles=[bundleList]      " +
                   "List of bundles specifying which Bundles should be included in an AIP. If this list starts with a '+' symbol," +
                   " then it represents a list of bundles to *include* in the AIP.  By default, the list represents a list of bundles" +
                   " to *exclude* from the AIP.";
    }
   
}
TOP

Related Classes of org.dspace.content.packager.DSpaceAIPDisseminator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.