Package org.dspace.rdf

Source Code of org.dspace.rdf.RDFizer

/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/

package org.dspace.rdf;

import com.hp.hpl.jena.rdf.model.Model;
import java.io.PrintWriter;
import java.sql.SQLException;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.ItemIterator;
import org.dspace.content.Site;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.handle.HandleManager;
import org.dspace.services.ConfigurationService;
import org.dspace.utils.DSpace;

/**
* This class manages the handling of RDF data in DSpace: it drives the
* conversion of DSpace Objects into RDF data and the deletion of previously
* converted data. It can be used as an instantiated object as well as from
* the command line.
*
* @author Pascal-Nicolas Becker (dspace -at- pascal -hyphen- becker -dot- de)
*/
public class RDFizer {
   
    /** Logger shared by all instances of this class. */
    private static final Logger log = Logger.getLogger(RDFizer.class);

    /** DSpace configuration service, obtained in the constructor. */
    protected ConfigurationService configurationService;

    /** If true, converted data is additionally written to System.out. */
    protected boolean stdout;
    /** If true, progress messages are echoed to System.err. */
    protected boolean verbose;
    /** If true, nothing is stored in or deleted from the triplestore. */
    protected boolean dryrun;
    /** Serialization language used when writing to stdout; the constructor sets it to "TURTLE". */
    protected String lang;
    /** Context used to resolve and read DSpaceObjects. */
    protected Context context;

    /**
     * Set to remember which DSpaceObjects were converted or deleted from the
     * triplestore already. This set is helpful when converting or deleting
     * multiple DSpaceObjects (e.g. Communities with all Subcommunities and
     * Items). Entries are keys of the form {@code type/id}.
     */
    protected Set<String> processed;

    public RDFizer() throws SQLException
    {
        this.configurationService = new DSpace().getConfigurationService();
        this.stdout = false;
        this.verbose = false;
        this.dryrun = false;
        this.lang = "TURTLE";
        this.processed = new CopyOnWriteArraySet<String>();
        this.context = new Context(Context.READ_ONLY);
    }
   
    /**
     * Replaces the context used for conversion and for determining which
     * DSpaceObjects should be deleted from the triplestore. Consider well
     * whether this is really necessary.
     * If this method is not used, the read-only context created by the
     * constructor is used.
     * <p>
     * Please consider: if your triplestore offers a public SPARQL endpoint,
     * all information readable with the provided context will be exposed to
     * the public!
     * If you store your data in a private triplestore that does not provide
     * public access, you might consider using this method to convert all data
     * stored in your repository.
     * </p>
     * <p>
     * The previously used context is only dereferenced here; it is neither
     * aborted nor completed by this method.
     * </p>
     *
     * @param context the context to use from now on
     */
    protected void overrideContext(Context context)
    {
        this.context = context;
    }

    /**
     * Returns whether all converted data is printed to stdout. The
     * serialization language is the one stored in {@code lang}, which the
     * constructor sets to Turtle.
     *
     * @return true if converted data is printed to stdout
     */
    public boolean isStdout() {
        return stdout;
    }

    /**
     * Set this to true to print all generated data to stdout. The data will be
     * stored as well, unless {@code dryrun} is set true. The serialization
     * language is the one stored in {@code lang}, which the constructor sets
     * to Turtle.
     *
     * @param stdout true to print converted data to stdout
     */
    public void setStdout(boolean stdout) {
        this.stdout = stdout;
    }
   
    /**
     * Returns whether verbose information is printed to System.err. Probably
     * this is helpful for CLI only.
     *
     * @return true if progress messages are echoed to System.err
     */
    public boolean isVerbose() {
        return verbose;
    }

    /**
     * Set this to true to print verbose information to System.err. Probably
     * this is helpful for CLI only.
     *
     * @param verbose true to echo progress messages to System.err
     */
    public void setVerbose(boolean verbose) {
        this.verbose = verbose;
    }

    /**
     * Returns whether this is a dry run. Probably this is helpful for CLI only.
     *
     * @return true if no changes are made to the triplestore
     */
    public boolean isDryrun() {
        return dryrun;
    }

    /**
     * Set this to true to prevent any changes to the triplestore. Probably
     * this is helpful for CLI usage only.
     *
     * @param dryrun true to neither store data in nor delete data from the
     *               triplestore
     */
    public void setDryrun(boolean dryrun) {
        this.dryrun = dryrun;
    }
   
    /**
     * Deletes all data stored in the triplestore (drops all named graphs and
     * cleans the default graph).
     */
    public void deleteAll()
    {
        report("Sending delete command to the triple store.");
        if (!this.dryrun) RDFConfiguration.getRDFStorage().deleteAll();
        report("Deleted all data from the triplestore.");
    }
   
    /**
     * Delete the data about the DSpaceObject from the triplestore.
     * All data about descendent Subcommunities, Collections and Items will be
     * deleted as well.
     */
    public void delete(DSpaceObject dso, boolean reset)
            throws SQLException
    {
        if (dso.getType() != Constants.SITE
                && dso.getType() != Constants.COMMUNITY
                && dso.getType() != Constants.COLLECTION
                && dso.getType() != Constants.ITEM)
        {
            throw new IllegalArgumentException(dso.getTypeText()
                    + " is currently not supported as independent entity.");
        }

        if (dso.getType() == Constants.SITE)
        {
            // we don't need to iterate over all objects, use a shorctut:
            this.deleteAll();
        }
        Callback callback = new Callback() {
            @Override
            protected void callback(DSpaceObject dso)
                    throws SQLException
            {
                String identifier = RDFUtil.generateIdentifier(context, dso);
               
                if (StringUtils.isEmpty(identifier))
                {
                    System.err.println("Cannot determine RDF URI for "
                            + dso.getTypeText() + " " + dso.getID() + "(handle "
                            + dso.getHandle() + ")" + ", skipping. Please "
                            + "delete it specifing the RDF URI.");
                    log.error("Cannot detgermine RDF URI for "
                            + dso.getTypeText() + " " + dso.getID() + "(handle "
                            + dso.getHandle() + ")" + ", skipping deletion.");
                    return;
                }
               
                report("Deleting Named Graph" + identifier);
                if (!dryrun)
                {
                    RDFConfiguration.getRDFStorage().delete(identifier);
                }
            }
        };
        this.dspaceDFS(dso, callback, false, reset);
    }
   
    /**
     * Converts and stores all DSpaceObjects that are readable for an anonymous
     * user.
     */
    public void convertAll()
            throws SQLException
    {
        report("Starting conversion of all DSpaceItems, this may take a while...");
        this.convert(new Site(), true);
        report("Conversion ended.");
    }
   
    protected void convert(DSpaceObject dso, boolean reset)
            throws SQLException
    {
        if (dso.getType() != Constants.SITE
                && dso.getType() != Constants.COMMUNITY
                && dso.getType() != Constants.COLLECTION
                && dso.getType() != Constants.ITEM)
        {
            throw new IllegalArgumentException(dso.getTypeText()
                    + " is currently not supported as independent entity.");
        }
       
        Callback callback = new Callback() {
            @Override
            protected void callback(DSpaceObject dso)
                    throws SQLException
            {
                Model converted = null;
                try
                {
                    if (dryrun)
                    {
                        converted = RDFUtil.convert(context, dso);
                    } else {
                        converted = RDFUtil.convertAndStore(context, dso);
                    }
                } catch (ItemNotArchivedException ex) {
                    if (!(dso instanceof Item)) throw new IllegalStateException(ex.getMessage(), ex);
                    report("Skipping conversion of Item " + dso.getID()
                            + " (handle " + dso.getHandle() + "): Item is not "
                            + "archived.");
                    return;
                } catch (ItemWithdrawnException ex) {
                    if (!(dso instanceof Item)) throw new IllegalStateException(ex.getMessage(), ex);
                    report("Skipping conversion of Item " + dso.getID()
                            + " (handle " + dso.getHandle() + "): Item is "
                            + "withdrawn.");
                    return;
                } catch (ItemNotDiscoverableException ex) {
                    if (!(dso instanceof Item)) throw new IllegalStateException(ex.getMessage(), ex);
                    report("Skipping conversion of Item " + dso.getID()
                            + " (handle " + dso.getHandle() + "): Item is not "
                            + "discoverable.");
                    return;
                } catch (AuthorizeException ex) {
                    report("Skipping conversion of " + dso.getTypeText() + " "
                            + dso.getID() + " (handle " + dso.getHandle() + ")"
                            + ", not authorized: " + ex.getMessage());
                    return;
                } catch (RDFMissingIdentifierException ex) {
                    String errormessage = "Skipping conversion of "
                            + dso.getTypeText() + " " + dso.getID()
                            + " (handle " + dso.getHandle() + ").";
                    log.error(errormessage, ex);
                    System.err.println(errormessage
                            + " Error while converting: " + ex.getMessage());
                   
                    return;
                }
                   
                if (stdout) {
                    if (converted == null)
                    {
                        System.err.println("Conversion of " + dso.getTypeText()
                                + " " + dso.getID() + " resulted in no data.");
                    } else {
                        converted.write(System.out, lang);
                    }
                }
                if (converted != null) converted.close();
            }
        };
       
        this.dspaceDFS(dso, callback, true, reset);
    }
   
    protected void dspaceDFS(DSpaceObject dso, Callback callback, boolean check, boolean reset)
            throws SQLException
    {
        if (dso.getType() != Constants.SITE
                && dso.getType() != Constants.COMMUNITY
                && dso.getType() != Constants.COLLECTION
                && dso.getType() != Constants.ITEM)
        {
            throw new IllegalArgumentException(dso.getTypeText()
                    + " is currently not supported as independent entity.");
        }

        if (reset)
        {
            this.processed.clear();
        }
       
        if (isProcessed(dso))
        {
            log.debug("Skipping processing of " + dso.getTypeText() + " "
                    + dso.getID() + " (handle " + dso.getHandle()
                    + "), already processed.");
            return;
        }
        markProcessed(dso);
        // this is useful to debug depth first search, but it is really noisy.
        // log.debug("Procesing " + dso.getTypeText() + " " + dso.getID() + handle + ".");
       
        // if this method is used for conversion we should check if we have the
        // permissions to read a DSO before converting all of it decendents
        // (g.e. check read permission on a community before converting all of
        // its subcommunties and collections).
        // just skip items with missing permissions and report them.
        if (check)
        {
            try
            {
                RDFUtil.isPublic(context, dso);
            } catch (ItemNotArchivedException ex) {
                if (!(dso instanceof Item)) throw new IllegalStateException(ex.getMessage(), ex);
                report("Skipping processing of Item " + dso.getID()
                        + " (handle " + dso.getHandle() + "): Item is not "
                        + "archived.");
                return;
            } catch (ItemWithdrawnException ex) {
                if (!(dso instanceof Item)) throw new IllegalStateException(ex.getMessage(), ex);
                report("Skipping processing of Item " + dso.getID()
                        + " (handle " + dso.getHandle() + "): Item is "
                        + "withdrawn.");
                return;
            } catch (ItemNotDiscoverableException ex) {
                if (!(dso instanceof Item)) throw new IllegalStateException(ex.getMessage(), ex);
                report("Skipping processing of Item " + dso.getID()
                        + " (handle " + dso.getHandle() + "): Item is not "
                        + "discoverable.");
                return;
            } catch (AuthorizeException ex) {
                report("Skipping processing of " + dso.getTypeText() + " "
                        + dso.getID() + " (handle " + dso.getHandle() + ")"
                        + ", not authorized: " + ex.getMessage());
                return;
            }
        }

        if (dso instanceof Site)
        {
            Community[] communities = Community.findAllTop(context);
            for (Community community : communities)
            {
                this.dspaceDFS(community, callback, check, false);
            }

        }
       
        if (dso instanceof Community)
        {
            Community[] subcommunities = ((Community) dso).getSubcommunities();
            for (Community sub : subcommunities)
            {
                this.dspaceDFS(sub, callback, check, false);
            }
            Collection[] collections = ((Community) dso).getCollections();
            for (Collection collection : collections)
            {
                this.dspaceDFS(collection, callback, check, false);
            }
        }
       
        if (dso instanceof Collection)
        {
            ItemIterator items = ((Collection) dso).getAllItems();
            while (items.hasNext())
            {
                Item item = items.next();
                this.dspaceDFS(item, callback, check, false);
                item.decache();
            }
        }

//        Currently Bundles and Bitsreams arn't supported as independent entities.
//        The should be converted as part of an item. So we do not need to make
//        the recursive call for them.
//       
//        if (dso instanceof Item)
//        {
//            Bundle[] bundles = ((Item) dso).getBundles();
//            for (Bundle bundle : bundles)
//            {
//                this.dspaceDFS(bundle, callback, check, false);
//            }
//        }
//       
//        if (dso instanceof Bundle)
//        {
//            Bitstream[] bistreams = ((Bundle) dso).getBitstreams();
//            for (Bitstream bitstream : bistreams)
//            {
//                this.dspaceDFS(bitstream, callback, check, false);
//            }
//        }
       
        callback.callback(dso);
        report("Processed " + dso.getTypeText() + " " + dso.getID()
                + " (handle " + dso.getHandle() + ").");
    }
   
    protected boolean isProcessed(DSpaceObject dso)
    {
        String key = Integer.toString(dso.getType()) + "/"
                + Integer.toString(dso.getID());
        return this.processed.contains(key);
    }
   
    protected void markProcessed(DSpaceObject dso)
    {
        String key = Integer.toString(dso.getType()) + "/"
                + Integer.toString(dso.getID());
        this.processed.add(key);
    }
   
    protected void report(String message)
    {
        if (this.verbose)
        {
            System.err.println(message);
        }
        log.debug(message);
    }
   
    protected void runCLI(String[] args)
    {
        // prepare CLI and parse arguments
        Options options = createOptions();
        CommandLineParser parser = new PosixParser();
        CommandLine line = null;
        try
        {
            line = parser.parse(options, args);
        }
        catch (ParseException ex)
        {
            usage(options);
            System.err.println();
            System.err.println(ex.getMessage());
            log.fatal(ex);
            System.exit(1);
        }
       
        String[] remainingArgs = line.getArgs();
        if (remainingArgs.length > 0)
        {
            this.usage(options);
            System.err.println();
            StringBuilder builder = new StringBuilder(100);
            for (String argument : remainingArgs)
            {
                if (builder.length() > 0) builder.append(", ");
                builder.append(argument);
            }
            String argumentsLine = builder.toString().trim();
            argumentsLine.substring(0, argumentsLine.length() - 1);
            System.err.print("Cannot recognize the following argument");
            if (remainingArgs.length >= 2) System.err.print("s");
            System.err.println(": " + argumentsLine + ".");
            System.exit(1);
        }


        // set member variables depending on CLI arguments.
        if (line.hasOption("verbose"))
        {
            setVerbose(true);
        }
       
        if (line.hasOption("dry-run"))
        {
            setDryrun(true);
        }

        if (line.hasOption("stdout"))
        {
            setStdout(true);
        }
               
        // check mutual exclusive arguments
        if (line.hasOption("delete") && line.hasOption("delete-all"))
        {
            usage(options);
            System.err.println("\n\nYou cannot use the options --delete <handle> "
                    + "and --delete-all together.");
            System.exit(1);
        }

        if (line.hasOption("convert-all")
                && (line.hasOption("delete") || line.hasOption("delete-all")))
        {
            usage(options);
            System.err.println("\n\nYou cannot use the option --convert-all "
                    + "together with --delete or --delete-all.");
            System.exit(1);
        }
        if (line.hasOption("identifiers")
                && (line.hasOption("delete") || line.hasOption("delete-all")))
        {
            usage(options);
            System.err.println("\n\nYou cannot use the option --identifiers <handle> "
                    + "together with --delete or --delete-all.");
            System.exit(1);
        }
        if (line.hasOption("stdout")
                && (line.hasOption("delete") || line.hasOption("delete-all")))
        {
            usage(options);
            System.err.println("\n\nYou cannot use the option --stdout together "
                    + "with --delete or --deleta-all.");
            System.exit(1);
        }

        // Run commands depending on CLI arguments.
        // process help first to prevent further evaluation of given options.
        if (line.hasOption('h'))
        {
            usage(options);
            System.exit(0);
        }
       
        if (line.hasOption("delete"))
        {
            String[] identifiers = line.getOptionValues("delete");
            for (String identifier : identifiers)
            {
                if (!StringUtils.startsWithIgnoreCase(identifier, "hdl:"))
                {
                    if (!this.dryrun)
                    {
                        RDFConfiguration.getRDFStorage().delete(identifier);
                    }
                    if (this.verbose)
                    {
                        System.err.println("Deleted " + identifier + ".");
                    }
                    continue;
                }
                String handle = identifier.substring(4);
               
                log.debug("Trying to resolve identifier " + handle + ".");
               
                DSpaceObject dso = resolveHandle(handle);
                if (dso == null) {
                    // resolveHandle reports problems and return null in case
                    // of an error or an unresolvable handle.
                    // Don't report it a second time, just continue...
                    continue;
                }
               
                log.debug("Resolved identifier " + handle + " as "
                        + dso.getTypeText() + " " + dso.getID());
               
                try
                {
                    this.delete(dso, true);
                }
                catch (SQLException ex)
                {
                    log.error(ex);
                    System.err.println("A problem with the database connection "
                            + "occured. Canceled pending actions.");
                    System.err.println(ex.getMessage());
                    ex.printStackTrace(System.err);
                    System.exit(1);
                }
            }
            System.exit(0);
        }
       
        if (line.hasOption("delete-all"))
        {
            this.deleteAll();
            System.exit(0);
        }
       
        if (line.hasOption("identifiers"))
        {
            String[] identifiers = line.getOptionValues("identifiers");
            report("Starting conversion of specified DSpaceObjects...");
           
            this.processed.clear();
            for (String handle : identifiers)
            {
                log.debug("Trying to resolve identifier " + handle + ".");
               
                DSpaceObject dso = resolveHandle(handle);
                if (dso == null) {
                    // resolveHandle reports problems and return null in case
                    // of an error or an unresolvable handle.
                    // Don't report it a second time, just continue...
                    continue;
                }
               
                try
                {
                    this.convert(dso, false);
                }
                catch (SQLException ex)
                {
                    log.error(ex);
                    System.err.println("A problem with the database connection "
                            + "occured. Canceled pending actions.");
                    System.err.println(ex.getMessage());
                    ex.printStackTrace(System.err);
                    System.exit(1);
                }
            }
            report("Conversion ended.");
            System.exit(0);
        }

        if (line.hasOption("convert-all"))
        {
            try {
                this.convertAll();
            }
            catch (SQLException ex)
            {
                log.error(ex);
                System.err.println("A problem with the database connection "
                        + "occured. Canceled pending actions.");
                System.err.println(ex.getMessage());
                ex.printStackTrace(System.err);
                System.exit(1);
            }
            System.exit(0);
        }
       
        this.usage(options);
        System.exit(0);
    }

    protected DSpaceObject resolveHandle(String handle)
    {
        DSpaceObject dso = null;
        try
        {
            dso = HandleManager.resolveToObject(this.context, handle);
        }
        catch (SQLException ex)
        {
            log.error(ex);
            System.err.println("A problem with the database connection "
                    + "occured. Canceled pending actions.");
            System.err.println(ex.getMessage());
            ex.printStackTrace(System.err);
            System.exit(1);
        }
        catch (IllegalStateException ex)
        {
            log.error(ex);
            System.err.println("Cannot recognize identifier '"
                    + handle + "', skipping.");
            return null;
        }
        if (dso == null)
        {
            System.err.println("Cannot resolve identifier '" + handle
                    + "', skipping.");
            log.debug("Couldn't resolve identifier '" + handle
                    + "', dso was null.");
            return null;
        }
        if (dso.getType() != Constants.SITE
                && dso.getType() != Constants.COMMUNITY
                && dso.getType() != Constants.COLLECTION
                && dso.getType() != Constants.ITEM)
        {
            System.err.println(dso.getTypeText() + " are currently not "
                    + "supported as independent entities. Bundles and Bitstreams "
                    + "should be processed as part of their item.");
            return null;
        }

        return dso;
    }
       
    protected Options createOptions() {
        Options options = new Options();

        options.addOption("h", "help", false, "Print usage information and exit.");
        options.addOption("v", "verbose", false, "Print verbose information to "
                + "stderr while converting data.");
        options.addOption("n", "dry-run", false, "Don't store the converted "
                + "data in the triple store, don't delete data from the "
                + "triplestore. Make a dry run, simulation what would happen.");
        options.addOption("o", "stdout", false, "Print all converted data to " +
                "stdout using turtle as serialization.");
        options.addOption("n", "dry-run", false, "Don't send any data or commands " +
                "to the triplestore. Usefull for debugging or in conjunction " +
                "with --stdout.");
        options.addOption("c", "convert-all", false, "Convert all DSpace Objects" +
                " that are readable for an anonymous user. This may take a long time" +
                "depending on the number of stored communties, collections and " +
                "items. Existing information in the triple store will be updated.");

        Option optIdentifiers = OptionBuilder.withLongOpt("identifiers")
            .hasArgs()
            .withArgName("handle")
            .withValueSeparator(' ')
            .withDescription("Only convert these DSpace Objects. If you specify "
                    + "a Community or Collection all of their Items will be "
                    + "converted as well. Separate multiple identifiers with a "
                    + "space.")
            .create('i');
        options.addOption(optIdentifiers);
       
        Option optDelete = OptionBuilder.withLongOpt("delete")
                .hasArgs()
                .withArgName("hdl:handle | URI")
                .withValueSeparator(' ')
                .withDescription("Delete previously converted data. Specify "
                        + "either the handle of a DSpaceObject in the format "
                        + "'hdl:<handle>' or the URI used to identify the rdf "
                        + "data in the triplestore. If you specify a Community, "
                        + "Collection or Item by its handle all converted "
                        + "information about attached Subcommunities, "
                        + "Collections, Items, Bundles and Bitstreams will be "
                        + "deleted as well. Separate multiple identifiers with "
                        + "a space.")
                .create();
        options.addOption(optDelete);
       
        Option optDeleteAll = OptionBuilder.withLongOpt("delete-all")
                .withDescription("Delete all converted data from the triplestore.")
                .create();
        options.addOption(optDeleteAll);
       
        return options;
    }
   
    protected static void usage(Options options)
    {
        String cliSyntax = "[dspace-bin]/bin/dspace rdfizer [OPTIONS...]";
        String header = "";
        String footer = "\nYou cannot use the options --convert-all, --identifiers " +
                "or --stdout together with --delete or --delete-all.\n" +
                "Please use at least one option out of --convert-all, --delete, " +
                "--delete-all or --identifiers.\n";
       
        PrintWriter err = new PrintWriter(System.err);
        HelpFormatter helpformater = new HelpFormatter();
        helpformater.printHelp(err, 79, cliSyntax, header, options, 2, 2, footer);
        err.flush();
        // don't close PrintWriter err, as it would close System.err!
    }
   
    public static void main(String[] args)
    {
        // get a context from an anonymous user.
        // don't switch off authorization system! We'll export the converted
        // data into a triple store that provides a public sparql endpoint.
        // all exported rdf data can be read by anonymous users.
        // We won't change the database => read_only context will assure this.
        Context context = null;
        try {
            context = new Context(Context.READ_ONLY);
        }
        catch (SQLException sqle)
        {
            log.info("Caught SQLException: ", sqle);
            System.err.println("Can't connect to database: " + sqle.getMessage());
            context.abort();
            System.exit(-1);
        }
       
       
        RDFizer myself = null;
        try {
            myself = new RDFizer();
        } catch (SQLException ex) {
            System.err.println("A problem with the database occured: "
                    + ex.getMessage());
            ex.printStackTrace(System.err);
            log.error(ex);
            context.abort();
            System.exit(1);
        }
       
        myself.overrideContext(context);
        myself.runCLI(args);
       
        // we don't change anything in the database, so abort the context.
        context.abort();
    }
   
    /**
     * Action that {@code dspaceDFS} performs on every DSpaceObject it
     * visits. Implemented as anonymous classes by {@code delete} and
     * {@code convert}.
     */
    protected abstract class Callback
    {
        /**
         * Processes a single visited object.
         *
         * @param dso the object currently visited
         * @throws SQLException if the implementation declares a database
         *                      problem
         */
        protected abstract void callback(DSpaceObject dso)
                throws SQLException;
    }
}
TOP

Related Classes of org.dspace.rdf.RDFizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.