Package org.dspace.app.harvest

Source Code of org.dspace.app.harvest.Harvest

/*
* Harvest.java
*
* Version: $Revision: 1 $
*
* Date: $Date: 2007-11-28 15:07:34 -0600 (Wed, 28 Nov 2007) $
*
* Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
* Institute of Technology.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.app.harvest;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.Vector;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.xpath.XPathAPI;
import org.dspace.app.itemimport.ItemImport;
import org.dspace.authorize.AuthorizeException;
import org.dspace.authorize.AuthorizeManager;
import org.dspace.authorize.ResourcePolicy;
import org.dspace.browse.IndexBrowse;
import org.dspace.content.Bitstream;
import org.dspace.content.BitstreamFormat;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.DSpaceObject;
import org.dspace.content.FormatIdentifier;
import org.dspace.harvest.HarvestedCollection;
import org.dspace.content.InstallItem;
import org.dspace.content.Item;
import org.dspace.content.ItemIterator;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataSchema;
import org.dspace.harvest.OAIHarvester;
import org.dspace.content.WorkspaceItem;
import org.dspace.harvest.OAIHarvester.HarvestingException;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.Group;
import org.dspace.handle.HandleManager;
import org.dspace.workflow.WorkflowManager;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
*  Test class for harvested collections.
*
* @author Alexey Maslov
*/
public class Harvest
{
  private static Context context;
 
    public static void main(String[] argv) throws Exception
    {
        // create an options object and populate it
        CommandLineParser parser = new PosixParser();

        Options options = new Options();

        options.addOption("p", "purge", false, "delete all items in the collection");
        options.addOption("r", "run", false, "run the standrad harvest procedure");
        options.addOption("g", "ping", false, "test the OAI server and set");
        options.addOption("o", "once", false, "run the harvest procedure with specified parameters");
        options.addOption("s", "setup", false, "Set the collection up for harvesting");
        options.addOption("S", "start", false, "start the harvest loop");
        options.addOption("R", "reset", false, "reset harvest status on all collections");
        options.addOption("P", "purge", false, "purge all harvestable collections");
       

        options.addOption("e", "eperson", true, "eperson");
        options.addOption("c", "collection", true, "harvesting collection (handle or id)");
        options.addOption("t", "type", true, "type of harvesting (0 for none)");
        options.addOption("a", "address", true, "address of the OAI-PMH server");
        options.addOption("i", "oai_set_id", true, "id of the PMH set representing the harvested collection");
        options.addOption("m", "metadata_format", true, "the name of the desired metadata format for harvesting, resolved to namespace and crosswalk in dspace.cfg");

        options.addOption("h", "help", false, "help");

        CommandLine line = parser.parse(options, argv);

        String command = null;
        String eperson = null;
        String collection = null;
        String oaiSource = null;
        String oaiSetID = null;
        String metadataKey = null;
        int harvestType = 0;
       
        if (line.hasOption('h'))
        {
            HelpFormatter myhelp = new HelpFormatter();
            myhelp.printHelp("Harvest\n", options);
            System.out
            .println("\nPING OAI server: Harvest -g -s oai_source -i oai_set_id");
            System.out
          .println("RUNONCE harvest with arbitrary options: Harvest -o -e eperson -c collection -t harvest_type -a oai_source -i oai_set_id -m metadata_format");
            System.out
                    .println("SETUP a collection for harvesting: Harvest -s -c collection -t harvest_type -a oai_source -i oai_set_id -m metadata_format");
            System.out
                .println("RUN harvest once: Harvest -r -e eperson -c collection");
            System.out
            .println("START harvest scheduler: Harvest -S");
            System.out
          .println("RESET all harvest status: Harvest -R");
            System.out
                    .println("PURGE a collection of items and settings: Harvest -p -e eperson -c collection");
            System.out
          .println("PURGE all harvestable collections: Harvest -P -e eperson");
           
           

            System.exit(0);
        }

        if (line.hasOption('s')) {
            command = "config";
        }
        if (line.hasOption('p')) {
            command = "purge";
        }
        if (line.hasOption('r')) {
            command = "run";
        }
        if (line.hasOption('g')) {
            command = "ping";
        }
        if (line.hasOption('o')) {
            command = "runOnce";
        }
        if (line.hasOption('S')) {
            command = "start";
        }
        if (line.hasOption('R')) {
            command = "reset";
        }
        if (line.hasOption('P')) {
            command = "purgeAll";
        }

       
        if (line.hasOption('e')) {
            eperson = line.getOptionValue('e');
        }
        if (line.hasOption('c')) {
            collection = line.getOptionValue('c');
        }
        if (line.hasOption('t')) {
            harvestType = Integer.parseInt(line.getOptionValue('t'));
        } else {
          harvestType = 0;
        }
        if (line.hasOption('a')) {
            oaiSource = line.getOptionValue('a');
        }
        if (line.hasOption('i')) {
            oaiSetID = line.getOptionValue('i');
        }
        if (line.hasOption('m')) {
            metadataKey = line.getOptionValue('m');
        }
       

        // Instantiate our class
        Harvest harvester = new Harvest();
        harvester.context = new Context();
       
       
        // Check our options
        if (command == null)
        {
            System.out
                    .println("Error - no parameters specified (run with -h flag for details)");
            System.exit(1);
        }
        // Run a single harvest cycle on a collection using saved settings.
        else if (command.equals("run"))
        {
            if (collection == null || eperson == null)
            {
                System.out
                        .println("Error - a target collection and eperson must be provided");
                System.out.println(" (run with -h flag for details)");
                System.exit(1);
            }
           
            harvester.runHarvest(collection, eperson);
        }
        // start the harvest loop
        else if (command.equals("start"))
        {
          startHarvester();
        }
        // reset harvesting status
        else if (command.equals("reset"))
        {
          resetHarvesting();
        }
        // purge all collections that are set up for harvesting (obviously for testing purposes only)
        else if (command.equals("purgeAll"))
        {
          if (eperson == null)
            {
                System.out
                        .println("Error - an eperson must be provided");
                System.out.println(" (run with -h flag for details)");
                System.exit(1);
            }
         
          List<Integer> cids = HarvestedCollection.findAll(context);
          System.out.println("Purging the following collections (deleting items and resetting harvest status): " + cids.toString());
        for (Integer cid : cids)
        {
          harvester.purgeCollection(cid.toString(), eperson);
        }
        context.complete();
        }
        // Delete all items in a collection. Useful for testing fresh harvests.
        else if (command.equals("purge"))
        {
            if (collection == null || eperson == null)
            {
                System.out
                        .println("Error - a target collection and eperson must be provided");
                System.out.println(" (run with -h flag for details)");
                System.exit(1);
            }
           
            harvester.purgeCollection(collection, eperson);
            context.complete();
           
            //TODO: implement this... remove all items and remember to unset "last-harvested" settings
        }
        // Configure a collection with the three main settings
        else if (command.equals("config"))
        {
            if (collection == null)
            {
                System.out.println("Error -  a target collection must be provided");
                System.out.println(" (run with -h flag for details)");
                System.exit(1);
            }
            if (oaiSource == null || oaiSetID == null)
            {
                System.out.println("Error - both the OAI server address and OAI set id must be specified");
                System.out.println(" (run with -h flag for details)");
                System.exit(1);
            }
            if (metadataKey == null)
            {
              System.out.println("Error - a metadata key (commonly the prefix) must be specified for this collection");
                System.out.println(" (run with -h flag for details)");
                System.exit(1);             
            }
           
            harvester.configureCollection(collection, harvestType, oaiSource, oaiSetID, metadataKey);
        }
        else if (command.equals("ping"))
        {
            if (oaiSource == null || oaiSetID == null)
            {
                System.out.println("Error - both the OAI server address and OAI set id must be specified");
                System.out.println(" (run with -h flag for details)");
                System.exit(1);
            }
        }
    }
   
   
    /**
     * check
     * @param collectionID
     * @return
     */
    private Collection checkCollection(String collectionID)
    {
      Collection collection = resolveCollection(collectionID);
      try {
        HarvestedCollection hc = HarvestedCollection.find(context, collection.getID());
          if (!hc.isHarvestable()) {
            System.out.println("Collection '"+ collection.getName() +"' is not set up for harvesting");
              System.exit(1);
          }
      } catch (SQLException se) {
        se.printStackTrace();
      }
      return collection;
    }
   
    /*
     * Resolve the ID into a collection and check to see if its harvesting options are set. If so, return
     * the collection, if not, bail out.
     */
    private Collection resolveCollection(String collectionID) {
     
      DSpaceObject dso;
      Collection targetCollection = null;
     
      try {
        // is the ID a handle?
          if (collectionID.indexOf('/') != -1)
          {
              // string has a / so it must be a handle - try and resolve it
              dso = HandleManager.resolveToObject(context, collectionID);
 
              // resolved, now make sure it's a collection
              if (dso == null || dso.getType() != Constants.COLLECTION)
                  targetCollection = null;
              else
                targetCollection = (Collection)dso;
          }
          // not a handle, try and treat it as an integer collection
          // database ID
          else if (collectionID != null)
          {
            System.out.println("Looking up by id: " + collectionID + ", parsed as '" + Integer.parseInt(collectionID) + "', " + "in context: " + context);
              targetCollection = Collection.find(context, Integer.parseInt(collectionID));
          }
          // was the collection valid?
          if (targetCollection == null)
          {
            System.out.println("Cannot resolve " + collectionID + " to collection");
              System.exit(1);
          }
      }
      catch (SQLException se) {
        se.printStackTrace();
      }
     
      return targetCollection;
    }
   
   
    private void configureCollection(String collectionID, int type, String oaiSource, String oaiSetId, String mdConfigId) {
      System.out.println("Running: configure collection");
    
      Collection collection = resolveCollection(collectionID);
      System.out.println(collection.getID());
           
      try {
        HarvestedCollection hc = HarvestedCollection.find(context, collection.getID());
          if (hc == null) {
            hc = HarvestedCollection.create(context, collection.getID());
          }
       
        context.turnOffAuthorisationSystem();
        hc.setHarvestParams(type, oaiSource, oaiSetId, mdConfigId);
        hc.setHarvestStatus(HarvestedCollection.STATUS_READY);
        hc.update();
        context.restoreAuthSystemState();
        context.complete();
      }
      catch (Exception e) {
        System.out.println("Changes could not be committed");
        e.printStackTrace();
        System.exit(1);
      }
      finally {
        context.restoreAuthSystemState();
      }
    }
   
   
    /**
     * Purges a collection of all harvest-related data and settings. All items in the collection will be deleted.
     *
     * @param collectionID
     * @param email
     */
    private void purgeCollection(String collectionID, String email) {
      System.out.println("Purging collection of all items and reseting last_harvested and harvest_message: " + collectionID);
      Collection collection = resolveCollection(collectionID);
    
      try
      {
        EPerson eperson = EPerson.findByEmail(context, email);
          context.setCurrentUser(eperson);
        context.turnOffAuthorisationSystem();
       
        ItemIterator it = collection.getAllItems();
        IndexBrowse ib = new IndexBrowse(context);
        int i=0;
        while (it.hasNext()) {
          i++;
          Item item = it.next();
          System.out.println("Deleting: " + item.getHandle());
          ib.itemRemoved(item);
          collection.removeItem(item);
          // commit every 50 items
          if (i%50 == 0) {
            context.commit();
            i=0;
          }
        }
       
        HarvestedCollection hc = HarvestedCollection.find(context, collection.getID());
        if (hc != null) {
          hc.setHarvestResult(null,"");
          hc.setHarvestStatus(HarvestedCollection.STATUS_READY);
          hc.setHarvestStartTime(null);
          hc.update();
        }
        context.restoreAuthSystemState();       
        context.commit();
      }
      catch (Exception e) {
        System.out.println("Changes could not be committed");
        e.printStackTrace();
        System.exit(1);
      }
      finally {
        context.restoreAuthSystemState();
      }
    }
   
   
    /**
     * Run a single harvest cycle on the specified collection under the authorization of the supplied EPerson
     */
    private void runHarvest(String collectionID, String email) {
      System.out.println("Running: a harvest cycle on " + collectionID);
     
      System.out.print("Initializing the harvester... ");
      OAIHarvester harvester = null;
      try {
        Collection collection = resolveCollection(collectionID);
          HarvestedCollection hc = HarvestedCollection.find(context, collection.getID());
        harvester = new OAIHarvester(context, collection, hc);
        System.out.println("success. ");
      }
      catch (HarvestingException hex) {
        System.out.print("failed. ");
        System.out.println(hex.getMessage());
        System.exit(1);
      } catch (SQLException se) {
      // TODO Auto-generated catch block
      se.printStackTrace();
    }
           
      try {
        // Harvest will not work for an anonymous user
          EPerson eperson = EPerson.findByEmail(context, email);
          System.out.println("Harvest started... ");
          context.setCurrentUser(eperson);
        harvester.runHarvest();
        context.complete();
      }
      catch (Exception e) {
      // Not much else we can do at this point
      e.printStackTrace();
      System.exit(1);
    }
      System.out.println("Harvest complete. ");
    }
   
   
    /**
     * Resets harvest_status and harvest_start_time flags for all collections that have a row in the harvested_collections table
     */
    private static void resetHarvesting() {
      System.out.print("Resetting harvest status flag on all collections... ");
     
      try
      {
        List<Integer> cids = HarvestedCollection.findAll(context);
        for (Integer cid : cids)
        {
          HarvestedCollection hc = HarvestedCollection.find(context, cid);
          //hc.setHarvestResult(null,"");
          hc.setHarvestStartTime(null);
          hc.setHarvestStatus(HarvestedCollection.STATUS_READY);
          hc.update();
        }
        context.commit();
        System.out.println("success. ");
      }
      catch (Exception ex) {
        System.out.println("failed. ");
        ex.printStackTrace();
      }     
    }
   
   
    /**
     * Starts up the harvest scheduler. Terminating this process will stop the scheduler.
     */
    private static void startHarvester()
    {
      try
      {
        System.out.print("Starting harvest loop... ");
        OAIHarvester.startNewScheduler();
        System.out.println("running. ");
      }
      catch (Exception ex) {
        ex.printStackTrace();
      }
    }
   
}
TOP

Related Classes of org.dspace.app.harvest.Harvest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.