Package org.fcrepo.server.validation

Source Code of org.fcrepo.server.validation.DOValidatorImpl

/* The contents of this file are subject to the license and copyright terms
* detailed in the license directory at the root of the source tree (also
* available online at http://fedora-commons.org/license/).
*/
package org.fcrepo.server.validation;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import javax.xml.XMLConstants;
import javax.xml.validation.SchemaFactory;

import org.fcrepo.common.Constants;
import org.fcrepo.server.errors.GeneralException;
import org.fcrepo.server.errors.ObjectValidityException;
import org.fcrepo.server.errors.ServerException;
import org.fcrepo.server.storage.types.Validation;
import org.fcrepo.utilities.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;


/**
* The implementation of the digital object validation module (see
* DOValidator.class and DOValidatorModule.class). The validator operates on
* digital object XML files encoded in one of the Fedora-supported encoding
* formats (i.e., FOXML, Fedora METS, and possibly others in the future). The
* following types of validation can be run:
*
* <pre>
*   0=VALDIATE_ALL : All validation will be done.
*   1=VALIDATE_XML_SCHEMA : the digital object will be validated against
*                 the the appropriate XML Schema. An ObjectValidityException
*                 will be thrown if the object fails the schema test.
*   2=VALIDATE_SCHEMATRON : the digital object will be validated
*                 against a set of rules expressed by a Schematron schema.
*                 These rules are beyond what can be expressed in XML Schema.
*                 The Schematron schema expresses rules for different phases
*                 of the object. There are rules appropriate to a digital
*                 object when it is first ingested into the repository
*                 (ingest phase). There are additional rules that must be met
*                 before a digital object is considered valid for permanent
*                 storage in the repository (completed phase). These rules
*                 pertain to aspects of the object that are system assigned,
*                 such as created dates and state codes.
*                 An ObjectValidityException will be thrown if the object fails
*                 the Fedora rules test.
* </pre>
*
* @author Sandy Payette
* @version $Id$
*/
public class DOValidatorImpl
        implements DOValidator {

    private static final Logger logger =
            LoggerFactory.getLogger(DOValidatorImpl.class);

    protected static boolean debug = true;

    /** Configuration variable: tempdir is a working area for validation */
    protected static String tempDir = null;

    /**
     * Configuration variable: xmlSchemaPath is the location of the XML Schema.
     */
    protected static String xmlSchemaPath = null;

    /**
     * Configuration variable: schematronPreprocessorPath is the Schematron
     * stylesheet that is used to transform a Schematron schema into a
     * validating stylesheet based on the rules in the schema.
     */
    protected static String schematronPreprocessorPath = null;

    /**
     * Configuration variable: schematronSchemaPath is the Schematron schema
     * that expresses Fedora-specific validation rules. It is transformed into a
     * validating stylesheet by the Schematron preprocessing stylesheet.
     */
    protected static String schematronSchemaPath = null;

    /**
     * Map of XML Schemas configured with the Fedora Repository. key = format
     * uri value = schema file path
     */
    private final Map<String, DOValidatorXMLSchema> m_xmlSchemaMap;

    /**
     * Map of Schematron rule schemas configured with the Fedora Repository. key =
     * format uri value = schema file path
     */
    private final Map<String, String> m_ruleSchemaMap;
   
    private final File m_tempDir;
   
    private final String m_absoluteTempPath;

    /**
     * <p>
     * Constructs a new DOValidatorImpl to support all forms of digital object
     * validation, using specified values for configuration values.
     * </p>
     * <p>
     * Any parameter may be given as null, in which case the default value is
     * assumed.
     * </p>
     *
     * @param tempDir
     *        Working area for validation, default is <i>temp/</i>
     * @param xmlSchemaMap
     *        Location of XML Schemas (W3 Schema) configured with Fedora (see
     *        Fedora.fcfg). Current options are <i>xsd/foxml1-1.xsd</i> for
     *        FOXML or <i>xsd/mets-fedora-ext1-1.xsd</i> for METS (Fedora
     *        extension)
     * @param schematronPreprocessorPath
     *        Location of the Schematron pre-processing stylesheet configured
     *        with Fedora.</i>
     * @param ruleSchemaMap
     *        Location of rule schemas (Schematron), configured with Fedora (see
     *        Fedora.fcfg). Current options are <i>schematron/foxmlRules1-0.xml</i>
     *        for FOXML or <i>schematron/metsExtRules1-0.xml</i> for METS
     * @throws ServerException
     *         If construction fails for any reason.
     */
    public DOValidatorImpl(String tempDir,
                           Map<String, String> xmlSchemaMap,
                           String schematronPreprocessorPath,
                           Map<String, String> ruleSchemaMap)
            throws ServerException {
        logger.debug("VALIDATE: Initializing object validation...");
        m_xmlSchemaMap = new HashMap<String, DOValidatorXMLSchema>(xmlSchemaMap.size());
        SchemaFactory schemaFactory =
            SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        for (Entry<String,String> entry: xmlSchemaMap.entrySet()) {
            try {
                m_xmlSchemaMap.put(
                    entry.getKey(),
                    new DOValidatorXMLSchema(
                        schemaFactory.newSchema(new File(entry.getValue()))));
            } catch (SAXException e) {
                throw new GeneralException("Cannot read or create schema at " +
                        entry.getValue(),e);
            }
        }
        m_ruleSchemaMap = ruleSchemaMap;
        if (tempDir == null) {
            throw new ObjectValidityException("[DOValidatorImpl] ERROR in constructor: "
                    + "tempDir is null.");
        }
        if (schematronPreprocessorPath == null) {
            throw new ObjectValidityException("[DOValidatorImpl] ERROR in constructor. "
                    + "schematronPreprocessorPath is null.");
        }
        m_tempDir = new File(tempDir);
        if (!m_tempDir.exists() && !m_tempDir.mkdirs()) {
            throw new GeneralException("Cannot read or create tempDir at " +
                tempDir);
        }
        m_absoluteTempPath = m_tempDir.getAbsolutePath();
        DOValidatorImpl.tempDir = tempDir;
        DOValidatorImpl.schematronPreprocessorPath = schematronPreprocessorPath;
    }

    /**
     * <p>
     * Validates a digital object.
     * </p>
     *
     * @param objectAsStream
     *        The digital object provided as a stream.
     * @param format
     *           The format URI of the object serialization.
     * @param validationType
     *        The level of validation to perform on the digital object. This is
     *        an integer from 0-2 with the following meanings: 0 = VALIDATE_ALL
     *        (do all validation levels) 1 = VALIDATE_XML_SCHEMA (perform only
     *        XML Schema validation) 2 = VALIDATE_SCHEMATRON (perform only
     *        Schematron Rules validation)
     * @param phase
     *        The stage in the workflow for which the validation should be
     *        contextualized. "ingest" = the object is encoded for ingest into
     *        the repository "store" = the object is encoded with all final
     *        assignments so that it is appropriate for storage as the
     *        authoritative serialization of the object.
     * @throws ObjectValidityException
     *         If validation fails for any reason.
     * @throws GeneralException
     *         If validation fails for any reason.
     */
    public void validate(InputStream objectAsStream,
                         String format,
                         int validationType,
                         String phase) throws ObjectValidityException,
                         GeneralException {

        if (validationType == VALIDATE_NONE) return;
        checkFormat(format);
        switch (validationType) {
            case VALIDATE_NONE:
                break;
            case VALIDATE_ALL:
                try {
                    // FIXME We need to use the object Inputstream twice, once for XML
                    // Schema validation and once for Schematron validation.
                    // We may want to consider implementing some form of a rewindable
                    // InputStream. For now, I will just write the object InputStream to
                    // disk so I can read it multiple times.
                    if (logger.isDebugEnabled()) {
                        logger.debug(
                                "Validating streams against schema and schematron" +
                                        " requires caching tempfiles to disk; consider" +
                                        "calling validations seperately with a buffered" +
                                        "InputStream"
                                );
                    }
                    File objectAsFile = streamtoFile(objectAsStream);
                    validate(objectAsFile, format, validationType, phase);
                } catch (IOException ioe) {
                    throw new ObjectValidityException("[DOValidatorImpl]: "
                            + "ERROR in validate(InputStream objectAsStream...). " + ioe.getMessage());
                }
                break;
            case VALIDATE_XML_SCHEMA:
                validateXMLSchema(objectAsStream, m_xmlSchemaMap.get(format));
                break;
            case VALIDATE_SCHEMATRON:
                validateByRules(objectAsStream,
                        m_ruleSchemaMap.get(format),
                        schematronPreprocessorPath,
                        phase);
                break;
            default:
                String msg = "VALIDATE: ERROR - missing or invalid validationType";
                logger.error(msg);
                throw new GeneralException("[DOValidatorImpl] " + msg + ":"
                    + validationType);
        }
        return;
    }

    /**
     * <p>
     * Validates a digital object.
     * </p>
     *
     * @param objectAsFile
     *        The digital object provided as a file.
     * @param validationType
     *        The level of validation to perform on the digital object. This is
     *        an integer from 0-2 with the following meanings: 0 = VALIDATE_ALL
     *        (do all validation levels) 1 = VALIDATE_XML_SCHEMA (perform only
     *        XML Schema validation) 2 = VALIDATE_SCHEMATRON (perform only
     *        Schematron Rules validation)
     * @param phase
     *        The stage in the work flow for which the validation should be
     *        contextualized. "ingest" = the object is in the submission format
     *        for the ingest phase "store" = the object is in the authoritative
     *        format for the final storage phase
     * @throws ObjectValidityException
     *         If validation fails for any reason.
     * @throws GeneralException
     *         If validation fails for any reason.
     */
    public void validate(File objectAsFile,
                         String format,
                         int validationType,
                         String phase) throws ObjectValidityException,
            GeneralException {
        logger.debug("VALIDATE: Initiating validation: phase={} format={}",
                phase, format);
        if (validationType == VALIDATE_NONE) return;
        checkFormat(format);

        if (format.equals(Constants.ATOM_ZIP1_1.uri)) {
            // If the object serialization is a Zip file with an atom
            // manifest, extract the manifest for validation.
            try {
                File manifest = null;
                ZipInputStream zip = new ZipInputStream(new FileInputStream(objectAsFile));
                ZipEntry entry;
                while ((entry = zip.getNextEntry()) != null) {
                    if (entry.getName().equals("atommanifest.xml")) {
                        manifest = streamtoFile(zip);
                        break;
                    }
                }
                zip.close();
                objectAsFile = manifest;
            } catch(IOException e) {
                throw new GeneralException(e.getMessage(), e);
            }
        }
       
        try {

            FileInputStream objectAsStream = new FileInputStream(objectAsFile);
            if (validationType == VALIDATE_ALL) {
                validateByRules(objectAsStream,
                        m_ruleSchemaMap.get(format),
                        schematronPreprocessorPath,
                        phase);
                validateXMLSchema(new FileInputStream(objectAsFile),
                    m_xmlSchemaMap.get(format));
            } else if (validationType == VALIDATE_XML_SCHEMA) {
                validateXMLSchema(objectAsStream, m_xmlSchemaMap.get(format));
            } else if (validationType == VALIDATE_SCHEMATRON) {
                validateByRules(objectAsStream,
                        m_ruleSchemaMap.get(format),
                        schematronPreprocessorPath,
                        phase);
            } else {
                String msg = "VALIDATE: ERROR - missing or invalid validationType";
                logger.error(msg);
                throw new GeneralException("[DOValidatorImpl] " + msg + ":"
                        + validationType);
            }
        } catch (IOException ioe) {
            logger.error("VALIDATE: ERROR - failed validations.", ioe);
            throw new ObjectValidityException("[DOValidatorImpl]: validate(File input...). "
                    + ioe.getMessage());
        } finally {
            cleanUp(objectAsFile);
        }
    }

    private void checkFormat(String format) throws ObjectValidityException {
        if (!m_xmlSchemaMap.containsKey(format)) {
          Validation validation = new Validation("unknown");
          List<String> probs = new ArrayList<String>();
          probs.add("Unsupported format: " + format);
          validation.setObjectProblems(probs);
            throw new ObjectValidityException("Unsupported format: " + format, validation);
        }
    }

    /**
     * Do XML Schema validation on the Fedora object.
     *
     * @param objectAsFile
     *        The digital object provided as a file.
     * @throws ObjectValidityException
     *         If validation fails for any reason.
     * @throws GeneralException
     *         If validation fails for any reason.
     */
    private void validateXMLSchema(InputStream objectAsStream, DOValidatorXMLSchema xsv)
            throws ObjectValidityException, GeneralException {

        try {
            xsv.validate(objectAsStream);
        } catch (ObjectValidityException e) {
            logger.error("VALIDATE: ERROR - failed XML Schema validation.", e);
            throw e;
        } catch (Exception e) {
            logger.error("VALIDATE: ERROR - failed XML Schema validation.", e);
            throw new ObjectValidityException("[DOValidatorImpl]: validateXMLSchema. "
                    + e.getMessage());
        }
        logger.debug("VALIDATE: SUCCESS - passed XML Schema validation.");
    }

    /**
     * Do Schematron rules validation on the Fedora object. Schematron
     * validation tests the object against a set of rules expressed using XPATH
     * in a Schematron schema. These test for things that are beyond what can be
     * expressed using XML Schema.
     *
     * @param objectAsFile
     *        The digital object provided as a file.
     * @param schemaPath
     *        Location of the Schematron rules file.
     * @param preprocessorPath
     *        Location of Schematron preprocessing stylesheet
     * @param phase
     *        The workflow phase (ingest, store) for the object.
     * @throws ObjectValidityException
     *         If validation fails for any reason.
     * @throws GeneralException
     *         If validation fails for any reason.
     */
    private void validateByRules(InputStream objectAsStream,
                                 String ruleSchemaPath,
                                 String preprocessorPath,
                                 String phase) throws ObjectValidityException,
            GeneralException {

        try {
            DOValidatorSchematron schtron =
                    new DOValidatorSchematron(ruleSchemaPath,
                                              preprocessorPath,
                                              phase);
            schtron.validate(objectAsStream);
        } catch (ObjectValidityException e) {
            logger.error("VALIDATE: ERROR - failed Schematron rules validation.",
                      e);
            throw e;
        } catch (Exception e) {
            logger.error("VALIDATE: ERROR - failed Schematron rules validation.",
                      e);
            throw new ObjectValidityException("[DOValidatorImpl]: "
                    + "failed Schematron rules validation. " + e.getMessage());
        }
        logger.debug("VALIDATE: SUCCESS - passed Schematron rules validation.");
    }

    private File streamtoFile(InputStream objectAsStream)
            throws IOException {

        File objectAsFile = null;
        try {
            objectAsFile = File.createTempFile("validation", "tmp", m_tempDir);

            FileOutputStream fos = new FileOutputStream(objectAsFile);
            FileUtils.copy(objectAsStream, fos);
        } catch (IOException e) {
            if (objectAsFile != null && objectAsFile.exists()) {
                objectAsFile.delete();
            }
            throw e;
        }
        return objectAsFile;
    }

    // Distinguish temporary object files from real object files
    // that were passed in for validation.  This is a bit ugly as it stands,
    // but it should only blow away files in the temp directory.
    private void cleanUp(File f) {
        if (f.getParentFile() != null) {
            if (m_absoluteTempPath.equalsIgnoreCase(f
                    .getParentFile().getAbsolutePath())) {
                f.delete();
            }
        }
    }
   
}
TOP

Related Classes of org.fcrepo.server.validation.DOValidatorImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.