Package org.apache.pdfbox.preflight.metadata

Source Code of org.apache.pdfbox.preflight.metadata.SynchronizedMetaDataValidation

/*****************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
****************************************************************************/

package org.apache.pdfbox.preflight.metadata;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.preflight.PreflightConstants;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.ValidationException;
import org.apache.xmpbox.DateConverter;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.AdobePDFSchema;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.type.AbstractField;
import org.apache.xmpbox.type.TextType;

/**
* Class which check if document information available in a document are synchronized with XMP
*
* @author Germain Costenobel
*
*/
public class SynchronizedMetaDataValidation
{

    /**
     * Analyze if Title embedded in Document Information dictionary and in XMP properties are synchronized
     *
     * @param dico the Document Information Dictionary.
     * @param dc the Dublin Core Schema.
     * @param ve the list of validation errors.
     */
    protected void analyzeTitleProperty(PDDocumentInformation dico, DublinCoreSchema dc, List<ValidationError> ve)
    {
        String title = dico.getTitle();
        if (title != null)
        {
            if (dc != null)
            {
                // Check the x-default value, if not found, check with the first value
                // found
                if (dc.getTitle() != null)
                {
                    if (dc.getTitle("x-default") != null)
                    {
                        if (!dc.getTitle("x-default").equals(title))
                        {
                            ve.add(unsynchronizedMetaDataError("Title"));
                        }
                    }
                    else
                    {
                        // This search of first value is made just to keep compatibility
                        // with lot of PDF documents
                        // which use title without lang definition
                        // REM : MAY we have to delete this option in the future
                        Iterator<AbstractField> it = dc.getTitleProperty().getContainer().getAllProperties().iterator();
                        if (it.hasNext())
                        {
                            AbstractField tmp = it.next();
                            if (tmp instanceof TextType)
                            {
                                if (!((TextType) tmp).getStringValue().equals(title))
                                {
                                    ve.add(unsynchronizedMetaDataError("Title"));
                                }
                            }
                            else
                            {
                                ve.add(AbsentXMPPropertyError("Title", "Property is badly defined"));
                            }
                        }
                        else
                        {
                            ve.add(AbsentXMPPropertyError("Title", "Property is not defined"));
                        }
                    }

                }
                else
                {
                    ve.add(AbsentXMPPropertyError("Title", "Property is not defined"));
                }
            }
            else
            {
                ve.add(AbsentSchemaMetaDataError("Title", "Dublin Core"));
            }
        }
    }

    /**
     * Analyze if Author(s) embedded in Document Information dictionary and in XMP properties are synchronized
     *
     * @param dico
     *            Document Information Dictionary
     * @param dc
     *            Dublin Core Schema
     * @param ve
     *            The list of validation errors
     */
    protected void analyzeAuthorProperty(PDDocumentInformation dico, DublinCoreSchema dc, List<ValidationError> ve)
    {
        String author = dico.getAuthor();
        if (author != null)
        {
            if (dc != null)
            {
                if (dc.getCreatorsProperty() != null)
                {
                    if (dc.getCreators().size() != 1)
                    {
                        ve.add(AbsentXMPPropertyError("Author",
                                "In XMP metadata, Author(s) must be represented by a single entry in a text array (dc:creator) "));
                    }
                    else
                    {
                        if (dc.getCreators().get(0) == null)
                        {
                            ve.add(AbsentXMPPropertyError("Author", "Property is defined as null"));
                        }
                        else
                        {
                            if (!dc.getCreators().get(0).equals(author))
                            {
                                ve.add(unsynchronizedMetaDataError("Author"));
                            }
                        }
                    }
                }
                else
                {
                    ve.add(AbsentXMPPropertyError("Author", "Property is not defined in XMP Metadata"));
                }
            }
            else
            {
                ve.add(AbsentSchemaMetaDataError("Author", "Dublin Core"));
            }
        }
    }

    /**
     * Analyze if Subject(s) embedded in Document Information dictionary and in XMP properties are synchronized
     *
     * @param dico
     *            Document Information Dictionary
     * @param dc
     *            Dublin Core Schema
     * @param ve
     *            The list of validation errors
     */
    protected void analyzeSubjectProperty(PDDocumentInformation dico, DublinCoreSchema dc, List<ValidationError> ve)
    {
        String subject = dico.getSubject();
        if (subject != null)
        {
            if (dc != null)
            {
                // PDF/A Conformance Erratum (2007) specifies XMP Subject
                // as a Text type embedded in the dc:description["x-default"].
                if (dc.getDescriptionProperty() != null)
                {
                    if (dc.getDescription("x-default") == null)
                    {
                        ve.add(AbsentXMPPropertyError("Subject",
                                "Subject not found in XMP (dc:description[\"x-default\"] not found)"));
                    }
                    else
                    {
                        if (!dc.getDescription("x-default").equals(subject))
                        {
                            ve.add(unsynchronizedMetaDataError("Subject"));

                        }
                    }
                }
                else
                {
                    ve.add(AbsentXMPPropertyError("Subject", "Property is defined as null"));
                }
            }
            else
            {
                ve.add(AbsentSchemaMetaDataError("Subject", "Dublin Core"));
            }
        }
    }

    /**
     * Analyze if Keyword(s) embedded in Document Information dictionary and in XMP properties are synchronized
     *
     * @param dico
     *            Document Information Dictionary
     * @param pdf
     *            PDF Schema
     * @param ve
     *            The list of validation errors
     */
    protected void analyzeKeywordsProperty(PDDocumentInformation dico, AdobePDFSchema pdf, List<ValidationError> ve)
    {
        String keyword = dico.getKeywords();
        if (keyword != null)
        {
            if (pdf != null)
            {
                if (pdf.getKeywordsProperty() == null)
                {
                    ve.add(AbsentXMPPropertyError("Keywords", "Property is not defined"));
                }
                else
                {
                    if (!pdf.getKeywords().equals(keyword))
                    {
                        ve.add(unsynchronizedMetaDataError("Keywords"));
                    }
                }
            }
            else
            {
                ve.add(AbsentSchemaMetaDataError("Keywords", "PDF"));
            }
        }
    }

    /**
     * Analyze if Producer embedded in Document Information dictionary and in XMP properties are synchronized
     *
     * @param dico
     *            Document Information Dictionary
     * @param pdf
     *            PDF Schema
     * @param ve
     *            The list of validation errors
     */
    protected void analyzeProducerProperty(PDDocumentInformation dico, AdobePDFSchema pdf, List<ValidationError> ve)
    {
        String producer = dico.getProducer();
        if (producer != null)
        {
            if (pdf != null)
            {
                if (pdf.getProducerProperty() == null)
                {
                    ve.add(AbsentXMPPropertyError("Producer", "Property is not defined"));
                }
                else
                {
                    if (!pdf.getProducer().equals(producer))
                    {
                        ve.add(unsynchronizedMetaDataError("Producer"));
                    }
                }
            }
            else
            {
                ve.add(AbsentSchemaMetaDataError("Producer", "PDF"));
            }
        }

    }

    /**
     * Analyze if the creator tool embedded in Document Information dictionary and in XMP properties are synchronized
     *
     * @param dico
     *            Document Information Dictionary
     * @param xmp
     *            XMP Basic Schema
     * @param ve
     *            The list of validation errors
     *
     */
    protected void analyzeCreatorToolProperty(PDDocumentInformation dico, XMPBasicSchema xmp, List<ValidationError> ve)
    {
        String creatorTool = dico.getCreator();
        if (creatorTool != null)
        {
            if (xmp != null)
            {
                if (xmp.getCreatorToolProperty() == null)
                {
                    ve.add(AbsentXMPPropertyError("CreatorTool", "Property is not defined"));
                }
                else
                {
                    if (!xmp.getCreatorTool().equals(creatorTool))
                    {
                        ve.add(unsynchronizedMetaDataError("CreatorTool"));
                    }
                }
            }
            else
            {
                ve.add(AbsentSchemaMetaDataError("CreatorTool", "PDF"));
            }
        }

    }

    /**
     * Analyze if the CreationDate embedded in Document Information dictionary and in XMP properties are synchronized
     *
     * @param dico
     *            Document Information Dictionary
     * @param xmp
     *            XMP Basic Schema
     * @param ve
     *            The list of validation errors
     * @throws ValidationException
     */
    protected void analyzeCreationDateProperty(PDDocumentInformation dico, XMPBasicSchema xmp, List<ValidationError> ve)
            throws ValidationException
    {
        Calendar creationDate = null;
        try
        {
            creationDate = dico.getCreationDate();
        }
        catch (IOException e)
        {
            // If there is an error while converting this property to a date
            ve.add(new ValidationError(PreflightConstants.ERROR_METADATA_DICT_INFO_CORRUPT, "Document Information 'CreationDate' can't be read : " + e.getMessage()));
        }
        if (creationDate != null)
        {
            if (xmp != null)
            {
                Calendar xmpCreationDate = xmp.getCreateDate();

                if (xmpCreationDate == null)
                {
                    ve.add(AbsentXMPPropertyError("CreationDate", "Property is not defined"));
                }
                else
                {
                    if (!DateConverter.toISO8601(xmpCreationDate).equals(DateConverter.toISO8601(creationDate)))
                    {
                        ve.add(unsynchronizedMetaDataError("CreationDate"));
                    }
                }

            }
            else
            {
                ve.add(AbsentSchemaMetaDataError("CreationDate", "Basic XMP"));
            }
        }
    }

    /**
     * Analyze if the ModifyDate embedded in Document Information dictionary and in XMP properties are synchronized
     *
     * @param dico
     *            Document Information Dictionary
     * @param xmp
     *            XMP Basic Schema
     * @param ve
     *            The list of validation errors
     * @throws ValidationException
     */
    protected void analyzeModifyDateProperty(PDDocumentInformation dico, XMPBasicSchema xmp, List<ValidationError> ve)
            throws ValidationException
    {
        Calendar modifyDate;
        try
        {
            modifyDate = dico.getModificationDate();
            if (modifyDate != null)
            {
                if (xmp != null)
                {

                    Calendar xmpModifyDate = xmp.getModifyDate();
                    if (xmpModifyDate == null)
                    {
                        ve.add(AbsentXMPPropertyError("ModifyDate", "Property is not defined"));
                    }
                    else
                    {
                        if (!DateConverter.toISO8601(xmpModifyDate).equals(DateConverter.toISO8601(modifyDate)))
                        {

                            ve.add(unsynchronizedMetaDataError("ModificationDate"));
                        }
                    }

                }
                else
                {
                    ve.add(AbsentSchemaMetaDataError("ModifyDate", "Basic XMP"));
                }
            }
        }
        catch (IOException e)
        {
            // If there is an error while converting this property to a date
            ve.add(new ValidationError(PreflightConstants.ERROR_METADATA_DICT_INFO_CORRUPT, "Document Information 'ModifyDate' can't be read : " + e.getMessage()));
        }

    }

    /**
     * Check if document information entries and XMP information are synchronized
     *
     * @param document
     *            the PDF Document
     * @param metadata
     *            the XMP MetaData
     * @return List of validation errors
     * @throws ValidationException
     */
    public List<ValidationError> validateMetadataSynchronization(PDDocument document, XMPMetadata metadata)
            throws ValidationException
    {
        List<ValidationError> ve = new ArrayList<ValidationError>();

        if (document == null)
        {
            throw new ValidationException("Document provided is null");
        }
        else
        {
            PDDocumentInformation dico = document.getDocumentInformation();
            if (metadata == null)
            {
                throw new ValidationException("Metadata provided are null");
            }
            else
            {
                DublinCoreSchema dc = metadata.getDublinCoreSchema();

                // TITLE
                analyzeTitleProperty(dico, dc, ve);
                // AUTHOR
                analyzeAuthorProperty(dico, dc, ve);
                // SUBJECT
                analyzeSubjectProperty(dico, dc, ve);

                AdobePDFSchema pdf = metadata.getAdobePDFSchema();

                // KEYWORDS
                analyzeKeywordsProperty(dico, pdf, ve);
                // PRODUCER
                analyzeProducerProperty(dico, pdf, ve);

                XMPBasicSchema xmp = metadata.getXMPBasicSchema();

                // CREATOR TOOL
                analyzeCreatorToolProperty(dico, xmp, ve);

                // CREATION DATE
                analyzeCreationDateProperty(dico, xmp, ve);

                // MODIFY DATE
                analyzeModifyDateProperty(dico, xmp, ve);

            }

        }
        return ve;
    }

    /**
     * Return a validationError formatted when a schema has not the expected prefix
     *
     * @param prefFound
     * @param prefExpected
     * @param schema
     * @return the generated validation error.
     */
    protected ValidationError unexpectedPrefixFoundError(String prefFound, String prefExpected, String schema)
    {
        StringBuilder sb = new StringBuilder(80);
        sb.append(schema).append(" found but prefix used is '").append(prefFound).append("', prefix '")
                .append(prefExpected).append("' is expected.");

        return new ValidationError(PreflightConstants.ERROR_METADATA_WRONG_NS_PREFIX, sb.toString());
    }

    /**
     * Return an exception formatted on IOException when accessing on metadata schema
     *
     * @param target
     *            the name of the schema
     * @param cause
     *            the raised IOException
     * @return the generated exception
     */
    protected ValidationException SchemaAccessException(String target, Throwable cause)
    {
        StringBuilder sb = new StringBuilder(80);
        sb.append("Cannot access to the ").append(target).append(" schema");
        return new ValidationException(sb.toString(), cause);
    }

    /**
     * Return a formatted validation error when metadata are not synchronized
     *
     * @param target
     *            the concerned property
     * @return the generated validation error
     */
    protected ValidationError unsynchronizedMetaDataError(String target)
    {
        StringBuilder sb = new StringBuilder(80);
        sb.append(target).append(" present in the document catalog dictionary doesn't match with XMP information");
        return new ValidationError(PreflightConstants.ERROR_METADATA_MISMATCH, sb.toString());
    }

    /**
     * Return a formatted validation error when a specific metadata schema can't be found
     *
     * @param target
     *            the concerned property
     * @param schema
     *            the XMP schema which can't be found
     * @return the generated validation error
     */
    protected ValidationError AbsentSchemaMetaDataError(String target, String schema)
    {
        StringBuilder sb = new StringBuilder(80);
        sb.append(target).append(" present in the document catalog dictionary can't be found in XMP information (")
                .append(schema).append(" schema not declared)");
        return new ValidationError(PreflightConstants.ERROR_METADATA_MISMATCH, sb.toString());
    }

    /**
     * Return a formatted validation error when a specific XMP property can't be found
     *
     * @param target
     *            the concerned property
     * @param details
     *            comments about the XMP property
     * @return the generated validation error
     */
    protected ValidationError AbsentXMPPropertyError(String target, String details)
    {
        StringBuilder sb = new StringBuilder(80);
        sb.append(target).append(" present in the document catalog dictionary can't be found in XMP information (")
                .append(details).append(")");
        return new ValidationError(PreflightConstants.ERROR_METADATA_MISMATCH, sb.toString());
    }
}
TOP

Related Classes of org.apache.pdfbox.preflight.metadata.SynchronizedMetaDataValidation

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.