Package org.apache.padaf.preflight.xmp

Source Code of org.apache.padaf.preflight.xmp.SynchronizedMetaDataValidation

/*****************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
****************************************************************************/

package org.apache.padaf.preflight.xmp;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;


import org.apache.padaf.preflight.ValidationConstants;
import org.apache.padaf.preflight.ValidationException;
import org.apache.padaf.preflight.ValidationResult.ValidationError;
import org.apache.padaf.xmpbox.XMPMetadata;
import org.apache.padaf.xmpbox.parser.DateConverter;
import org.apache.padaf.xmpbox.schema.AdobePDFSchema;
import org.apache.padaf.xmpbox.schema.DublinCoreSchema;
import org.apache.padaf.xmpbox.schema.XMPBasicSchema;
import org.apache.padaf.xmpbox.type.AbstractField;
import org.apache.padaf.xmpbox.type.TextType;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;

/**
* Class which check if document information available in a document are
* synchronized with XMP
*
* @author Germain Costenobel
*
*/
public class SynchronizedMetaDataValidation {

  /**
   * Analyze if Title embedded in Document Information dictionary and in XMP
   * properties are synchronized
   *
   * @param dico
   *          Document Information Dictionary
   * @param dc
   *          Dublin Core Schema
   * @param ve
   *          The list of validation errors
   */
  protected void analyzeTitleProperty(PDDocumentInformation dico,
      DublinCoreSchema dc, List<ValidationError> ve) {
    String title = dico.getTitle();
    if (title != null) {
      if (dc != null) {
        // Check the x-default value, if not found, check with the first value
        // found
        if (dc.getTitle() != null) {
          if (dc.getTitleValue("x-default") != null) {
            if (!dc.getTitleValue("x-default").equals(title)) {
              ve.add(unsynchronizedMetaDataError("Title"));
            }
          } else {
            // This search of first value is made just to keep compatibility
            // with lot of PDF documents
            // which use title without lang definition
            // REM : MAY we have to delete this option in the future
            Iterator<AbstractField> it = dc.getTitle().getContainer()
                .getAllProperties().iterator();
            if (it.hasNext()) {
              AbstractField tmp = it.next();
              if (tmp instanceof TextType) {
                if (!((TextType) tmp).getStringValue().equals(title)) {
                  ve.add(unsynchronizedMetaDataError("Title"));
                }
              } else {
                ve.add(AbsentXMPPropertyError("Title",
                    "Property is badly defined"));
              }
            } else {
              ve
                  .add(AbsentXMPPropertyError("Title",
                      "Property is not defined"));
            }
          }

        } else {
          ve.add(AbsentXMPPropertyError("Title", "Property is not defined"));
        }
      } else {
        ve.add(AbsentSchemaMetaDataError("Title", "Dublin Core"));
      }
    }
  }

  /**
   * Analyze if Author(s) embedded in Document Information dictionary and in XMP
   * properties are synchronized
   *
   * @param dico
   *          Document Information Dictionary
   * @param dc
   *          Dublin Core Schema
   * @param ve
   *          The list of validation errors
   */
  protected void analyzeAuthorProperty(PDDocumentInformation dico,
      DublinCoreSchema dc, List<ValidationError> ve) {
    String author = dico.getAuthor();
    if (author != null) {
      if (dc != null) {
        if (dc.getCreator() != null) {
          if (dc.getCreatorValue().size() != 1) {
            ve
                .add(AbsentXMPPropertyError(
                    "Author",
                    "In XMP metadata, Author(s) must be represented by a single entry in a text array (dc:creator) "));
          } else {
            if (dc.getCreatorValue().get(0) == null) {
              ve.add(AbsentXMPPropertyError("Author",
                  "Property is defined as null"));
            } else {
              if (!dc.getCreatorValue().get(0).equals(author)) {
                ve.add(unsynchronizedMetaDataError("Author"));
              }
            }
          }
        } else {
          ve.add(AbsentXMPPropertyError("Author",
              "Property is not defined in XMP Metadata"));
        }
      } else {
        ve.add(AbsentSchemaMetaDataError("Author", "Dublin Core"));
      }
    }
  }

  /**
   * Analyze if Subject(s) embedded in Document Information dictionary and in
   * XMP properties are synchronized
   *
   * @param dico
   *          Document Information Dictionary
   * @param dc
   *          Dublin Core Schema
   * @param ve
   *          The list of validation errors
   */
  protected void analyzeSubjectProperty(PDDocumentInformation dico,
      DublinCoreSchema dc, List<ValidationError> ve) {
    String subject = dico.getSubject();
    if (subject != null) {
      if (dc != null) {
        // PDF/A Conformance Erratum (2007) specifies XMP Subject
        // as a Text type embedded in the dc:description["x-default"].
        if (dc.getDescription() != null) {
          if (dc.getDescriptionValue("x-default") == null) {
            ve
                .add(AbsentXMPPropertyError("Subject",
                    "Subject not found in XMP (dc:description[\"x-default\"] not found)"));
          } else {
            if (!dc.getDescriptionValue("x-default").equals(subject)) {
              ve.add(unsynchronizedMetaDataError("Subject"));

            }
          }
        } else {
          ve.add(AbsentXMPPropertyError("Subject",
              "Property is defined as null"));
        }
      } else {
        ve.add(AbsentSchemaMetaDataError("Subject", "Dublin Core"));
      }
    }
  }

  /**
   * Analyze if Keyword(s) embedded in Document Information dictionary and in
   * XMP properties are synchronized
   *
   * @param dico
   *          Document Information Dictionary
   * @param pdf
   *          PDF Schema
   * @param ve
   *          The list of validation errors
   */
  protected void analyzeKeywordsProperty(PDDocumentInformation dico,
      AdobePDFSchema pdf, List<ValidationError> ve) {
    String keyword = dico.getKeywords();
    if (keyword != null) {
      if (pdf != null) {
        if (pdf.getKeywords() == null) {
          ve.add(AbsentXMPPropertyError("Keywords", "Property is not defined"));
        } else {
          if (!pdf.getKeywordsValue().equals(keyword)) {
            ve.add(unsynchronizedMetaDataError("Keywords"));
          }
        }
      } else {
        ve.add(AbsentSchemaMetaDataError("Keywords", "PDF"));
      }
    }
  }

  /**
   * Analyze if Producer embedded in Document Information dictionary and in XMP
   * properties are synchronized
   *
   * @param dico
   *          Document Information Dictionary
   * @param pdf
   *          PDF Schema
   * @param ve
   *          The list of validation errors
   */
  protected void analyzeProducerProperty(PDDocumentInformation dico,
      AdobePDFSchema pdf, List<ValidationError> ve) {
    String producer = dico.getProducer();
    if (producer != null) {
      if (pdf != null) {
        if (pdf.getProducer() == null) {
          ve.add(AbsentXMPPropertyError("Producer", "Property is not defined"));
        } else {
          if (!pdf.getProducerValue().equals(producer)) {
            ve.add(unsynchronizedMetaDataError("Producer"));
          }
        }
      } else {
        ve.add(AbsentSchemaMetaDataError("Producer", "PDF"));
      }
    }

  }

  /**
   * Analyze if the creator tool embedded in Document Information dictionary and
   * in XMP properties are synchronized
   *
   * @param dico
   *          Document Information Dictionary
   * @param xmp
   *          XMP Basic Schema
   * @param ve
   *          The list of validation errors
   *
   */
  protected void analyzeCreatorToolProperty(PDDocumentInformation dico,
      XMPBasicSchema xmp, List<ValidationError> ve) {
    String creatorTool = dico.getCreator();
    if (creatorTool != null) {
      if (xmp != null) {
        if (xmp.getCreatorTool() == null) {
          ve.add(AbsentXMPPropertyError("CreatorTool",
              "Property is not defined"));
        } else {
          if (!xmp.getCreatorToolValue().equals(creatorTool)) {
            ve.add(unsynchronizedMetaDataError("CreatorTool"));
          }
        }
      } else {
        ve.add(AbsentSchemaMetaDataError("CreatorTool", "PDF"));
      }
    }

  }

  /**
   * Analyze if the CreationDate embedded in Document Information dictionary and
   * in XMP properties are synchronized
   *
   * @param dico
   *          Document Information Dictionary
   * @param xmp
   *          XMP Basic Schema
   * @param ve
   *          The list of validation errors
   * @throws ValidationException
   */
  protected void analyzeCreationDateProperty(PDDocumentInformation dico,
      XMPBasicSchema xmp, List<ValidationError> ve) throws ValidationException {
    Calendar creationDate;
    try {
      creationDate = dico.getCreationDate();
    } catch (IOException e) {
      // If there is an error while converting this property to a date
      throw formatAccessException("Document Information", "CreationDate", e);
    }
    if (creationDate != null) {
      if (xmp != null) {
        Calendar xmpCreationDate = xmp.getCreateDateValue();

        if (xmpCreationDate == null) {
          ve.add(AbsentXMPPropertyError("CreationDate",
              "Property is not defined"));
        } else {
          if (!DateConverter.toISO8601(xmpCreationDate).equals(
              DateConverter.toISO8601(creationDate))) {
            ve.add(unsynchronizedMetaDataError("CreationDate"));
          }
        }

      } else {
        ve.add(AbsentSchemaMetaDataError("CreationDate", "Basic XMP"));
      }
    }
  }

  /**
   * Analyze if the ModifyDate embedded in Document Information dictionary and
   * in XMP properties are synchronized
   *
   * @param dico
   *          Document Information Dictionary
   * @param xmp
   *          XMP Basic Schema
   * @param ve
   *          The list of validation errors
   * @throws ValidationException
   */
  protected void analyzeModifyDateProperty(PDDocumentInformation dico,
      XMPBasicSchema xmp, List<ValidationError> ve) throws ValidationException {
    Calendar modifyDate;
    try {
      modifyDate = dico.getModificationDate();
      if (modifyDate != null) {
        if (xmp != null) {

          Calendar xmpModifyDate = xmp.getModifyDateValue();
          if (xmpModifyDate == null) {
            ve.add(AbsentXMPPropertyError("ModifyDate",
                "Property is not defined"));
          } else {
            if (!DateConverter.toISO8601(xmpModifyDate).equals(
                DateConverter.toISO8601(modifyDate))) {

              ve.add(unsynchronizedMetaDataError("ModificationDate"));
            }
          }

        } else {
          ve.add(AbsentSchemaMetaDataError("ModifyDate", "Basic XMP"));
        }
      }
    } catch (IOException e) {
      // If there is an error while converting this property to a date
      throw formatAccessException("Document Information", "ModifyDate", e);
    }

  }

  /**
   * Check if document information entries and XMP information are synchronized
   *
   * @param document
   *          the PDF Document
   * @param metadata
   *          the XMP MetaData
   * @return List of validation errors
   * @throws ValidationException
   */
  public List<ValidationError> validateMetadataSynchronization(PDDocument document, XMPMetadata metadata)
  throws ValidationException {
    List<ValidationError> ve = new ArrayList<ValidationError>();

    if (document == null) {
      throw new ValidationException("Document provided is null");
    } else {
      PDDocumentInformation dico = document.getDocumentInformation();
      if (metadata == null) {
        throw new ValidationException("Metadata provided are null");
      } else {
        DublinCoreSchema dc = metadata.getDublinCoreSchema();

        // TITLE
        analyzeTitleProperty(dico, dc, ve);
        // AUTHOR
        analyzeAuthorProperty(dico, dc, ve);
        // SUBJECT
        analyzeSubjectProperty(dico, dc, ve);

        AdobePDFSchema pdf = metadata.getAdobePDFSchema();

        // KEYWORDS
        analyzeKeywordsProperty(dico, pdf, ve);
        // PRODUCER
        analyzeProducerProperty(dico, pdf, ve);

        XMPBasicSchema xmp = metadata.getXMPBasicSchema();

        // CREATOR TOOL
        analyzeCreatorToolProperty(dico, xmp, ve);

        // CREATION DATE
        analyzeCreationDateProperty(dico, xmp, ve);

        // MODIFY DATE
        analyzeModifyDateProperty(dico, xmp, ve);

      }

    }
    return ve;
  }

  /**
   * Return a validationError formatted when a schema has not the expected
   * prefix
   *
   * @param prefFound
   * @param prefExpected
   * @param schema
   * @return
   */
  protected ValidationError UnexpectedPrefixFoundError(String prefFound,
      String prefExpected, String schema) {
    StringBuilder sb = new StringBuilder(80);
    sb.append(schema).append(" found but prefix used is '").append(prefFound)
        .append("', prefix '").append(prefExpected).append("' is expected.");

    return new ValidationError(
        ValidationConstants.ERROR_METADATA_WRONG_NS_PREFIX, sb.toString());
  }

  /**
   * Return an exception formatted on IOException when accessing metadata
   *
   * @param type
   *          type of property (Document Info or XMP)
   * @param target
   *          the name of the metadata
   * @param cause
   *          the raised IOException
   * @return the generated exception
   */
  protected ValidationException formatAccessException(String type,
      String target, Throwable cause) {
    StringBuilder sb = new StringBuilder(80);
    sb.append("Cannot treat ").append(type).append(" ").append(target).append(
        " property");
    return new ValidationException(sb.toString(), cause);
  }

  /**
   * Return an exception formatted on IOException when accessing on metadata
   * schema
   *
   * @param target
   *          the name of the schema
   * @param cause
   *          the raised IOException
   * @return the generated exception
   */
  protected ValidationException SchemaAccessException(String target,
      Throwable cause) {
    StringBuilder sb = new StringBuilder(80);
    sb.append("Cannot access to the ").append(target).append(" schema");
    return new ValidationException(sb.toString(), cause);
  }

  /**
   * Return a formatted validation error when metadata are not synchronized
   *
   * @param target
   *          the concerned property
   * @return the generated validation error
   */
  protected ValidationError unsynchronizedMetaDataError(String target) {
    StringBuilder sb = new StringBuilder(80);
    sb
        .append(target)
        .append(
            " present in the document catalog dictionary doesn't match with XMP information");
    return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb
        .toString());
  }

  /**
   * Return a formatted validation error when a specific metadata schema can't
   * be found
   *
   * @param target
   *          the concerned property
   * @param schema
   *          the XMP schema which can't be found
   * @return the generated validation error
   */
  protected ValidationError AbsentSchemaMetaDataError(String target,
      String schema) {
    StringBuilder sb = new StringBuilder(80);
    sb
        .append(target)
        .append(
            " present in the document catalog dictionary can't be found in XMP information (")
        .append(schema).append(" schema not declared)");
    return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb
        .toString());
  }

  /**
   * Return a formatted validation error when a specific XMP property can't be
   * found
   *
   * @param target
   *          the concerned property
   * @param details
   *          comments about the XMP property
   * @return the generated validation error
   */
  protected ValidationError AbsentXMPPropertyError(String target, String details) {
    StringBuilder sb = new StringBuilder(80);
    sb
        .append(target)
        .append(
            " present in the document catalog dictionary can't be found in XMP information (")
        .append(details).append(")");
    return new ValidationError(ValidationConstants.ERROR_METADATA_MISMATCH, sb
        .toString());
  }
}
TOP

Related Classes of org.apache.padaf.preflight.xmp.SynchronizedMetaDataValidation

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.