Package org.apache.tika.xmp

Source Code of org.apache.tika.xmp.XMPMetadata

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.xmp;

import java.io.IOException;
import java.io.NotSerializableException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.Map;
import java.util.Properties;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.Property.PropertyType;
import org.apache.tika.metadata.PropertyTypeException;
import org.apache.tika.xmp.convert.TikaToXMP;

import com.adobe.xmp.XMPDateTime;
import com.adobe.xmp.XMPException;
import com.adobe.xmp.XMPIterator;
import com.adobe.xmp.XMPMeta;
import com.adobe.xmp.XMPMetaFactory;
import com.adobe.xmp.XMPSchemaRegistry;
import com.adobe.xmp.XMPUtils;
import com.adobe.xmp.options.IteratorOptions;
import com.adobe.xmp.options.PropertyOptions;
import com.adobe.xmp.options.SerializeOptions;
import com.adobe.xmp.properties.XMPProperty;

/**
* Provides a conversion of the Metadata map from Tika to the XMP data model by also providing the
* Metadata API for clients to ease transition. But clients can also work directly on the XMP data
* model, by getting the XMPMeta reference from this class. Usually the instance would be
* initialized by providing the Metadata object that had been returned from Tika-core which
* populates the XMP data model with all properties that can be converted.
*
* This class is not serializable!
*/
@SuppressWarnings("serial")
public class XMPMetadata extends Metadata {
    /** The XMP data */
    private XMPMeta xmpData;
    /** Use the XMP namespace registry implementation */
    private static final XMPSchemaRegistry registry = XMPMetaFactory.getSchemaRegistry();

    /**
     * Initializes with an empty XMP packet
     */
    public XMPMetadata() {
        xmpData = XMPMetaFactory.create();
    }

    /**
     * @see #XMPMetadata(Metadata, String)
     * But the mimetype is retrieved from the metadata map.
     */
    public XMPMetadata(Metadata meta) throws TikaException {
        this.xmpData = TikaToXMP.convert( meta );
    }

    /**
     * Initializes the data by converting the Metadata information to XMP. If a mimetype is
     * provided, a specific converter can be used, that converts all available metadata. If there is
     * no mimetype provided or no specific converter available a generic conversion is done which
     * will convert only those properties that are in known namespaces and are using the correct
     * prefixes
     *
     * @param meta
     *            the Metadata information from Tika-core
     * @param mimetype
     *            mimetype information
     * @throws In
     *             case an error occured during conversion
     */
    public XMPMetadata(Metadata meta, String mimetype) throws TikaException {
        this.xmpData = TikaToXMP.convert( meta, mimetype );
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#process(org.apache.tika.metadata.Metadata,
     *      java.lang.String)
     *  But the mimetype is retrieved from the metadata map.
     */
    public void process(Metadata meta) throws TikaException {
        this.xmpData = TikaToXMP.convert( meta );
    }

    /**
     * Converts the Metadata information to XMP. If a mimetype is provided, a specific converter can
     * be used, that converts all available metadata. If there is no mimetype provided or no
     * specific converter available a generic conversion is done which will convert only those
     * properties that are in known namespaces and are using the correct prefixes
     *
     * @param meta
     *            the Metadata information from Tika-core
     * @param mimetype
     *            mimetype information
     * @throws In
     *             case an error occured during conversion
     */
    public void process(Metadata meta, String mimetype) throws TikaException {
        this.xmpData = TikaToXMP.convert( meta, mimetype );
    }

    /**
     * Provides direct access to the XMP data model, in case a client prefers to work directly on it
     * instead of using the Metadata API
     *
     * @return the "internal" XMP data object
     */
    public XMPMeta getXMPData() {
        return xmpData;
    }

    // === Namespace Registry API === //
    /**
     * Register a namespace URI with a suggested prefix. It is not an error if the URI is already
     * registered, no matter what the prefix is. If the URI is not registered but the suggested
     * prefix is in use, a unique prefix is created from the suggested one. The actual registeed
     * prefix is always returned. The function result tells if the registered prefix is the
     * suggested one.
     * Note: No checking is presently done on either the URI or the prefix.
     *
     * @param namespaceURI
     *            The URI for the namespace. Must be a valid XML URI.
     * @param suggestedPrefix
     *            The suggested prefix to be used if the URI is not yet registered. Must be a valid
     *            XML name.
     * @return Returns the registered prefix for this URI, is equal to the suggestedPrefix if the
     *         namespace hasn't been registered before, otherwise the existing prefix.
     * @throws XMPException
     *             If the parameters are not accordingly set
     */
    public static String registerNamespace(String namespaceURI, String suggestedPrefix)
            throws XMPException {
        return registry.registerNamespace( namespaceURI, suggestedPrefix );
    }

    /**
     * Obtain the prefix for a registered namespace URI.
     * It is not an error if the namespace URI is not registered.
     *
     * @param namespaceURI
     *            The URI for the namespace. Must not be null or the empty string.
     * @return Returns the prefix registered for this namespace URI or null.
     */
    public static String getNamespacePrefix(String namespaceURI) {
        return registry.getNamespacePrefix( namespaceURI );
    }

    /**
     * Obtain the URI for a registered namespace prefix.
     * It is not an error if the namespace prefix is not registered.
     *
     * @param namespacePrefix
     *            The prefix for the namespace. Must not be null or the empty string.
     * @return Returns the URI registered for this prefix or null.
     */
    public static String getNamespaceURI(String namespacePrefix) {
        return registry.getNamespaceURI( namespacePrefix );
    }

    /**
     * @return Returns the registered prefix/namespace-pairs as map, where the keys are the
     *         namespaces and the values are the prefixes.
     */
    @SuppressWarnings("unchecked")
    public static Map<String, String> getNamespaces() {
        return registry.getNamespaces();
    }

    /**
     * @return Returns the registered namespace/prefix-pairs as map, where the keys are the prefixes
     *         and the values are the namespaces.
     */
    @SuppressWarnings("unchecked")
    public static Map<String, String> getPrefixes() {
        return registry.getPrefixes();
    }

    /**
     * Deletes a namespace from the registry.
     * <p>
     * Does nothing if the URI is not registered, or if the namespaceURI parameter is null or the
     * empty string.
     * <p>
     * Note: Not yet implemented.
     *
     * @param namespaceURI
     *            The URI for the namespace.
     */
    public static void deleteNamespace(String namespaceURI) {
        registry.deleteNamespace( namespaceURI );
    }

    // === Metadata API === //
    /**
     * @see org.apache.tika.xmp.XMPMetadata#isMultiValued(java.lang.String)
     */
    @Override
    public boolean isMultiValued(Property property) {
        return this.isMultiValued( property.getName() );
    }

    /**
     * Checks if the named property is an array.
     *
     * @see org.apache.tika.metadata.Metadata#isMultiValued(java.lang.String)
     */
    @Override
    public boolean isMultiValued(String name) {
        checkKey( name );

        String[] keyParts = splitKey( name );

        String ns = registry.getNamespaceURI( keyParts[0] );
        if (ns != null) {
            try {
                XMPProperty prop = xmpData.getProperty( ns, keyParts[1] );

                return prop.getOptions().isArray();
            }
            catch (XMPException e) {
                // Ignore
            }
        }

        return false;
    }

    /**
     * For XMP it is not clear what that API should return, therefor not implemented
     */
    @Override
    public String[] names() {
        throw new UnsupportedOperationException( "Not implemented" );
    }

    /**
     * Returns the value of a simple property or the first one of an array. The given name must
     * contain a namespace prefix of a registered namespace.
     *
     * @see org.apache.tika.metadata.Metadata#get(java.lang.String)
     */
    @Override
    public String get(String name) {
        checkKey( name );

        String value = null;
        String[] keyParts = splitKey( name );

        String ns = registry.getNamespaceURI( keyParts[0] );
        if (ns != null) {
            try {
                XMPProperty prop = xmpData.getProperty( ns, keyParts[1] );

                if (prop != null && prop.getOptions().isSimple()) {
                    value = prop.getValue();
                }
                else if (prop != null && prop.getOptions().isArray()) {
                    prop = xmpData.getArrayItem( ns, keyParts[1], 1 );
                    value = prop.getValue();
                }
                // in all other cases, null is returned
            }
            catch (XMPException e) {
                // Ignore
            }
        }

        return value;
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String)
     */
    @Override
    public String get(Property property) {
        return this.get( property.getName() );
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String)
     */
    @Override
    public Integer getInt(Property property) {
        Integer result = null;

        try {
            result = new Integer( XMPUtils.convertToInteger( this.get( property.getName() ) ) );
        }
        catch (XMPException e) {
            // Ignore
        }

        return result;
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#get(java.lang.String)
     */
    @Override
    public Date getDate(Property property) {
        Date result = null;

        try {
            XMPDateTime xmpDate = XMPUtils.convertToDate( this.get( property.getName() ) );
            if (xmpDate != null) {
                Calendar cal = xmpDate.getCalendar();
                // TODO Timezone is currently lost
                // need another solution that preserves the timezone
                result = cal.getTime();
            }
        }
        catch (XMPException e) {
            // Ignore
        }

        return result;
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#getValues(java.lang.String)
     */
    @Override
    public String[] getValues(Property property) {
        return this.getValues( property.getName() );
    }

    /**
     * Returns the value of a simple property or all if the property is an array and the elements
     * are of simple type. The given name must contain a namespace prefix of a registered namespace.
     *
     * @see org.apache.tika.metadata.Metadata#getValues(java.lang.String)
     */
    @Override
    public String[] getValues(String name) {
        checkKey( name );

        String[] value = null;
        String[] keyParts = splitKey( name );

        String ns = registry.getNamespaceURI( keyParts[0] );
        if (ns != null) {
            try {
                XMPProperty prop = xmpData.getProperty( ns, keyParts[1] );

                if (prop != null && prop.getOptions().isSimple()) {
                    value = new String[1];
                    value[0] = prop.getValue();
                }
                else if (prop != null && prop.getOptions().isArray()) {
                    int size = xmpData.countArrayItems( ns, keyParts[1] );
                    value = new String[size];
                    boolean onlySimpleChildren = true;

                    for (int i = 0; i < size && onlySimpleChildren; i++) {
                        prop = xmpData.getArrayItem( ns, keyParts[1], i + 1 );
                        if (prop.getOptions().isSimple()) {
                            value[i] = prop.getValue();
                        }
                        else {
                            onlySimpleChildren = false;
                        }
                    }

                    if (!onlySimpleChildren) {
                        value = null;
                    }
                }
                // in all other cases, null is returned
            }
            catch (XMPException e) {
                // Ignore
            }
        }

        return value;
    }

    /**
     * As this API could only possibly work for simple properties in XMP, it just calls the set
     * method, which replaces any existing value
     *
     * @see org.apache.tika.metadata.Metadata#add(java.lang.String, java.lang.String)
     */
    @Override
    public void add(String name, String value) {
        set( name, value );
    }

    /**
     * Sets the given property. If the property already exists, it is overwritten. Only simple
     * properties that use a registered prefix are stored in the XMP.
     *
     * @see org.apache.tika.metadata.Metadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(String name, String value) {
        checkKey( name );

        String[] keyParts = splitKey( name );

        String ns = registry.getNamespaceURI( keyParts[0] );
        if (ns != null) {
            try {
                xmpData.setProperty( ns, keyParts[1], value );
            }
            catch (XMPException e) {
                // Ignore
            }
        }
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(Property property, String value) {
        this.set( property.getName(), value );
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(Property property, int value) {
        // Can reuse the checks from the base class implementation which will call
        // the set(String, String) method in the end
        super.set( property, value );
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(Property property, double value) {
        super.set( property, value );
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#set(java.lang.String, java.lang.String)
     */
    @Override
    public void set(Property property, Date date) {
        super.set( property, date );
    }

    /**
     * Sets array properties. If the property already exists, it is overwritten. Only array
     * properties that use a registered prefix are stored in the XMP.
     *
     * @see org.apache.tika.metadata.Metadata#set(org.apache.tika.metadata.Property,
     *      java.lang.String[])
     */
    @Override
    public void set(Property property, String[] values) {
        checkKey( property.getName() );

        if (!property.isMultiValuePermitted()) {
            throw new PropertyTypeException( "Property is not of an array type" );
        }

        String[] keyParts = splitKey( property.getName() );

        String ns = registry.getNamespaceURI( keyParts[0] );
        if (ns != null) {
            try {
                int arrayType = tikaToXMPArrayType( property.getPrimaryProperty().getPropertyType() );
                xmpData.setProperty( ns, keyParts[1], null, new PropertyOptions( arrayType ) );

                for (String value : values) {
                    xmpData.appendArrayItem( ns, keyParts[1], value );
                }
            }
            catch (XMPException e) {
                // Ignore
            }
        }
    }

    /**
     * It will set all simple and array properties that have QName keys in registered namespaces.
     *
     * @see org.apache.tika.metadata.Metadata#setAll(java.util.Properties)
     */
    @Override
    public void setAll(Properties properties) {
        @SuppressWarnings("unchecked")
        Enumeration<String> names = (Enumeration<String>) properties.propertyNames();

        while (names.hasMoreElements()) {
            String name = names.nextElement();
            Property property = Property.get( name );
            if (property == null) {
                throw new PropertyTypeException( "Unknown property: " + name );
            }

            String value = properties.getProperty( name );

            if (property.isMultiValuePermitted()) {
                this.set( property, new String[] { value } );
            }
            else {
                this.set( property, value );
            }
        }
    }

    /**
     * @see org.apache.tika.xmp.XMPMetadata#remove(java.lang.String)
     */
    public void remove(Property property) {
        this.remove( property.getName() );
    }

    /**
     * Removes the given property from the XMP data. If it is a complex property the whole subtree
     * is removed
     *
     * @see org.apache.tika.metadata.Metadata#remove(java.lang.String)
     */
    @Override
    public void remove(String name) {
        checkKey( name );

        String[] keyParts = splitKey( name );

        String ns = registry.getNamespaceURI( keyParts[0] );
        if (ns != null) {
            xmpData.deleteProperty( ns, keyParts[1] );
        }
    }

    /**
     * Returns the number of top-level namespaces
     */
    @Override
    public int size() {
        int size = 0;

        try {
            // Get an iterator for the XMP packet, starting at the top level schema nodes
            XMPIterator nsIter = xmpData.iterator( new IteratorOptions().setJustChildren( true )
                    .setOmitQualifiers( true ) );
            // iterate all top level namespaces
            while (nsIter.hasNext()) {
                nsIter.next();
                size++;
            }
        }
        catch (XMPException e) {
            // ignore
        }

        return size;
    }

    /**
     * This method is not implemented, yet. It is very tedious to check for semantic equality of XMP
     * packets
     */
    @Override
    public boolean equals(Object o) {
        throw new UnsupportedOperationException( "Not implemented" );
    }

    /**
     * Serializes the XMP data in compact form without packet wrapper
     *
     * @see org.apache.tika.metadata.Metadata#toString()
     */
    @Override
    public String toString() {
        String result = null;
        try {
            result = XMPMetaFactory.serializeToString( xmpData, new SerializeOptions()
                    .setOmitPacketWrapper( true ).setUseCompactFormat( true ) );
        }
        catch (XMPException e) {
            // ignore
        }
        return result;
    }

    // The XMP object is not serializable!
    private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException {
        throw new NotSerializableException();
    }

    // The XMP object is not serializable!
    private void writeObject(ObjectOutputStream ois) throws IOException {
        throw new NotSerializableException();
    }

    /**
     * Checks if the given key is a valid QName with a known standard namespace prefix
     *
     * @param key
     *            the key to check
     * @return true if the key is valid otherwise false
     */
    private void checkKey(String key) throws PropertyTypeException {
        if (key == null || key.length() == 0) {
            throw new PropertyTypeException( "Key must not be null" );
        }

        String[] keyParts = splitKey( key );
        if (keyParts == null) {
            throw new PropertyTypeException( "Key must be a QName in the form prefix:localName" );
        }

        if (registry.getNamespaceURI( keyParts[0] ) == null) {
            throw new PropertyTypeException( "Key does not use a registered Namespace prefix" );
        }
    }

    /**
     * Split the given key at the namespace prefix delimiter
     *
     * @param key
     *            the key to split
     * @return prefix and local name of the property or null if the key did not contain a delimiter
     *         or too much of them
     */
    private String[] splitKey(String key) {
        String[] keyParts = key.split( Metadata.NAMESPACE_PREFIX_DELIMITER );
        if (keyParts.length > 0 && keyParts.length <= 2) {
            return keyParts;
        }

        return null;
    }// checkKeyPrefix

    /**
     * Convert Tika array types to XMP array types
     *
     * @param type
     * @return
     */
    private int tikaToXMPArrayType(PropertyType type) {
        int result = 0;
        switch (type) {
            case BAG:
                result = PropertyOptions.ARRAY;
                break;
            case SEQ:
                result = PropertyOptions.ARRAY_ORDERED;
                break;
            case ALT:
                result = PropertyOptions.ARRAY_ALTERNATE;
                break;
        }
        return result;
    }
}
TOP

Related Classes of org.apache.tika.xmp.XMPMetadata

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.