Package eu.planets_project.ifr.core.services.identification.jhove.impl

Source Code of eu.planets_project.ifr.core.services.identification.jhove.impl.JhoveIdentification

package eu.planets_project.ifr.core.services.identification.jhove.impl;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
import java.util.logging.Logger;

import javax.ejb.Stateless;
import javax.jws.WebService;
import javax.xml.ws.soap.MTOM;

import com.sun.xml.ws.developer.StreamingAttachment;

import edu.harvard.hul.ois.jhove.App;
import edu.harvard.hul.ois.jhove.JhoveBase;
import edu.harvard.hul.ois.jhove.JhoveException;
import edu.harvard.hul.ois.jhove.handler.TextHandler;
import eu.planets_project.ifr.core.techreg.formats.FormatRegistry;
import eu.planets_project.ifr.core.techreg.formats.FormatRegistryFactory;
import eu.planets_project.services.PlanetsServices;
import eu.planets_project.services.datatypes.DigitalObject;
import eu.planets_project.services.datatypes.Parameter;
import eu.planets_project.services.datatypes.ServiceDescription;
import eu.planets_project.services.datatypes.ServiceReport;
import eu.planets_project.services.datatypes.ServiceReport.Status;
import eu.planets_project.services.datatypes.ServiceReport.Type;
import eu.planets_project.services.datatypes.Tool;
import eu.planets_project.services.identify.Identify;
import eu.planets_project.services.identify.IdentifyResult;
import eu.planets_project.services.utils.DigitalObjectUtils;
import eu.planets_project.services.utils.ServiceUtils;

/**
* JHOVE identification service.
* @author Fabian Steeg
*/
@WebService(
    name = JhoveIdentification.NAME,
    serviceName = Identify.NAME,
    targetNamespace = PlanetsServices.NS,
    endpointInterface = "eu.planets_project.services.identify.Identify")
@Stateless
@MTOM
@StreamingAttachment( parseEagerly=true, memoryThreshold=ServiceUtils.JAXWS_SIZE_THRESHOLD )
public final class JhoveIdentification implements Identify, Serializable {
    private static Logger log = Logger.getLogger(JhoveIdentification.class.getName());
    static final String NAME = "JhoveIdentification";
    private static final String ENCODING = "utf-8";
    private static final String RESOURCES = "PC/jhove/src/resources/";
    private static final String CONFIG_FILE = "jhove.conf";
    private static final long serialVersionUID = 1127650680714441971L;
    private static final String PLANETS = "Planets";
    private static final String CONF = "/server/default/conf/";
    private static final String JBOSS_HOME_DIR_KEY = "jboss.home.dir";
    private static final String OUTPUT = "planets-jhove-output";
    private String status;

    /**
     * {@inheritDoc}
     * @see eu.planets_project.services.identify.Identify#identify(eu.planets_project.services.datatypes.DigitalObject,
     *      java.util.List)
     */
    public IdentifyResult identify(final DigitalObject digitalObject,
            final List<Parameter> parameters) {
        File file = DigitalObjectUtils.toFile(digitalObject);
        List<URI> types = identifyOneBinary(file);
        log.info("JHOVE Identification, got types: " + types);
        ServiceReport report;
        if (types == null || types.size() == 0 || types.contains(null)) {
            report = new ServiceReport(Type.ERROR, Status.TOOL_ERROR,
                    "Could not identify " + file.getAbsolutePath()
                            + " (is the required Jhove module installed?)");
        } else {
            report = new ServiceReport(Type.INFO, Status.SUCCESS, status);
        }
        return new IdentifyResult(types, null, report);
    }

    /**
     * {@inheritDoc}
     * @see eu.planets_project.services.identify.Identify#describe()
     */
    public ServiceDescription describe() {
        ServiceDescription.Builder sd = new ServiceDescription.Builder(
                "JHOVE Identification Service", Identify.class
                        .getCanonicalName());
        sd.classname(this.getClass().getCanonicalName());
        sd.description("Identification service using JHOVE (1.5).");
        sd.author("Fabian Steeg");
        sd.tool(Tool.create(null, "JHOVE", "1.5", null,
                "http://hul.harvard.edu/jhove/"));
        sd.logo(URI.create("http://hul.harvard.edu/jhove/jhove-logo.gif"));
        sd.furtherInfo(URI.create("http://hul.harvard.edu/jhove/"));
        sd.inputFormats(inputFormats());
        sd.serviceProvider("The Planets Consortium");
        return sd.build();
    }

    /**
     * Simple enumeration of MIME types recognized by JHOVE.
     */
    private static enum SupportedFileType {
        AIFF("audio/x-aiff"),
        ASCII("text/plain"),
        GIF("image/gif"),
        HTML("text/html"),
        JPEG("image/jpeg"),
        PDF("application/pdf"),
        TIFF("image/tiff"),
        WAVE("audio/x-wav"),
        XML("text/xml");
        /***/
        private String mime;

        /**
         * @param mime The mime type
         */
        private SupportedFileType(final String mime) {
            this.mime = mime;
        }

       
    }

    /**
     * @param format The format to check for support
     * @return True, if the given format is supported
     */
    public static boolean supported(final URI format) {
        for (URI uri : JhoveIdentification.inputFormats()) {
            FormatRegistry registry = FormatRegistryFactory.getFormatRegistry();
            if (registry.getFormatUriAliases(uri).contains(format)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return An array of Pronom IDs supported as input formats by Jhove
     */
    public static URI[] inputFormats() {
        List<URI> result = new ArrayList<URI>();
        for (SupportedFileType type : SupportedFileType.values()) {
            FormatRegistry formatRegistry = FormatRegistryFactory
                    .getFormatRegistry();
            Set<URI> urisForMimeType = formatRegistry
                    .getUrisForMimeType(type.mime);
            if (urisForMimeType != null && urisForMimeType.size() > 0) {
                result.addAll(urisForMimeType);
            }
        }
        return result.toArray(new URI[] {});
    }

    /**
     * The actual JHOVE identification method.
     * @param temporary The file to be identified
     * @return Returns a Types object containing the result of the JHOVE
     *         identification
     */
    private List<URI> identifyOneBinary(final File temporary) {
        try {
            /* And use the JHOVE API to identify it: */
            JhoveBase base = new JhoveBase();
            base.setEncoding(ENCODING);
            Calendar calendar = Calendar.getInstance();
            int[] date = new int[] { calendar.get(Calendar.YEAR),
                    calendar.get(Calendar.MONTH),
                    calendar.get(Calendar.DAY_OF_MONTH) };
            App app = new App(PLANETS, PLANETS, date, PLANETS, PLANETS);
            base.init(config() + CONFIG_FILE, null);
            /* For JHOVE's output, we create another temporary file: */
            File output = File.createTempFile(OUTPUT, null);
            output.deleteOnExit();
            base.dispatch(app, null, null, new TextHandler(), output
                    .getAbsolutePath(), new String[] { temporary
                    .getAbsolutePath() });
            /* And finally we create our result object with the PRONOM ID: */
            return result(base);
        } catch (JhoveException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Retrieves the results of a JhoveBase (by extracting from the resulting
     * text file, I have no idea how to get it straight from the JhoveBase,
     * there is no API for this).
     * @param base The JhoveBase to get the results from
     * @return Returns a Types object with the result for the JhoveBase
     */
    private List<URI> result(final JhoveBase base) {
        String file = base.getOuputFile();
        File f = new File(file);
        Scanner s = null;
        try {
            s = new Scanner(f);
        } catch (FileNotFoundException e) {
            IllegalArgumentException exception = new IllegalArgumentException(
                    "JhoveBase in use has no proper output file;");
            exception.initCause(e);
            throw exception;
        }
        String mime = null;
        String status = null;
        StringBuilder builder = new StringBuilder();
        while (s.hasNextLine()) {
            String line = s.nextLine().trim().toLowerCase();
            if (line.startsWith("mimetype:")) {
                mime = value(line);
            } else if (line.startsWith("status:")) {
                status = value(line);
            }
            builder.append(line);
        }
        if (mime == null) {
            String output = builder.toString();
            throw new IllegalStateException(
                    "Identification failed with status: " + status
                            + " (no mime type in "
                            + (output.length() == 0 ? "empty output" : output)
                            + ")");
        }
        /* Jhove output uses 'x-wave' while droid uses 'x-wav': */
        mime = mime.replace("x-wave", "x-wav");
        log.info("Got mime type: " + mime);
        log.info("Got status: " + status);
        this.status = status;
        List<URI> types = new ArrayList<URI>();
        FormatRegistry registry = FormatRegistryFactory.getFormatRegistry();
        Set<URI> uris = registry.getUrisForMimeType(mime);
        types.addAll(uris);
        return types;
    }

    /**
     * @param line The line to retrieve the value from
     * @return Returns the value of the line (after the colon, before a
     *         semicolon)
     */
    private String value(final String line) {
        return line.split(":")[1].trim().split(";")[0].trim();
    }

    /**
     * @return If running in JBoss, returns the deployment directory, else (like
     *         when running a unit test) returns the project directory to
     *         retrieve the concepts file
     */
    private static String config() {
        String deployedJBossHome = System.getProperty(JBOSS_HOME_DIR_KEY);
        String folder = (deployedJBossHome != null ? deployedJBossHome + CONF
                : RESOURCES);
        return folder;
    }
}
TOP

Related Classes of eu.planets_project.ifr.core.services.identification.jhove.impl.JhoveIdentification

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.