Package eu.planets_project.services.migrate.jtidy

Source Code of eu.planets_project.services.migrate.jtidy.JTidy

package eu.planets_project.services.migrate.jtidy;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.logging.Logger;

import javax.jws.WebService;
import javax.xml.ws.soap.MTOM;

import org.apache.commons.io.IOUtils;
import org.w3c.tidy.Tidy;

import com.sun.xml.ws.developer.StreamingAttachment;

import eu.planets_project.ifr.core.techreg.formats.FormatRegistry;
import eu.planets_project.ifr.core.techreg.formats.FormatRegistryFactory;
import eu.planets_project.services.PlanetsServices;
import eu.planets_project.services.datatypes.Content;
import eu.planets_project.services.datatypes.DigitalObject;
import eu.planets_project.services.datatypes.MigrationPath;
import eu.planets_project.services.datatypes.Parameter;
import eu.planets_project.services.datatypes.ServiceDescription;
import eu.planets_project.services.datatypes.ServiceReport;
import eu.planets_project.services.datatypes.ServiceReport.Status;
import eu.planets_project.services.datatypes.ServiceReport.Type;
import eu.planets_project.services.datatypes.Tool;
import eu.planets_project.services.migrate.Migrate;
import eu.planets_project.services.migrate.MigrateResult;
import eu.planets_project.services.utils.DigitalObjectUtils;
import eu.planets_project.services.utils.ServiceUtils;

/**
* JTidy migration service.
* @author Peter Melms
*/
@WebService(name = JTidy.NAME, serviceName = Migrate.NAME, targetNamespace = PlanetsServices.NS, endpointInterface = "eu.planets_project.services.migrate.Migrate")
@MTOM
@StreamingAttachment( parseEagerly=true, memoryThreshold=ServiceUtils.JAXWS_SIZE_THRESHOLD )
public class JTidy implements Migrate, Serializable {

    /**
     * The service name, used as the web service name in the
     * {@code @WebService} annotation and in the service description.
     */
    public static final String NAME = "JTidy";

    /** Serialization version identifier (the class implements {@code Serializable}). */
    private static final long serialVersionUID = 4563930985924810312L;
    /** Class-wide java.util.logging logger, named after this class. */
    private static final Logger log = Logger.getLogger(JTidy.class.getName());
    /**
     * Name of the migrate parameter (matched case-insensitively) whose value
     * is loaded as JTidy configuration properties.
     */
    private static final String CONFIG_FILE_PARAM = "configFile";
    /**
     * Name of the file, created next to the input file, that receives
     * JTidy's error/warning output during a migration.
     */
    private static final String LOG_FILE_NAME = "jTidy_log.txt";
    /**
     * Name of the classpath resource (resolved relative to this class) that
     * is loaded as configuration when no config parameter is passed.
     */
    private static final String DEFAULT_CONFIGURATION = "config_file.properties";

    /**
     * {@inheritDoc}
     * @see eu.planets_project.services.migrate.Migrate#describe()
     */
    public ServiceDescription describe() {
        ServiceDescription.Builder sd = new ServiceDescription.Builder(
                JTidy.NAME, Migrate.class.getCanonicalName());
        sd.author("Peter Melms, mailto:peter.melms@uni-koeln.de");
        sd
                .description("This is a wrapper for the JTidy Java API.\n"
                        + "JTidy converts older or not compliant HTML files to XHTML compliant files.\n"
                        + "Could be used as HTML pretty printer as well.\n"
                        + "JTidy can be configured by passing a config file. In this case"
                        + "the config file has be passed using the Parameters, i.e. reading the config.txt into "
                        + "a String.");

        sd.classname(this.getClass().getCanonicalName());
        sd.version("0.1");

        List<URI> inputFormatsPUID = new ArrayList<URI>();
        List<URI> outputFormatsPUID = new ArrayList<URI>();
        FormatRegistry formatRegistry = FormatRegistryFactory
                .getFormatRegistry();

        inputFormatsPUID.add(formatRegistry.createPronomUri("fmt/96")); // HTML
        // versions
        // older than
        // 2.0
        inputFormatsPUID.add(formatRegistry.createPronomUri("fmt/97")); // HTML 2.0
        inputFormatsPUID.add(formatRegistry.createPronomUri("fmt/98")); // HTML 3.2
        inputFormatsPUID.add(formatRegistry.createPronomUri("fmt/99")); // HTML 4.0
        inputFormatsPUID.add(formatRegistry.createPronomUri("fmt/100")); // HTML 4.01

        outputFormatsPUID.add(formatRegistry.createPronomUri("fmt/102")); // XHTML 1.0

        List<Parameter> parameterList = new ArrayList<Parameter>();
        Parameter configFile = new Parameter.Builder("configFile",
                "The content of a config file read into a String.").description(
                "Allows configurating JTidy by passing it a config file consisting of name=value pairs.\n"
                + "like 'show-warnings=yes'\n"
                + "\nFor further instructions on configurating JTidy please see the "
                + "JTidy (http://jtidy.sourceforge.net/)and "
                + "Tidy (http://www.w3.org/People/Raggett/tidy/) website!").build();
        parameterList.add(configFile);

        MigrationPath[] migrationPathwaysPUID = createMigrationPathwayMatrix(
                inputFormatsPUID, outputFormatsPUID, parameterList);

        sd.paths(migrationPathwaysPUID);
        sd.tool(Tool.create(null, "JTidy", "rc820", "JTidy converts older or not compliant HTML files to XHTML compliant files.\n"
                + "Could be used as HTML pretty printer as well.\n"
                + "JTidy can be configured by passing a config file. In this case"
                + "the config file has be passed using the Parameters, i.e. reading the config.txt into "
                + "a String.", "http://jtidy.sourceforge.net"));
        sd.logo(URI.create("http://jtidy.sf.net/images/logo.png"));

        sd.parameters(parameterList);

        return sd.build();
    }

    /**
     * {@inheritDoc}
     */
    public MigrateResult migrate(DigitalObject digitalObject, URI inputFormat,
            URI outputFormat, List<Parameter> parameters) {

        DigitalObject result = null;
        Properties userProps = null;
        InputStream stream = null;
        File outHTML = null;
        File inHTML = DigitalObjectUtils.toFile(digitalObject);
        File logFile = new File(inHTML.getParentFile(), LOG_FILE_NAME);
        Tidy tidy = new Tidy();
        try {
            outHTML = File.createTempFile("out", null);

            if (parameters != null) {
                if (parameters.size() > 0) {
                    log.info("Got additional parameters!");
                    for (Parameter parameter : parameters) {
                        String name = parameter.getName();
                        if (name.equalsIgnoreCase(CONFIG_FILE_PARAM)) {
                            userProps = new Properties();
                            try {
                                userProps.load(new ByteArrayInputStream(parameter.getValue()
                                        .getBytes()));
                                log.info("Using this parameters for JTidy: \n"
                                        + "-----------------------------------\n"
                                        + parameter.getValue());
                            } catch (IOException e1) {
                                log.severe("Could not write config file!!!");
                                e1.printStackTrace();
                            }
                            break;
                        }
                    }
                }
            }

            BufferedInputStream inHTMLStream;

            inHTMLStream = new BufferedInputStream(new FileInputStream(inHTML));
            BufferedOutputStream outHTMLStream = new BufferedOutputStream(
                    new FileOutputStream(outHTML));

            if (userProps != null) {
                tidy.setConfigurationFromProps(userProps);
            }

            // use default config
            else {
                Properties defaultProps = new Properties();
                stream = this.getClass().getResourceAsStream(
                        DEFAULT_CONFIGURATION);
                defaultProps.load(stream);
                tidy.setConfigurationFromProps(defaultProps);
                log
                        .info("no additional configuration file passed, using DEFAULT config instead!\n");
                String defConfig = IOUtils.toString(this.getClass()
                                .getResourceAsStream(DEFAULT_CONFIGURATION));
                log.info(defConfig);
            }

            tidy.setShowWarnings(true);
            tidy
                    .setErrout(new PrintWriter(new FileOutputStream(logFile),
                            true));
            tidy.setWriteback(true);
            tidy.setOnlyErrors(false);
            tidy.setInputStreamName(inHTML.getAbsolutePath());
            tidy.setTidyMark(true);
            tidy.parse(inHTMLStream, outHTMLStream);
            inHTMLStream.close();
            outHTMLStream.flush();
            outHTMLStream.close();

        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally{
            IOUtils.closeQuietly(stream);
        }

        if (outHTML != null && outHTML.canRead()) {
            result = new DigitalObject.Builder(Content
                        .byValue(outHTML)).title(outHTML.getName()).format(
                        outputFormat)
                        .permanentUri(
                                URI.create(PlanetsServices.SERVICES_NS
                                        + "/pserv-pa-jtidy")).build();

            ServiceReport sr;

            String message = createStringFromLogFile(logFile, true);
            if (tidy.getParseErrors() > 0) {
                sr = new ServiceReport(Type.ERROR, Status.TOOL_ERROR, message);
            } else {
                sr = new ServiceReport(Type.INFO, Status.SUCCESS, message);
            }

            MigrateResult mr = new MigrateResult(result, sr);

            return mr;
        } else {
            return this.returnWithErrorMessage(
                    "There seems to be a problem: No Result file created!",
                    null);
        }

    }

    /**
     * @param message an optional message on what happened to the service
     * @param e the Exception e which causes the problem
     * @return CharacteriseResult containing a Error-Report
     */
    private MigrateResult returnWithErrorMessage(final String message,
            final Exception e) {
        if (e == null) {
            return new MigrateResult(null, ServiceUtils
                    .createErrorReport(message));
        } else {
            return new MigrateResult(null, ServiceUtils
                    .createExceptionErrorReport(message, e));
        }
    }

    private String createStringFromLogFile(File errorLogFile,
            boolean enableDebugLog) {
        File debugLog = null;
        PrintWriter debugLogger = null;
        if (enableDebugLog) {
            FileOutputStream out = null;
            try {
                debugLog = File.createTempFile("jTidyDebugLog", ".txt");
                out = new FileOutputStream(debugLog, true);
                debugLogger = new PrintWriter(out, true);
            } catch (FileNotFoundException e1) {
                e1.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                IOUtils.closeQuietly(out);
                IOUtils.closeQuietly(debugLogger);
            }
        }
        StringBuffer buf = new StringBuffer();
        if (errorLogFile.canRead()) {
            try {
                BufferedReader errorLogIn = new BufferedReader(new FileReader(
                        errorLogFile));
                String line = null;

                buf.append(ServiceUtils.getSystemDateAndTimeFormatted() + ": ");
                buf
                        .append("JTidy output:\n----------------------------------------------");

                while ((line = errorLogIn.readLine()) != null) {
                    buf.append(line);
                    buf.append("\n");
                }
                errorLogIn.close();

            } catch (FileNotFoundException e) {
                log.severe("Could not find errorLog file: "
                        + errorLogFile.getAbsolutePath());
                e.printStackTrace();
            } catch (IOException e) {
                log.severe("Could not read errorLog file: "
                        + errorLogFile.getAbsolutePath());
                e.printStackTrace();
            }

            buf.deleteCharAt(buf.lastIndexOf("\n"));
            buf.append("----------------------------------------------");

            if (enableDebugLog) {
                debugLogger.write(buf.toString());
                debugLogger.println();
                debugLogger.println();
                debugLogger.flush();
                debugLogger.close();
            }
            return buf.toString();
        } else {
            log.severe("No error log file found!!!");
            return "";
        }
    }

    private MigrationPath[] createMigrationPathwayMatrix(
            List<URI> inputFormats, List<URI> outputFormats,
            List<Parameter> parameters) {
        List<MigrationPath> paths = new ArrayList<MigrationPath>();

        for (Iterator<URI> iterator = inputFormats.iterator(); iterator
                .hasNext();) {
            URI input = iterator.next();

            for (Iterator<URI> iterator2 = outputFormats.iterator(); iterator2
                    .hasNext();) {
                URI output = iterator2.next();

                MigrationPath path = new MigrationPath(input, output,
                        parameters);
                // Debug...
                // System.out.println(path.getInputFormat() + " --> " +
                // path.getOutputFormat());
                paths.add(path);
            }
        }
        return paths.toArray(new MigrationPath[] {});
    }

}
TOP

Related Classes of eu.planets_project.services.migrate.jtidy.JTidy

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.