Package info.freelibrary.djatoka.ingest

Source Code of info.freelibrary.djatoka.ingest.IngestThread

package info.freelibrary.djatoka.ingest;

import info.freelibrary.djatoka.Constants;

import info.freelibrary.util.FileUtils;
import info.freelibrary.util.Logger;
import info.freelibrary.util.LoggerFactory;
import info.freelibrary.util.RegexFileFilter;
import info.freelibrary.util.DirFileFilter;
import info.freelibrary.util.FileExtFileFilter;
import info.freelibrary.util.PairtreeObject;
import info.freelibrary.util.PairtreeUtils;
import info.freelibrary.util.PairtreeRoot;

import java.io.FileNotFoundException;
import java.io.FilenameFilter;

import gov.lanl.adore.djatoka.DjatokaCompress;
import gov.lanl.adore.djatoka.DjatokaEncodeParam;
import gov.lanl.adore.djatoka.DjatokaException;
import gov.lanl.adore.djatoka.ICompress;
import gov.lanl.adore.djatoka.kdu.KduCompressExe;

import java.io.File;
import java.io.IOException;

import java.util.Properties;

import nu.xom.Builder;
import nu.xom.Nodes;
import nu.xom.ParsingException;

public class IngestThread extends Thread implements Constants {

    private static final Logger LOGGER = LoggerFactory.getLogger(IngestThread.class, "freelib-djatoka_messages");

    private static final String MAX_SIZE = "djatoka.ingest.file.maxSize";

    private static final String ID_QUERY = "//id";

    private boolean isFinished;

    private boolean isWaiting;

    private int myCount;

    private DjatokaEncodeParam myParams;

    private ICompress myCompression;

    private String[] myExts;

    private long myMaxSize;

    private File mySource;

    private File myDest;

    private boolean myThreadRunsUnattended;

    /**
     * Creates a new ingest thread with the supplied source and target directories; this thread looks for files with the
     * extensions in the supplied array and is configured with a boolean to run attended (by a human) or unattended.
     *
     * @param aSource A source directory of source files
     * @param aDest A target directory of JP2 files
     * @param aExts Extensions of the files to convert and ingest
     * @param aCfg A configuration to use for the ingest
     * @param aUnattendedRun Whether the ingest is attended by a person or not
     * @throws Exception If there is a problem
     */
    public IngestThread(File aSource, File aDest, String[] aExts, Properties aCfg, boolean aUnattendedRun)
            throws Exception {
        super();

        mySource = aSource;
        myDest = aDest;
        myExts = aExts;
        myThreadRunsUnattended = aUnattendedRun;

        myParams = new DjatokaEncodeParam(aCfg);
        myCompression = new KduCompressExe();

        // Convert maximum file size to bytes
        myMaxSize = Long.parseLong(aCfg.getProperty(MAX_SIZE, "200")) * 1048576;
    }

    @Override
    public void run() {
        super.run();

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("INGEST_THREAD_STARTING", getId());
        }

        isWaiting = true;

        try {
            // Go through requested directory, converting to TIFs to JP2s
            convert(mySource, myDest);

            // Add converted file system JP2s to the Pairtree cache directory
            loadFileSystemImages(myDest, mySource);
        } catch (Exception details) {
            LOGGER.error("INGEST_EXCEPTION", details);
        }

        isFinished = true;

        while (isWaiting && !myThreadRunsUnattended) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException details) {
                if (LOGGER.isWarnEnabled()) {
                    LOGGER.warn(details.getMessage(), details);
                }
            }
        }

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("INGEST_SHUTDOWN", getId(), myCount);
        }
    }

    /**
     * Returns the current ingest count.
     *
     * @return The current ingest count
     */
    public int getCount() {
        return myCount;
    }

    /**
     * Cleans up the ingest thread.
     */
    public void cleanUp() {
        isWaiting = false;
    }

    /**
     * Returns true if the ingest thread is finished; else, false.
     *
     * @return True if the ingest thread is finished; else, false
     */
    public boolean isFinished() {
        return isFinished;
    }

    private void convert(File aSource, File aDest) throws IOException, Exception {
        File[] files = aSource.listFiles(new FileExtFileFilter(myExts));
        File[] dirs = aSource.listFiles(new DirFileFilter());
        PairtreeRoot ptRoot = new PairtreeRoot(myDest); // JP2 directory
        int pathIndex = myDest.getAbsolutePath().length();

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("INGEST_DIRS_FOUND", dirs.length, aSource);
        }

        // These are the directories into which we convert our JP2s
        for (File nextSource : dirs) {
            String fileName = nextSource.getName();

            if (!fileName.startsWith(".")) {
                File dest = new File(aDest, nextSource.getName());

                if (dest.exists() && (!dest.canWrite() || !dest.isDirectory())) {
                    throw new IOException("Problem with destination directory: " + dest.getAbsolutePath());
                } else {
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("INGEST_DESCENDING", dest);
                    }

                    if ((!dest.exists() && dest.mkdirs()) || (dest.exists() && dest.isDirectory())) {
                        convert(nextSource, dest); // go into a sub-directory
                    } else {
                        throw new IOException("Failed to create a new directory: " + dest.getAbsolutePath());
                    }
                }
            }
        }

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("INGEST_FILES_FOUND", files.length, aSource);
        }

        // These are the actual image files that we're going to convert
        for (File next : files) {
            String fileName = FileUtils.stripExt(next.getName()) + JP2_EXT;
            String sourceFileName = next.getAbsolutePath();
            File nextDest = new File(aDest, fileName); // JP2 image file

            if (next.length() > myMaxSize) {
                if (LOGGER.isErrorEnabled()) {
                    long size = next.length() / 1048576;
                    LOGGER.error("INGEST_TOO_LARGE", sourceFileName, size);
                }

                continue; // We've written to the error log, move along...
            }

            if (!aDest.exists() && !aDest.mkdirs()) {
                throw new IOException("Unable to create new directory: " + nextDest.getAbsolutePath());
            }

            if (!fileName.startsWith(".")) {
                String destFileName = nextDest.getAbsolutePath();

                // Check to see whether we've already converted this file!
                String path = FileUtils.stripExt(nextDest.getAbsolutePath());
                String id = "--" + path.substring(pathIndex);
                PairtreeObject ptDir = ptRoot.getObject(id);
                String ptFileName = PairtreeUtils.encodeID(id);
                File jp2 = new File(ptDir, ptFileName);

                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("INGEST_EXISTS_CHECK", jp2);
                }

                // We skip if it exists; delete it if you want to re-convert
                if (jp2.exists()) {
                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("INGEST_SKIPPING", jp2);
                    }

                    continue;
                }

                // This file would only exist if the file system copy from a
                // previous conversion wasn't copied into the JP2 directory
                // like it should have been... it's an indication of a problem.
                if (nextDest.exists()) {
                    if (LOGGER.isWarnEnabled()) {
                        LOGGER.warn("INGEST_MOVING_STALE", nextDest);
                    }

                    if (!nextDest.delete() && LOGGER.isWarnEnabled()) {
                        LOGGER.warn("FILE_NOT_DELETED", nextDest);
                    }
                }

                if (LOGGER.isInfoEnabled()) {
                    LOGGER.info("INGEST_COMPRESSING", sourceFileName, destFileName, Integer.toString(myCount + 1));
                }

                try {
                    DjatokaCompress.compress(myCompression, sourceFileName, destFileName, myParams);

                    // If we get here, the conversion was successful; note it
                    myCount += 1;

                    if (LOGGER.isDebugEnabled()) {
                        LOGGER.debug("INGEST_WRITTEN_TO_DISK", destFileName, Long.toString(new File(destFileName)
                                .length()));
                    }
                } catch (DjatokaException details) {
                    LOGGER.error("INGEST_COMPRESSION_FAILED", destFileName, sourceFileName, details.getMessage());

                    if (!new File(destFileName).delete()) {
                        LOGGER.error("INGEST_BROKEN_FILE", destFileName);
                    }
                } catch (NullPointerException details) {
                    LOGGER.error("INGEST_EMPTY_SOURCE", sourceFileName);
                }
            }
        }
    }

    private void loadFileSystemImages(File aJP2Dir, File aSource) throws IOException, FileNotFoundException {
        FilenameFilter filter = new RegexFileFilter(JP2_FILE_PATTERN);
        PairtreeRoot pairtree = new PairtreeRoot(aJP2Dir);
        String skipped = "pairtree_root";
        Builder bob = new Builder();

        // +1 below is to lose the trailing slash; we add via "--/" below
        int pathIndex = aJP2Dir.getAbsolutePath().length() + 1;

        // Descend through file system skipping our already mapped Pairtree dir
        for (File file : FileUtils.listFiles(aJP2Dir, filter, true, skipped)) {
            String id = file.getAbsolutePath().substring(pathIndex);
            String baseName = FileUtils.stripExt(id);
            File xmlDescriptor = new File(aSource, baseName + ".xml");

            // Check to see if XML descriptor file exists and use ID from it
            if (xmlDescriptor.exists()) {
                try {
                    Nodes nodes = bob.build(xmlDescriptor).query(ID_QUERY);

                    if (nodes.size() > 0) {
                        String idValue = nodes.get(0).getValue();

                        // We store image with its explicit identifier
                        if (!idValue.equals("")) {
                            if (LOGGER.isInfoEnabled()) {
                                LOGGER.info("INGEST_ID_FOUND", file, idValue);
                            }

                            store(pairtree, idValue, file);
                        } else {
                            if (LOGGER.isWarnEnabled()) {
                                LOGGER.warn("INGEST_MISSING_ID", file);
                            }

                            // We fall back to the default storage mechanism
                            store(pairtree, "--/" + baseName, file);
                        }
                    } else {
                        if (LOGGER.isWarnEnabled()) {
                            LOGGER.warn("INGEST_MISSING_ID", file);
                        }

                        // We fall back to the default storage mechanism
                        store(pairtree, "--/" + baseName, file);
                    }
                } catch (ParsingException details) {
                    if (LOGGER.isErrorEnabled()) {
                        LOGGER.error("INGEST_PARSING_EXCEPTION", file, details);
                    }

                    // We fall back to the default storage mechanism
                    store(pairtree, "--/" + baseName, file);
                }
            } else {
                // Add a path prefix for file-system based Pairtree objects...
                // Making assumption that no external IDs will start with "--"
                store(pairtree, "--/" + baseName, file);
            }
        }
    }

    private void store(PairtreeRoot aPairtree, String aID, File aFile) throws IOException {
        PairtreeObject ptDir = aPairtree.getObject(aID);
        String ptFileName = PairtreeUtils.encodeID(aID);
        File jp2PtFile = new File(ptDir, ptFileName);

        // Move the file into the Pairtree structure
        aFile.renameTo(jp2PtFile);

        if (LOGGER.isDebugEnabled()) {
            LOGGER.info("INGEST_TO_CACHE", aID, jp2PtFile.getAbsolutePath());
        }
    }
}
TOP

Related Classes of info.freelibrary.djatoka.ingest.IngestThread

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.