Package de.chris_soft.nanodoa.misc

Source Code of de.chris_soft.nanodoa.misc.DocumentInputManagement

/**
* NanoDoA - File based document archive
*
* Copyright (C) 2011-2012 Christian Packenius, christian.packenius@googlemail.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package de.chris_soft.nanodoa.misc;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.Properties;
import java.util.Vector;

import javax.mail.MessagingException;

import com.itextpdf.text.Document;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;
import com.itextpdf.text.pdf.codec.TiffImage;

import de.chris_soft.nanoarchive.Metadata;
import de.chris_soft.nanodoa.God;
import de.chris_soft.utilities.AppProperties;
import de.chris_soft.utilities.FileUtils;
import de.chris_soft.utilities.HtmlFormatter;
import de.chris_soft.utilities.IdUtils;
import de.chris_soft.utilities.LogUtils;
import de.chris_soft.utilities.SendHtmlMailUtils;

/**
* Searches for documents in multiple directories and puts them into the
* archive.
* @author Christian Packenius.
*/
public class DocumentInputManagement implements Runnable {
  private final Vector<File> sourceDirectories = new Vector<File>();

  private final Vector<File> sourceFiles = new Vector<File>();

  private volatile boolean stopObservation = false;

  private final Thread thread;

  /**
   * Constructor.
   * @throws IOException
   */
  public DocumentInputManagement() throws IOException {
    thread = startObservation();
  }

  private Thread startObservation() {
    Thread thread = new Thread(this, "File system observation thread");
    thread.setDaemon(false);
    return thread;
  }

  /**
   * Start file system observation.
   */
  public void start() {
    thread.start();
  }

  /**
   * Stop observation of destination directories.
   */
  public void stopObservation() {
    stopObservation = true;
  }

  /**
   * Changes the list of directories to observe for documents.
   * @param directories Directory list to observe.
   */
  public void setDestinationDirectories(Collection<File> directories) {
    sourceDirectories.clear();
    sourceDirectories.addAll(directories);
  }

  /**
   * @see java.lang.Runnable#run()
   */
  @Override
  public void run() {
    try {
      while (!stopObservation) {
        fillSourceFileList();
        God.trayIAM.setImage(sourceFiles.size());
        workFiles();
        if (!stopObservation) {
          pauseShortTime();
        }
      }
    }
    catch (Exception e) {
      LogUtils.log(e);
    }
  }

  private void fillSourceFileList() {
    for (File directory : sourceDirectories) {
      fillFileList(directory);
    }
  }

  private void fillFileList(File directory) {
    if (directory != null) {
      if (directory.isFile()) {
        addFileAfterFilter(directory);
      }
      else if (directory.isDirectory()) {
        addDirectoryFiles(directory);
      }
    }
  }

  private void addFileAfterFilter(File file) {
    if (isArchivableFile(file)) {
      sourceFiles.add(file);
    }
  }

  private boolean isArchivableFile(File file) {
    if (System.currentTimeMillis() - file.lastModified() <= 1000 * 60 * 5) {
      return false;
    }
    String ext = FileUtils.getFileExtension(file);
    if (ext != null) {
      ext = ext.toLowerCase();
      if (ext.equals("jpg") || ext.equals("jpe") || ext.equals("jpeg")) {
        return true;
      }
      if (ext.equals("png") || ext.equals("gif")) {
        return true;
      }
      if (ext.equals("pdf")) {
        return true;
      }
      if (ext.equals("tif") || ext.equals("tiff")) {
        return true;
      }
    }
    return false;
  }

  private void addDirectoryFiles(File directory) {
    File[] files = directory.listFiles();
    if (files != null) {
      for (File file : files) {
        fillFileList(file);
      }
    }
  }

  private void workFiles() {
    if (!sourceFiles.isEmpty()) {
      waitFiveSeconds();
    }
    while (!sourceFiles.isEmpty() && !stopObservation) {
      God.trayIAM.setImage(sourceFiles.size());
      File file = sourceFiles.remove(0);
      if (FileUtils.isReadable(file) && file.length() > 0) {
        workFile(file);
      }
    }
  }

  private void waitFiveSeconds() {
    try {
      Thread.sleep(5000);
    }
    catch (InterruptedException e) {
      // Ignore.
    }
  }

  private void workFile(File documentFile) {
    System.out.println("Working: " + documentFile.getName());
    Properties metadata = new Properties();
    setSomeMetadata(metadata, documentFile);
    try {
      getDocumentTextMetadata(documentFile, metadata);
      File pdfFile = createArchivePdfFromImage(documentFile, metadata);
      if (pdfFile != null) {
        try {
          String sDocumentID = metadata.getProperty(Metadata.DOCUMENT_ID);
          long documentID = Long.parseLong(sDocumentID);
          informAppWindow(documentID);
          sendDocumentViaMail(pdfFile, metadata);
        }
        catch (Exception e) {
          // Ignore.
        }
        try {
          FileUtils.copyFile(documentFile, new File("temp/" + IdUtils.getUniqueID() + "-" + documentFile.getName()));
          FileUtils.deleteFile(documentFile);
        }
        catch (IOException e) {
          LogUtils.log(e);
        }
      }
    }
    catch (DocumentReadingException dre) {
      renameBadDocumentFileToTempFileAfterTenMinutes(documentFile);
    }
  }

  private void renameBadDocumentFileToTempFileAfterTenMinutes(File documentFile) {
    if (System.currentTimeMillis() - documentFile.lastModified() > 1000 * 60 * 10) {
      File tmpFile = new File(documentFile.getAbsolutePath() + ".tmp");
      documentFile.renameTo(tmpFile);
    }
  }

  private void informAppWindow(long documentID) {
    God.specialsTree.addNewArchiveDocument(documentID);
  }

  private void setSomeMetadata(Properties metadata, File documentFile) {
    String documentID = Long.toString(IdUtils.getUniqueID());
    metadata.setProperty(Metadata.CREATION_TIME, documentID);
    documentID = formatUniqueDocID(documentID);
    String documentName = FileUtils.getNameWithoutExtension(documentFile) + " - " + documentID;
    metadata.setProperty(Metadata.NAME, documentName);
  }

  private String formatUniqueDocID(String name) {
    int k = name.length() - 4;
    while (k > 0) {
      name = name.substring(0, k) + "-" + name.substring(k);
      k -= 4;
    }
    return name;
  }

  private void getDocumentTextMetadata(File documentFile, Properties metadata) throws DocumentReadingException {
    String fulltext = FulltextRecognizer.getFulltext(documentFile, metadata);
    metadata.setProperty(Metadata.FULLTEXT, fulltext);
  }

  private File createArchivePdfFromImage(File documentFile, Properties metadata) {
    File pdfFile = new File("temp/" + IdUtils.getUniqueID() + ".pdf");
    try {
      String ext = FileUtils.getFileExtension(documentFile).toLowerCase();
      String fulltext = (String) metadata.remove(Metadata.FULLTEXT);
      String ocrtext = (String) metadata.remove(Metadata.OCRTEXT);
      String realtext = (String) metadata.remove(Metadata.REALTEXT);
      long documentID = 0;
      if (ext.equals("pdf")) {
        FileUtils.deleteFile(pdfFile);
        documentID = God.archive.store(documentFile, fulltext, metadata);
        pdfFile = documentFile;
      }
      else if (ext.equals("tif") || ext.equals("tiff")) {
        createPdfDocumentFileFromTIFF(documentFile, pdfFile);
        documentID = God.archive.store(pdfFile, fulltext, metadata);
      }
      else {
        createPdfDocumentFileFromSingleImageFile(documentFile, pdfFile);
        documentID = God.archive.store(pdfFile, fulltext, metadata);
      }
      metadata.setProperty(Metadata.FULLTEXT, fulltext);
      metadata.setProperty(Metadata.OCRTEXT, ocrtext);
      metadata.setProperty(Metadata.REALTEXT, realtext);
      metadata.setProperty(Metadata.DOCUMENT_ID, Long.toString(documentID));

      File idPdfFile = new File("temp/" + documentID + ".pdf");
      if (!idPdfFile.equals(pdfFile)) {
        FileUtils.copyFile(pdfFile, idPdfFile);
      }
    }
    catch (Exception e) {
      LogUtils.log(e);
      if (!pdfFile.equals(documentFile)) {
        FileUtils.deleteFile(pdfFile);
      }
      return null;
    }
    return pdfFile;
  }

  private void createPdfDocumentFileFromTIFF(File tiffFile, File pdfFile) throws Exception {
    Document document = new Document(PageSize.A4, 0.0f, 0.0f, 0.0f, 0.0f);
    PdfWriter.getInstance(document, new FileOutputStream(pdfFile));
    document.open();
    RandomAccessFileOrArray ra = new RandomAccessFileOrArray(tiffFile.getCanonicalPath());
    int pageCount = TiffImage.getNumberOfPages(ra);
    for (int i = 0; i < pageCount; ++i) {
      God.appWindow.getStatusBar().setInitProgress(i, pageCount);
      Image img = TiffImage.getTiffImage(ra, i + 1);
      if (img != null) {
        if (i != 0) {
          document.newPage();
        }
        if (img.getWidth() > PageSize.A4.getWidth() || img.getHeight() > PageSize.A4.getHeight()) {
          img.scaleToFit(PageSize.A4.getWidth(), PageSize.A4.getHeight());
        }
        document.add(img);
      }
    }
    God.appWindow.getStatusBar().setInitProgress(pageCount, pageCount);
    ra.close();
    document.close();
  }

  private void createPdfDocumentFileFromSingleImageFile(File imageFile, File pdfFile) throws Exception {
    Document document = new Document(PageSize.A4, 0.0f, 0.0f, 0.0f, 0.0f);
    PdfWriter.getInstance(document, new FileOutputStream(pdfFile));
    document.open();
    Image image = Image.getInstance(imageFile.getCanonicalPath());
    image.scaleToFit(PageSize.A4.getWidth(), PageSize.A4.getHeight());
    document.add(image);
    document.close();
  }

  private boolean sendDocumentViaMail(File pdfFile, Properties metadata) {
    if (!Boolean.parseBoolean(AppProperties.getProperty(MailKeys.PROP_KEY_SHALL_SEND_MAIL))) {
      return true;
    }
    SendHtmlMailUtils mail = new SendHtmlMailUtils();
    if (Boolean.parseBoolean(AppProperties.getProperty(MailKeys.PROP_KEY_ADD_DOCUMENT_AS_ATTACHMENT))) {
      mail.addAttachment(pdfFile);
    }
    mail.addTO(AppProperties.getProperty(MailKeys.PROP_KEY_RECEIVER_MAIL_ADDRESS));
    mail.setFrom(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_MAIL_ADDRESS));
    mail.setLogin(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_MAIL_ADDRESS));
    mail.setPassword(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_MAIL_PW));
    mail.setPop3Host(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_POP3_SERVER));
    int smptPort = Integer.parseInt(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_SMTP_PORT));
    mail.setSmtp(AppProperties.getProperty(MailKeys.PROP_KEY_SENDER_SMTP_SERVER), smptPort);
    String docID = metadata.getProperty(Metadata.DOCUMENT_ID);
    mail.setSubject("::from::OCR:: " + docID);
    if (Boolean.parseBoolean(AppProperties.getProperty(MailKeys.PROP_KEY_SET_FULLTEXT_INTO_MAIL_BODY))) {
      String fulltext = metadata.getProperty(Metadata.FULLTEXT);
      String htmltext = HtmlFormatter.getHtmlString(fulltext, false, false);
      mail.setText("<p>" + htmltext + "</p>");
    }
    try {
      mail.sendMail();
      return true;
    }
    catch (MessagingException e) {
      LogUtils.log(e);
      return false;
    }
  }

  private void pauseShortTime() {
    try {
      Thread.sleep(500);
    }
    catch (InterruptedException e) {
      // Ignore.
    }
  }

  /**
   * Check if the document input management has been stopped.
   * @return <i>true</i> if the thread is not running any more.
   */
  public boolean isStopped() {
    return !thread.isAlive();
  }
}
TOP

Related Classes of de.chris_soft.nanodoa.misc.DocumentInputManagement

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.