// Package nz.govt.natlib.adapter.excel
//
// Source Code of nz.govt.natlib.adapter.excel.ExcelAdapter$DocumentSummaryReader

/*
*  Copyright 2006 The National Library of New Zealand
*
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package nz.govt.natlib.adapter.excel;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Iterator;

import nz.govt.natlib.adapter.AdapterUtils;
import nz.govt.natlib.adapter.DataAdapter;
import nz.govt.natlib.adapter.word.OLE.OLEConstants;
import nz.govt.natlib.fx.DataSource;
import nz.govt.natlib.fx.ParserContext;

import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
* Adapter for Microsoft Excel files. This adapter serves as an example
* of using a third party library to extract metadata from a binary
* file format.
*
* @author unascribed
* @version 1.0
*/

public class ExcelAdapter extends DataAdapter {

  // Constant 0x192; not referenced anywhere in the visible part of this class.
  // NOTE(review): the name suggests an offset to a "DOP" structure - confirm
  // whether it is still needed by this adapter.
  private static final int Offset_To_DOP = 0x192;

  // Language map obtained once from OLELanguageMap.getLanguageMap(); not
  // referenced anywhere in the visible part of this class.
  private static final OLELanguageMap langMap = OLELanguageMap
      .getLanguageMap();

  /**
   * Determine if this Adapter can extract metadata from the file
   * provided. This adapter checks both the extension and looks
   * for an OLE header.
   *
   * @param file The file to check.
   * @return true if this adapter supports the filetype.
   */
  public boolean acceptsFile(File file) {
      // Convert the filename to lower case to help with the comparison.
    String name = file.getName().toLowerCase();

    // If it is an .xls file, then we are probably an Excel file,
    // but check that it also complies with the OLE header requirement
    // as well. This could be extended with additional checks.
    if (name.endsWith(".xls")) {
      return checkFileHeader(file, OLEConstants.OLE_HEADER);
    }
    return false;
  }

  /**
   * Returns the version string of this adapter.
   *
   * @return the fixed value "1.0".
   */
  public String getVersion() {
    return "1.0";
  }

  /**
   * Returns the output type identifier of this adapter.
   *
   * @return the fixed value "excel.dtd".
   */
  public String getOutputType() {
    return "excel.dtd";
  }

  /**
   * Returns the input type identifier of this adapter.
   *
   * @return the fixed value "application/vnd.ms-excel".
   */
  public String getInputType() {
    return "application/vnd.ms-excel";
  }

  /**
   * Returns the display name of this adapter.
   *
   * @return the fixed value "Microsoft Excel Adapter".
   */
  public String getName() {
    return "Microsoft Excel Adapter";
  }

  /**
   * Returns a short human-readable description of this adapter.
   *
   * @return the fixed description string.
   */
  public String getDescription() {
    return "Handles Excel Spreadsheets from Excel 2.0 through to XP";
  }

  /**
   * Extract the metadata out of the file.
   *
   * @param file The file to extract metadata from.
   * @param ctx The context to raise metadata events on.
   */
  public void adapt(File file, ParserContext ctx) throws IOException {
      // Start the MSExcel tag.
    ctx.fireStartParseEvent("MSExcel");
   
    // Output the standard information (see DefaultAdapter).
    writeFileInfo(file, ctx);
   
    // Fire a version event.
    ctx.fireParseEvent("Version", "MSExcel");
    POIFSFileSystem fs = null;
    FileInputStream fin = null;
    try {
            // Initialise the POI library.
      POIFSReader r = new POIFSReader();
     
      // Register some listeners.
      r.registerListener(new MainStreamReader(ctx), "Workbook");
      r.registerListener(new SummaryReader(ctx), "\005SummaryInformation");
      r.registerListener(new DocumentSummaryReader()"\005DocumentSummaryInformation");

      // Read the file.
      fin = new FileInputStream(file);
      r.read(fin);
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    } finally {
      AdapterUtils.close(fin);
      fs = null;
    }
   
    // Close the MSExcel tag.
    ctx.fireEndParseEvent("MSExcel");
  }

  public void readDirectory(POIFSFileSystem fs, DirectoryEntry dir)
      throws Exception {
    if (dir.getEntryCount() == 0) {
      return;
    }

    for (Iterator iter = dir.getEntries(); iter.hasNext();) {
      Entry entry = (Entry) iter.next();
      if (entry instanceof DirectoryEntry) {
        // .. recurse into this directory
        readDirectory(fs, (DirectoryEntry) entry);
      } else if (entry instanceof DocumentEntry) {
        // entry is a document, which you can read
        DocumentEntry doc = (DocumentEntry) entry;
        readDocument(fs, doc);
      } else {
        // currently, either an Entry is a DirectoryEntry or a
        // DocumentEntry,
        // but in the future, there may be other entry subinterfaces.
        // The
        // internal data structure certainly allows for a lot more entry
        // types.
      }
    }
  }

  public void readDocument(POIFSFileSystem fs, DocumentEntry doc)
      throws Exception {
    // load file system
    DocumentInputStream stream = new DocumentInputStream(doc);

    if (stream.available() > 256) {
      return;
    }

    // process data from stream
    byte[] content = new byte[stream.available()];
    stream.read(content);
    stream.close();

    for (int i = 0; i < content.length; i++) {
      int c = content[i];
      if (c < 0) {
        c = 0x100 + c;
      }
    }

  }

 
  /**
   * This class is responsible for listening to events from POI,
   * and then sending the appropriate metadata events to the
   * ParserContext.
   */
  class SummaryReader implements POIFSReaderListener {
    ParserContext into;

    SummaryReader(ParserContext into) {
      this.into = into;
    }

    public void processPOIFSReaderEvent(POIFSReaderEvent event) {
        // Read the event into a SummaryInformation object.
      SummaryInformation si = null;
      try {
        si = (SummaryInformation) PropertySetFactory.create(event
            .getStream());
      } catch (Exception ex) {
        throw new RuntimeException("Property set stream \""
            + event.getPath() + event.getName() + "\": " + ex);
      }
      SimpleDateFormat dateFormatter = new SimpleDateFormat();
      dateFormatter.applyPattern("yyyy-MM-dd HH:mm:ss");
     
      // Start a summary section.     
      into.fireStartParseEvent("summary");
     
      // Fire all our events to the ParserContext.
      into.fireParseEvent("title", si.getTitle());
      into.fireParseEvent("subject", si.getSubject());
      into.fireParseEvent("keywords", si.getKeywords());
      into.fireParseEvent("comments", si.getComments());
      into.fireParseEvent("lastReviewedBy", si.getLastAuthor());
      into.fireParseEvent("author", si.getAuthor());
     
      // End the summary section.
      into.fireEndParseEvent("summary");

      // Start a properties section.
      into.fireStartParseEvent("properties");
     
      // Fire the properties metadata events.
      if(si.getCreateDateTime() != null) {
        into.fireParseEvent("created", dateFormatter.format(si
          .getCreateDateTime()));
      }
      else {
        into.fireParseEvent("created","");
      }
      if(si.getLastSaveDateTime() != null) {
        into.fireParseEvent("revision", dateFormatter.format(si
            .getLastSaveDateTime()));
      }
      else {
        into.fireParseEvent("revision","");
      }
     
      if (si.getLastPrinted() != null) {
        into.fireParseEvent("printed", dateFormatter.format(si
            .getLastPrinted()));
      } else {
        into.fireParseEvent("printed", "");
      }

      into.fireParseEvent("os", si.getOSVersion());
      into.fireParseEvent("application", si.getApplicationName());
     
      // End the properties section.
      into.fireEndParseEvent("properties");

    }
  }


  class TableStreamReader implements POIFSReaderListener {

    ParserContext into;

    public TableStreamReader(ParserContext into) {
      this.into = into;
    }

    public void processPOIFSReaderEvent(POIFSReaderEvent event) {

      DataSource ftk = null;
      try {
        long dopPos = into.getIntAttribute("MSExcel.FIB.dopOffset");
        ftk = new DocumentDataSource(event.getStream());
        into.fireStartParseEvent("Properties");
        OLEUtils.getDOPProperties(ftk, (int) dopPos, into);
        into.fireEndParseEvent("Properties");
      } catch (Exception ex) {
        ex.printStackTrace();
      } finally {
        if (ftk != null) {
          try {
            ftk.close();
          } catch (Exception ex) {
            ;
          }
        }
      }
    }
  }

  /**
   * Read elements out of the OLE Header element.
   */
  class MainStreamReader implements POIFSReaderListener {
    ParserContext into;

    public MainStreamReader(ParserContext into) {
      this.into = into;
    }

    public void processPOIFSReaderEvent(POIFSReaderEvent event) {
      DataSource ftk = null;
      try {
        ftk = new DocumentDataSource(event.getStream());

        into.fireStartParseEvent("header");
        OLEUtils.getHeader(ftk, into);
        into.fireEndParseEvent("header");

      } catch (Exception ex) {
        ex.printStackTrace();
      } finally {
        if (ftk != null) {
          try {
            ftk.close();
          } catch (Exception ex) {
            ;
          }
        }
      }
    }
  }

  class DocumentSummaryReader implements POIFSReaderListener {
    DocumentSummaryReader() {
    }

    public void processPOIFSReaderEvent(POIFSReaderEvent event) {
      DocumentSummaryInformation si = null;
      try {
        si = (DocumentSummaryInformation) PropertySetFactory
            .create(event.getStream());
      } catch (Exception ex) {
        throw new RuntimeException("Property set stream \""
            + event.getPath() + event.getName() + "\": " + ex);
      }
    }
  }
}
// TOP
//
// Related Classes of nz.govt.natlib.adapter.excel.ExcelAdapter$DocumentSummaryReader
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.