Package org.apache.slide.extractor

Source Code of org.apache.slide.extractor.OfficeExtractor

package org.apache.slide.extractor;

import java.io.InputStream;
import java.util.*;

import org.apache.poi.hpsf.*;
import org.apache.poi.poifs.eventfilesystem.*;
import org.apache.slide.util.conf.Configurable;
import org.apache.slide.util.conf.Configuration;
import org.apache.slide.util.conf.ConfigurationException;

/**
* The OfficeExtractor class
*
*/
public class OfficeExtractor extends AbstractPropertyExtractor implements Configurable {
  protected List instructions = new ArrayList();
  protected Map propertyMap = new HashMap();
 
  public OfficeExtractor(String uri, String contentType, String namespace) {
    super(uri, contentType, namespace);
  }

  public Map extract(InputStream content) throws ExtractorException {
    OfficePropertiesListener listener = new OfficePropertiesListener();
    try {
      POIFSReader r = new POIFSReader();
      r.registerListener(listener);
      r.read(content);
    } catch (Exception e) {
      throw new ExtractorException("Exception while extracting properties in OfficeExtractor");
    }
    return listener.getProperties();
  }

  class OfficePropertiesListener implements POIFSReaderListener {

    private HashMap properties = new HashMap();

    public Map getProperties() {
        return properties;
    }

    public void processPOIFSReaderEvent(POIFSReaderEvent event) {
      PropertySet ps = null;
      try {
        ps = PropertySetFactory.create(event.getStream());
      } catch (NoPropertySetStreamException ex) {
        return;
      } catch (Exception ex) {
        throw new RuntimeException("Property set stream \"" + event.getPath() + event.getName() + "\": " + ex);
      }
      String eventName = event.getName().trim();
      final long sectionCount = ps.getSectionCount();
      List sections = ps.getSections();
      int nr = 0;
      for (Iterator i = sections.iterator(); i.hasNext();) {
        Section sec = (Section) i.next();
        int propertyCount = sec.getPropertyCount();
        Property[] props = sec.getProperties();
        for (int i2 = 0; i2 < props.length; i2++) {
          Property p = props[i2];
          int id = p.getID();
          long type = p.getType();
          Object value = p.getValue();
          String key = eventName + "-" + nr + "-" + id;
          if ( propertyMap.containsKey(key) ) {
            properties.put(propertyMap.get(key), value);
          }
        }
      }
    }
  }

  public void configure(Configuration configuration) throws ConfigurationException {
        Enumeration instructions = configuration.getConfigurations("instruction");
        while (instructions.hasMoreElements()) {
            Configuration extract = (Configuration)instructions.nextElement();
            String property = extract.getAttribute("property");
            String id = extract.getAttribute("id");
      propertyMap.put(id, property);
        }
  }
}
TOP

Related Classes of org.apache.slide.extractor.OfficeExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by ORACLE Inc. Contact coftware#gmail.com.