Package com.gentics.cr.lucene.indexer.transformer

Source Code of com.gentics.cr.lucene.indexer.transformer.POIContentTransformer

package com.gentics.cr.lucene.indexer.transformer;

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.xmlbeans.XmlException;

import com.gentics.cr.CRResolvableBean;
import com.gentics.cr.configuration.GenericConfiguration;
import com.gentics.cr.exceptions.CRException;

/**
* Transformer for all types of POI documents (Microsoft OLE2 documents) which automatically
* figures out how to parse the document.
* Last changed: $Date$
* @version $Revision$
* @author $Author$
*
*/
public class POIContentTransformer extends ContentTransformer {

  /**
   * Config key to set the attribute to use for parsing.
   */
  private static final String TRANSFORMER_ATTRIBUTE_KEY = "attribute";

  /**
   * Field to store the config value representing the attribute to use for parsing.
   */
  private String attribute = "";

  /**
   * Get new instance of POIContentTransformer.
   * @param config Configuration for the transformer.
   */
  public POIContentTransformer(final GenericConfiguration config) {
    super(config);
    attribute = (String) config.get(TRANSFORMER_ATTRIBUTE_KEY);
  }

  /**
   * Converts a byte array that contains a word file into a string with its contents.
   * @param obj Object to retrieve the content from.
   * @return parsed text from the object
   * @throws CRException
   */
  private String getStringContents(final Object obj) throws CRException {
    ByteArrayInputStream is;
    if (obj instanceof byte[]) {
      is = new ByteArrayInputStream((byte[]) obj);
    } else if (obj instanceof String) {
      String str = (String) obj;
      is = new ByteArrayInputStream(str.getBytes());
    } else {
      throw new IllegalArgumentException("Parameter must be instance of byte[]");
    }
    String ret = null;
    POITextExtractor extractor;

    try {
      extractor = ExtractorFactory.createExtractor(is);

      ret = extractor.getText();

    } catch (IOException e) {
      throw new CRException(e);
    } catch (InvalidFormatException e) {
      throw new CRException(e);
    } catch (OpenXML4JException e) {
      throw new CRException(e);
    } catch (XmlException e) {
      throw new CRException(e);
    }
    return (ret);
  }

  @Override
  public void processBean(final CRResolvableBean bean) throws CRException {
    if (this.attribute != null) {
      Object obj = bean.get(this.attribute);
      if (obj != null) {
        String newString = getStringContents(obj);
        if (newString != null) {
          bean.set(this.attribute, newString);
        }
      }
    } else {
      LOGGER.error("Configured attribute is null. Bean will not be processed");
    }
  }

  @Override
  public void destroy() {
    // TODO Auto-generated method stub

  }
}
TOP

Related Classes of com.gentics.cr.lucene.indexer.transformer.POIContentTransformer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.