/*
* Copyright (C) 2009 eXo Platform SAS.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.exoplatform.services.document.impl;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.exoplatform.services.document.DocumentReadException;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
/**
* Created by The eXo Platform SAS A parser of Microsoft Excel 2007 files (xlsx).
*
* @author <a href="mailto:phunghainam@gmail.com">Phung Hai Nam</a>
* @author Gennady Azarenkov
* @author <a href="mailto:nikolazius@gmail.com">Nikolay Zamosenchuk</a>
* @version $Id: MSXExcelDocumentReader.java 34360 2009-07-22 23:58:59Z nzamosenchuk $
*
*/
public class MSXExcelDocumentReader extends BaseDocumentReader
{
private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSZ";
/**
* @see org.exoplatform.services.document.DocumentReader#getMimeTypes()
*/
public String[] getMimeTypes()
{
return new String[]{"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"};
}
/**
* Returns only a text from .xlsx file content.
*
* @param is an input stream with .xls file content.
* @return The string only with text from file content.
*/
public String getContentAsText(InputStream is) throws IOException, DocumentReadException
{
if (is == null)
{
throw new NullPointerException("InputStream is null.");
}
StringBuilder builder = new StringBuilder("");
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
try
{
if (is.available() == 0)
{
return "";
}
XSSFWorkbook wb;
try
{
wb = new XSSFWorkbook(is);
}
catch (IOException e)
{
throw new DocumentReadException("Can't open spreadsheet.", e);
}
catch (OpenXML4JRuntimeException e)
{
return builder.toString();
}
for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++)
{
XSSFSheet sheet = wb.getSheetAt(sheetNum);
if (sheet != null)
{
for (int rowNum = sheet.getFirstRowNum(); rowNum <= sheet.getLastRowNum(); rowNum++)
{
XSSFRow row = sheet.getRow(rowNum);
if (row != null)
{
int lastcell = row.getLastCellNum();
for (int k = 0; k < lastcell; k++)
{
XSSFCell cell = row.getCell(k);
if (cell != null)
{
switch (cell.getCellType())
{
case XSSFCell.CELL_TYPE_NUMERIC : {
double d = cell.getNumericCellValue();
if (isCellDateFormatted(cell))
{
Date date = HSSFDateUtil.getJavaDate(d);
String cellText = dateFormat.format(date);
builder.append(cellText).append(" ");
}
else
{
builder.append(d).append(" ");
}
break;
}
case XSSFCell.CELL_TYPE_FORMULA :
builder.append(cell.getCellFormula().toString()).append(" ");
break;
case XSSFCell.CELL_TYPE_BOOLEAN :
builder.append(cell.getBooleanCellValue()).append(" ");
break;
case XSSFCell.CELL_TYPE_ERROR :
builder.append(cell.getErrorCellValue()).append(" ");
break;
case XSSFCell.CELL_TYPE_STRING :
builder.append(cell.getStringCellValue().toString()).append(" ");
break;
default :
break;
}
}
}
}
}
}
}
}
finally
{
if (is != null)
{
try
{
is.close();
}
catch (IOException e)
{
}
}
}
return builder.toString();
}
public String getContentAsText(InputStream is, String encoding) throws IOException, DocumentReadException
{
// Ignore encoding
return getContentAsText(is);
}
/*
* (non-Javadoc)
*
* @see org.exoplatform.services.document.DocumentReader#getProperties(java.io.
* InputStream)
*/
public Properties getProperties(InputStream is) throws IOException, DocumentReadException
{
POIPropertiesReader reader = new POIPropertiesReader();
reader.readDCProperties(new XSSFWorkbook(is));
return reader.getProperties();
}
public static boolean isCellDateFormatted(XSSFCell cell)
{
boolean bDate = false;
double d = cell.getNumericCellValue();
if (HSSFDateUtil.isValidExcelDate(d))
{
XSSFCellStyle style = cell.getCellStyle();
int i = style.getDataFormat();
switch (i)
{
case 0xe : // m/d/yy
case 0xf : // d-mmm-yy
case 0x10 : // d-mmm
case 0x11 : // mmm-yy
case 0x12 : // h:mm AM/PM
case 0x13 : // h:mm:ss AM/PM
case 0x14 : // h:mm
case 0x15 : // h:mm:ss
case 0x16 : // m/d/yy h:mm
case 0x2d : // mm:ss
case 0x2e : // [h]:mm:ss
case 0x2f : // mm:ss.0
case 0xa5 : // ??
case 0xa7 : // ??
case 0xa9 : // ??
case 0xac : // mm:dd:yy not specified in javadoc
case 0xad : // yyyy-mm-dd not specified in javadoc
case 0xae : // mm:dd:yyyy not specified in javadoc
case 0xaf : // m:d:yy not specified in javadoc
bDate = true;
break;
default :
bDate = false;
break;
}
}
return bDate;
}
}