/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.util;
import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import javax.imageio.IIOException;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOInvalidTreeException;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.metadata.IIOMetadataNode;
import javax.imageio.stream.ImageOutputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Renders pages of a PDF document to bitmap images and writes every page to its own
 * image file through the Java Image I/O framework. The file name of each image is built
 * from a caller supplied prefix, the 1-based page number and the image format name.
 * <p>
 * Patterned after PDFTextStripper.
 *
 * @author <a href="mailto:DanielWilson@Users.SourceForge.net">Daniel Wilson</a>
 * @version $Revision: 1.1 $
 */
public class PDFImageWriter extends PDFStreamEngine
{
    /**
     * Name of the plug-in neutral Image I/O metadata format used to store the resolution.
     */
    private static final String STANDARD_METADATA_FORMAT = "javax_imageio_1.0";

    /**
     * Instantiate a new PDFImageWriter object.
     */
    public PDFImageWriter()
    {
    }

    /**
     * Instantiate a new PDFImageWriter object. Loading all of the operator mappings
     * from the properties object that is passed in.
     *
     * @param props The properties containing the mapping of operators to PDFOperator
     * classes.
     *
     * @throws IOException If there is an error reading the properties.
     */
    public PDFImageWriter( Properties props ) throws IOException
    {
        super( props );
    }

    /**
     * Converts a given page range of a PDF document to bitmap images, using
     * {@link BufferedImage#TYPE_USHORT_565_RGB} as image type and the screen resolution
     * (or 96 dpi when running headless) as resolution.
     *
     * @param document the PDF document
     * @param imageType the target format (ex. "png")
     * @param password the password (needed if the PDF is encrypted)
     * @param startPage the start page (1 is the first page)
     * @param endPage the end page (set to Integer.MAX_VALUE for all pages)
     * @param outputPrefix used to construct the filename for the individual images
     * @return true if the images were produced, false if there was an error
     * @throws IOException if an I/O error occurs
     */
    public boolean writeImage(PDDocument document, String imageType, String password,
            int startPage, int endPage, String outputPrefix)
    throws IOException
    {
        int resolution;
        try
        {
            resolution = Toolkit.getDefaultToolkit().getScreenResolution();
        }
        catch( HeadlessException e )
        {
            // no display available: fall back to a fixed resolution
            resolution = 96;
        }
        return writeImage(document, imageType, password, startPage, endPage, outputPrefix,
                BufferedImage.TYPE_USHORT_565_RGB, resolution);
    }

    /**
     * Converts a given page range of a PDF document to bitmap images. Each page is written
     * to the file <code>outputPrefix + pageNumber + "." + imageFormat</code>. Processing
     * continues with the next page when no image writer is available for a page.
     *
     * @param document the PDF document
     * @param imageFormat the target format (ex. "png")
     * @param password the password (needed if the PDF is encrypted); it is not
     * evaluated by this method
     * @param startPage the start page (1 is the first page)
     * @param endPage the end page (set to Integer.MAX_VALUE for all pages)
     * @param outputPrefix used to construct the filename for the individual images
     * @param imageType the image type (see {@link BufferedImage}.TYPE_*)
     * @param resolution the resolution in dpi (dots per inch)
     * @return true if the images were produced, false if no image writer was found
     * for the requested format
     * @throws IOException if an I/O error occurs
     */
    public boolean writeImage(PDDocument document, String imageFormat, String password,
            int startPage, int endPage, String outputPrefix, int imageType, int resolution)
    throws IOException
    {
        boolean bSuccess = true;
        List pages = document.getDocumentCatalog().getAllPages();
        for( int i = startPage - 1; i < endPage && i < pages.size(); i++ )
        {
            PDPage page = (PDPage)pages.get( i );
            BufferedImage image = page.convertToImage(imageType, resolution);
            String fileName = outputPrefix + (i + 1) + "." + imageFormat;
            System.out.println( "Writing: " + fileName );
            if( !writePageImage( image, imageFormat, fileName, resolution ) )
            {
                bSuccess = false;
            }
        }
        return bSuccess;
    }

    /**
     * Writes a single rendered page to a file using the first image writer registered
     * for the given format.
     *
     * @param image the rendered page
     * @param imageFormat the target format (ex. "png")
     * @param fileName the name of the file to create
     * @param resolution the resolution in dpi that is recorded in the image metadata
     * @return true if the image was written, false if no writer exists for the format
     * @throws IOException if the image cannot be written
     */
    private boolean writePageImage(BufferedImage image, String imageFormat, String fileName,
            int resolution) throws IOException
    {
        // Look the writer up before touching the file system so that an unsupported
        // format does not leave an empty file behind.
        Iterator writerIter = ImageIO.getImageWritersByFormatName( imageFormat );
        if( !writerIter.hasNext() )
        {
            return false;
        }
        ImageWriter imageWriter = (ImageWriter)writerIter.next();
        ImageOutputStream output = null;
        try
        {
            output = ImageIO.createImageOutputStream( new File( fileName ) );
            if( output == null )
            {
                throw new IOException( "Cannot create an image output stream for " + fileName );
            }
            try
            {
                ImageWriteParam writerParams = imageWriter.getDefaultWriteParam();
                if( writerParams.canWriteCompressed() )
                {
                    // ask for the best quality the writer can deliver
                    writerParams.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
                    writerParams.setCompressionQuality(1.0f);
                }
                IIOMetadata meta = createMetadata( image, imageWriter, writerParams, resolution);
                imageWriter.setOutput( output );
                imageWriter.write( null, new IIOImage( image, null, meta ), writerParams );
            }
            catch( IIOException io )
            {
                // keep the plain IOException type, but do not lose the original cause
                IOException wrapped = new IOException( io.getMessage() );
                wrapped.initCause( io );
                throw wrapped;
            }
        }
        finally
        {
            try
            {
                imageWriter.dispose();
            }
            finally
            {
                if( output != null )
                {
                    try
                    {
                        output.flush();
                    }
                    finally
                    {
                        // close even if flushing failed
                        output.close();
                    }
                }
            }
        }
        return true;
    }

    /**
     * Creates the default image metadata of the writer and stores the resolution in it.
     *
     * @param image the image that is going to be written
     * @param imageWriter the writer that provides the default metadata
     * @param writerParams the write parameters that will be used for writing
     * @param resolution the resolution in dpi
     * @return the metadata including the resolution, or null if the metadata does not
     * support the standard metadata format
     */
    private IIOMetadata createMetadata(RenderedImage image, ImageWriter imageWriter,
            ImageWriteParam writerParams, int resolution)
    {
        ImageTypeSpecifier type = writerParams.getDestinationType();
        if (type == null)
        {
            type = ImageTypeSpecifier.createFromRenderedImage( image );
        }
        IIOMetadata meta = imageWriter.getDefaultImageMetadata( type, writerParams );
        return (addResolution(meta, resolution) ? meta : null);
    }

    /**
     * Stores the resolution as HorizontalPixelSize and VerticalPixelSize in the
     * Dimension node of the standard metadata tree and merges the tree back.
     *
     * @param meta the metadata to update
     * @param resolution the resolution in dpi
     * @return true if the metadata was updated, false if the standard metadata format
     * is not supported by the metadata object
     * @throws RuntimeException if the updated tree is rejected by the metadata object
     */
    private boolean addResolution(IIOMetadata meta, int resolution)
    {
        if (!meta.isStandardMetadataFormatSupported())
        {
            return false;
        }
        IIOMetadataNode root = (IIOMetadataNode)meta.getAsTree(STANDARD_METADATA_FORMAT);
        // the Dimension node is optional in the tree, create it when it is missing
        IIOMetadataNode dim = getOrCreateChildNode(root, "Dimension");
        // NOTE(review): this value is dots per millimetre (dpi / 25.4). The standard
        // metadata format documents the pixel size as the width of a pixel in
        // millimetres, which would be 25.4 / dpi. Confirm which interpretation the
        // target image writers expect before changing it.
        String pixelSize = Double.toString(resolution / 25.4);
        getOrCreateChildNode(dim, "HorizontalPixelSize").setAttribute("value", pixelSize);
        getOrCreateChildNode(dim, "VerticalPixelSize").setAttribute("value", pixelSize);
        try
        {
            meta.mergeTree(STANDARD_METADATA_FORMAT, root);
        }
        catch (IIOInvalidTreeException e)
        {
            throw new RuntimeException("Cannot update image metadata: "
                    + e.getMessage(), e);
        }
        return true;
    }

    /**
     * Returns the direct child with the given name, appending a new empty node to the
     * parent if there is no such child yet.
     *
     * @param parent the node whose children are searched
     * @param name the node name to look for
     * @return the existing or the newly appended child node, never null
     */
    private static IIOMetadataNode getOrCreateChildNode(IIOMetadataNode parent, String name)
    {
        IIOMetadataNode child = getChildNode(parent, name);
        if (child == null)
        {
            child = new IIOMetadataNode(name);
            parent.appendChild(child);
        }
        return child;
    }

    /**
     * Returns the first direct child of a node that has the given node name.
     *
     * @param n the node whose children are searched
     * @param name the node name to look for
     * @return the matching child or null if there is none
     */
    private static IIOMetadataNode getChildNode(Node n, String name)
    {
        NodeList nodes = n.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++)
        {
            Node child = nodes.item(i);
            if (name.equals(child.getNodeName()))
            {
                return (IIOMetadataNode)child;
            }
        }
        return null;
    }
}