Source Code of org.pdfbox.util.Splitter

/**
 * Copyright (c) 2004-2005, www.pdfbox.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of pdfbox; nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://www.pdfbox.org
 *
 */
package org.pdfbox.util;


import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDPage;


import java.io.IOException;


import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


/**
 * Split a document into several other documents.
 *
 * @author Mario Ivankovits (mario@ops.co.at)
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @version $Revision: 1.7 $
 */
public class Splitter
{


    /**
     * The source PDF document.
     */
    protected PDDocument pdfDocument;
    
    /**
     * The current PDF document that contains the splitted page.
     */
    protected PDDocument currentDocument = null;
    
    private int splitAtPage=1;
    private List newDocuments = null;


    /**
     * The current page number that we are processing, zero based.
     */
    protected int pageNumber = 0;


    /**
     * This will take a document and split into several other documents.
     *
     * @param document The document to split.
     *
     * @return A list of all the split documents.
     *
     * @throws IOException If there is an IOError
     */
    public List split( PDDocument document ) throws IOException
    {
        newDocuments = new ArrayList();
        pdfDocument = document;


        List pages = pdfDocument.getDocumentCatalog().getAllPages();
        processPages(pages);
        return newDocuments;
    }


    /**
     * This will tell the splitting algorithm where to split the pages.  The default
     * is 1, so every page will become a new document.  If it was to then each document would
     * contain 2 pages.  So it the source document had 5 pages it would split into
     * 3 new documents, 2 documents containing 2 pages and 1 document containing one
     * page.
     *
     * @param split The number of pages each split document should contain.
     */
    public void setSplitAtPage( int split )
    {
        if( split <= 0 )
        {
            throw new RuntimeException( "Error split must be at least one page." );
        }
        splitAtPage = split;
    }


    /**
     * This will return how many pages each split document will contain.
     *
     * @return The split parameter.
     */
    public int getSplitAtPage()
    {
        return splitAtPage;
    }


    /**
     * Interface method to handle the start of the page processing.
     *
     * @param pages The list of pages from the source document.
     *
     * @throws IOException If an IO error occurs.
     */
    protected void processPages(List pages) throws IOException
    {
        Iterator iter = pages.iterator();
        while( iter.hasNext() )
        {
            PDPage page = (PDPage)iter.next();
            processNextPage( page );
        }
    }
    
    /**
     * Interface method, you can control where a document gets split by implementing
     * this method.  By default a split occurs at every page.  If you wanted to split
     * based on some complex logic then you could override this method.  For example.
     * <code>
     * protected void createNewDocumentIfNecessary()
     * {
     *     if( isPrime( pageNumber ) )
     *     {
     *         super.createNewDocumentIfNecessary();
     *     }
     * }
     * </code>
     *
     * @throws IOException If there is an error creating the new document.
     */
    protected void createNewDocumentIfNecessary() throws IOException 
    {
        if (isNewDocNecessary()) 
        {
            createNewDocument();
        }
    }
    
    /**
     * Check if it is necessary to create a new document.
     * 
     * @return true If a new document should be created.
     */
    protected boolean isNewDocNecessary() 
    {
        return pageNumber % splitAtPage == 0 || currentDocument == null;
    }
    
    /**
     * Create a new document to write the splitted contents to.
     * 
     * @throws IOException If there is an problem creating the new document.
     */
    protected void createNewDocument() throws IOException 
    {
        currentDocument = new PDDocument();
        currentDocument.setDocumentInformation(pdfDocument.getDocumentInformation());
        currentDocument.getDocumentCatalog().setViewerPreferences(
        pdfDocument.getDocumentCatalog().getViewerPreferences());
        newDocuments.add(currentDocument);
    }






    /**
     * Interface to start processing a new page.
     *
     * @param page The page that is about to get processed.
     *
     * @throws IOException If there is an error creating the new document.
     */
    protected void processNextPage( PDPage page ) throws IOException
    {
        createNewDocumentIfNecessary();
        PDPage imported = currentDocument.importPage( page );
        imported.setCropBox( page.findCropBox() );
        imported.setMediaBox( page.findMediaBox() );
        imported.setResources( page.findResources() );
        imported.setRotation( page.findRotation() );
        pageNumber++;
    }
}
Source Code of org.pdfbox.util.Splitter

Related Classes of org.pdfbox.util.Splitter