Package org.apache.pdfbox.contentstream

Source Code of org.apache.pdfbox.contentstream.PDFStreamEngine

* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.pdfbox.contentstream;

import java.awt.geom.Area;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontFactory;
import org.apache.pdfbox.pdmodel.font.PDType3CharProc;
import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorProcessor;

* Processes a PDF content stream and executes certain operations.
* Provides a callback interface for clients that want to do things with the stream.
* @author Ben Litchfield
public class PDFStreamEngine
    // logger for this class, obtained from commons-logging
    private static final Log LOG = LogFactory.getLog(PDFStreamEngine.class);

    // registered operator processors, keyed by operator name
    private final Map<String, OperatorProcessor> operators = new HashMap<String, OperatorProcessor>();

    // current text matrix and text line matrix; both are reset to null by initPage
    private Matrix textMatrix;
    private Matrix textLineMatrix;
    // set to the current CTM while the operators of a stream are being processed
    protected Matrix subStreamMatrix = new Matrix();

    // stack of graphics states; getGraphicsState() returns the top entry
    private final Stack<PDGraphicsState> graphicsStack = new Stack<PDGraphicsState>();

    // resources in effect for the stream being processed (see pushResources/popResources)
    private PDResources resources;
    // page recorded by initPage; required by the child-stream and Type 3 entry points
    private PDPage currentPage;
    // raised and lowered by processPage
    private boolean isProcessingPage;

    // skip malformed or otherwise unparseable input where possible
    // (the flag is handed to PDFStreamParser in processStreamOperators)
    private boolean forceParsing;

     * Creates a new PDFStreamEngine.
    // No-argument constructor.
    // NOTE(review): no constructor body is visible in this listing.
    public PDFStreamEngine()

     * Indicates if force parsing is activated.
     * @return true if force parsing is active
    // Reports the current value of the forceParsing flag.
    public boolean isForceParsing()
        return forceParsing;

     * Enable/Disable force parsing.
     * @param forceParsingValue true activates force parsing
    // Stores the flag; it is passed to PDFStreamParser when stream operators are processed.
    public void setForceParsing(boolean forceParsingValue)
        forceParsing = forceParsingValue;

     * Register a custom operator processor with the engine.
     * @param operator The operator as a string.
     * @param op Processor instance.
     * @deprecated Use {@link #addOperator(OperatorProcessor)} instead
    // Maps the given operator string to the processor in the operators map.
    public void registerOperatorProcessor(String operator, OperatorProcessor op)
        operators.put(operator, op);

     * Adds an operator processor to the engine.
     * @param op operator processor
    // Registers the processor under the name it reports via getName().
    public final void addOperator(OperatorProcessor op)
        operators.put(op.getName(), op);

     * Initialises the stream engine for the given page.
    // Rejects a null page, records the page as current, pushes a graphics state built from
    // the page's crop box, and resets the text matrices and the resources to null.
    private void initPage(PDPage page)
        if (page == null)
            throw new IllegalArgumentException("Page cannot be null");
        currentPage = page;
        // NOTE(review): the stack is pushed to without being emptied first in the visible
        // code - confirm whether a clear() call was lost from this listing
        graphicsStack.push(new PDGraphicsState(page.getCropBox()));
        textMatrix = null;
        textLineMatrix = null;
        resources = null;

     * This will initialise and process the contents of the stream.
     * @param page the page to process
     * @throws IOException if there is an error accessing the stream
    // Only acts when the page has a content stream; isProcessingPage is raised and then
    // lowered again inside that branch.
    public void processPage(PDPage page) throws IOException
        if (page.getStream() != null)
            isProcessingPage = true;
            // NOTE(review): no call that initialises the page or processes its stream is
            // visible between these two assignments - the statements appear to be missing
            // from this listing
            isProcessingPage = false;

     * Shows a transparency group from the content stream.
     * @param form transparency group (form) XObject
     * @throws IOException if the transparency group cannot be processed
    // NOTE(review): no body is visible for this method in this listing.
    public void showTransparencyGroup(PDFormXObject form) throws IOException

     * Shows a form from the content stream.
     * @param form form XObject
     * @throws IOException if the form cannot be processed
    // NOTE(review): no body is visible for this method in this listing.
    public void showForm(PDFormXObject form) throws IOException

     * Process a child stream of the current page. For use with #processPage(PDPage).
     * @param contentStream the child content stream
     * @throws IOException if there is an exception while processing the stream
    // Requires a current page; without one an IllegalStateException points the caller to
    // the two-argument overload.
    public void processChildStream(PDContentStream contentStream) throws IOException
        if (currentPage == null)
            throw new IllegalStateException("No current page, call " +
                    "#processChildStream(PDContentStream, PDPage) instead");
        // NOTE(review): no statement that processes contentStream is visible after the guard

     * Processes a transparency group stream.
    // Requires a current page, then switches to the group's resources and keeps the
    // previous ones in 'parent'.
    // NOTE(review): the statements belonging to the two step comments below, and any use
    // of 'parent' to restore the resources, are not visible in this listing.
    protected void processTransparencyGroup(PDFormXObject group)
            throws IOException
        if (currentPage == null)
            throw new IllegalStateException("No current page, call " +
                    "#processChildStream(PDContentStream, PDPage) instead");

        PDResources parent = pushResources(group);

        // transform the CTM using the stream's matrix

        // clip to bounding box



     * Processes a Type 3 character stream.
     * @param charProc Type 3 character procedure
     * @param textRenderingMatrix the Text Rendering Matrix
    // Requires a current page, switches to the char proc's resources, and swaps in fresh
    // text matrices which are put back at the end.
    // NOTE(review): the CTM handling described by the step comments, the processing of
    // the char proc itself, and the restore of 'parent' are not visible in this listing.
    protected void processType3Stream(PDType3CharProc charProc, Matrix textRenderingMatrix)
            throws IOException
        if (currentPage == null)
            throw new IllegalStateException("No current page, call " +
                    "#processChildStream(PDContentStream, PDPage) instead");

        // previous resources are returned so they can be reinstated later
        PDResources parent = pushResources(charProc);

        // replace the CTM with the TRM

        // transform the CTM using the stream's matrix (this is the FontMatrix)

        // note: we don't clip to the BBox as it is often wrong, see PDFBOX-1917

        // save text matrices (Type 3 stream may contain BT/ET, see PDFBOX-2137)
        Matrix textMatrixOld = textMatrix;
        textMatrix = new Matrix();
        Matrix textLineMatrixOld = textLineMatrix;
        textLineMatrix = new Matrix();


        // restore text matrices
        textMatrix = textMatrixOld;
        textLineMatrix = textLineMatrixOld;


     * Process the given annotation with the specified appearance stream.
     * @param annotation The annotation containing the appearance stream to process.
     * @param appearance The appearance stream to process.
    // Switches to the appearance stream's resources and, for annotations with a non-empty
    // rectangle, builds the matrix AA that maps the appearance onto that rectangle.
    // NOTE(review): the statements that install AA as the CTM, clip, process the stream
    // and restore 'parent' are not visible in this listing.
    protected void processAnnotation(PDAnnotation annotation, PDAppearanceStream appearance)
            throws IOException
        PDResources parent = pushResources(appearance);

        PDRectangle bbox = appearance.getBBox();
        PDRectangle rect = annotation.getRectangle();
        Matrix matrix = appearance.getMatrix();

        // zero-sized rectangles are not valid
        if (rect.getWidth() > 0 && rect.getHeight() > 0)
            // transformed appearance box
            PDRectangle transformedBox = bbox.transform(matrix);

            // compute a matrix which scales and translates the transformed appearance box to align
            // with the edges of the annotation's rectangle
            // NOTE(review): no guard against a zero-width or zero-height transformedBox is
            // visible before these divisions
            Matrix a = Matrix.getTranslatingInstance(rect.getLowerLeftX(), rect.getLowerLeftY());
            a.concatenate(Matrix.getScaleInstance(rect.getWidth() / transformedBox.getWidth(),
                    rect.getHeight() / transformedBox.getHeight()));

            // Matrix shall be concatenated with A to form a matrix AA that maps from the appearance’s
            // coordinate system to the annotation’s rectangle in default user space
            Matrix aa = Matrix.concatenate(matrix, a);

            // make matrix AA the CTM

            // clip to bounding box



     * Processes the given tiling pattern.
     * @param tilingPattern tiling pattern
    // Switches to the pattern's resources and reads its bounding box.
    // NOTE(review): the clipping, the stream processing and the restore of 'parent' are
    // not visible in this listing.
    protected final void processTilingPattern(PDTilingPattern tilingPattern) throws IOException
        PDResources parent = pushResources(tilingPattern);

        // note: we don't transform the CTM using the stream's matrix, as TilingPaint handles this

        // clip to bounding box
        PDRectangle bbox = tilingPattern.getBBox();



     * Shows the given annotation.
     * @param annotation An annotation on the current page.
     * @throws IOException If an error occurred reading the annotation
    // Asks getAppearance for the stream to use and processes it only when one is returned.
    public void showAnnotation(PDAnnotation annotation) throws IOException
        PDAppearanceStream appearanceStream = getAppearance(annotation);
        if (appearanceStream != null)
            processAnnotation(annotation, appearanceStream);

     * Returns the appearance stream to process for the given annotation. May be used to render
     * a specific appearance such as "hover".
     * @param annotation The current annotation.
     * @return The stream to process.
    // Default choice: the annotation's normal appearance stream.
    public PDAppearanceStream getAppearance(PDAnnotation annotation)
        return annotation.getNormalAppearanceStream();

     * Process a child stream of the given page. Cannot be used with #processPage(PDPage).
     * @param contentStream the child content stream
     * @throws IOException if there is an exception while processing the stream
    // Refuses to run while processPage is active, and leaves currentPage null on exit.
    // NOTE(review): no use of the 'page' argument and no stream processing is visible
    // between the guard and the final reset in this listing.
    protected void processChildStream(PDContentStream contentStream, PDPage page) throws IOException
        if (isProcessingPage)
            throw new IllegalStateException("Current page has already been set via " +
                    " #processPage(PDPage) call #processChildStream(PDContentStream) instead");
        currentPage = null;

     * Process a content stream.
     * @param contentStream the content stream
     * @throws IOException if there is an exception while processing the stream
    // Convenience overload: delegates with no pattern bounding box.
    private void processStream(PDContentStream contentStream) throws IOException
        processStream(contentStream, null);

     * Process a content stream.
     * @param contentStream the content stream
     * @param patternBBox fixme: temporary workaround for tiling patterns
     * @throws IOException if there is an exception while processing the stream
    // Switches to the stream's resources and picks the bounding box to clip to: the
    // stream's own box unless a pattern box is supplied.
    // NOTE(review): the CTM transform, the clip call, the operator processing and the
    // restore of 'parent' are not visible in this listing.
    private void processStream(PDContentStream contentStream, PDRectangle patternBBox)
            throws IOException
        PDResources parent = pushResources(contentStream);

        // transform the CTM using the stream's matrix

        // clip to bounding box
        PDRectangle bbox = contentStream.getBBox();
        if (patternBBox != null)
            // a supplied pattern box takes precedence over the stream's own box
            bbox = patternBBox;



     * Processes the operators of the given content stream.
    // Parses the content stream token by token: operands are collected in a list, and
    // each operator is dispatched with the operands gathered since the previous one.
    // subStreamMatrix holds the current CTM for the duration and is put back afterwards.
    private void processStreamOperators(PDContentStream contentStream) throws IOException
        // fixme: stream matrix
        Matrix oldSubStreamMatrix = subStreamMatrix;
        subStreamMatrix = getGraphicsState().getCurrentTransformationMatrix();

        List<COSBase> arguments = new ArrayList<COSBase>();
        PDFStreamParser parser = new PDFStreamParser(contentStream.getContentStream(), forceParsing);
            // NOTE(review): the extra indentation suggests an enclosing construct was lost
            // from this listing - confirm against the upstream source
            Iterator<Object> iter = parser.getTokenIterator();
            while (iter.hasNext())
                // NOTE(review): the right-hand side of this assignment is missing in this listing
                Object token =;
                if (token instanceof COSObject)
                    // indirect objects are unwrapped before being used as operands
                    arguments.add(((COSObject) token).getObject());
                else if (token instanceof Operator)
                    processOperator((Operator) token, arguments);
                    // fresh list so the processed operator's operands are not reused
                    arguments = new ArrayList<COSBase>();
                    // NOTE(review): as listed this add follows the operator branch; it is
                    // presumably the fallback for plain operands - verify
                    arguments.add((COSBase) token);

        // fixme: stream matrix
        subStreamMatrix = oldSubStreamMatrix;

     * Pushes the given stream's resources, returning the previous resources.
    // Makes the given stream's resources current and hands back the resources that were
    // current before the call, so the caller can restore them with popResources.
    private PDResources pushResources(PDContentStream contentStream)
        // resource lookup: first look for stream resources, then fallback to the current page
        PDResources parentResources = resources;
        PDResources streamResources = contentStream.getResources();
        if (streamResources != null)
            resources = streamResources;
            // NOTE(review): as listed, this assignment would always overwrite the one
            // above; the comment at the top suggests it is the fallback branch - confirm.
            // It also dereferences currentPage without a null check.
            resources = currentPage.getResources();

        // resources are required in PDF
        if (resources == null)
            resources = new PDResources();
        return parentResources;

     * Pops the current resources, replacing them with the given resources.
    // Reinstates the resources previously returned by pushResources.
    private void popResources(PDResources parentResources)
        resources = parentResources;

     * Transforms the given rectangle using the CTM and then intersects it with the current
     * clipping area.
    // A null rectangle is ignored; otherwise the rectangle is mapped through the CTM and
    // intersected with the clipping path of the current graphics state.
    private void clipToRect(PDRectangle rectangle)
        if (rectangle != null)
            PDRectangle clip = rectangle.transform(getGraphicsState().getCurrentTransformationMatrix());
            getGraphicsState().intersectClippingPath(new Area(clip.toGeneralPath()));

     * Called when the BT operator is encountered. This method is for overriding in subclasses, the
     * default implementation does nothing.
     * @throws IOException if there was an error processing the text
    // Hook with no default behavior.
    public void beginText() throws IOException
        // overridden in subclasses

     * Called when the ET operator is encountered. This method is for overriding in subclasses, the
     * default implementation does nothing.
     * @throws IOException if there was an error processing the text
    // Hook with no default behavior.
    public void endText() throws IOException
        // overridden in subclasses

     * Called when a string of text is to be shown.
     * @param string the encoded text
     * @throws IOException if there was an error showing the text
    // NOTE(review): no body is visible for this method in this listing.
    public void showTextString(byte[] string) throws IOException

     * Called when a string of text with spacing adjustments is to be shown.
     * @param array array of encoded text strings and adjustments
     * @throws IOException if there was an error showing the text
    // Walks the TJ array: numbers become text position adjustments (scaled by font size
    // and, for horizontal text, by horizontal scaling); strings are text to show; any
    // other element type is reported with an IOException.
    public void showTextStrings(COSArray array) throws IOException
        PDTextState textState = getGraphicsState().getTextState();
        float fontSize = textState.getFontSize();
        float horizontalScaling = textState.getHorizontalScaling() / 100f;
        // NOTE(review): getFont() is dereferenced without a null check here, whereas
        // showText handles a null font - confirm whether a guard is needed
        boolean isVertical = textState.getFont().isVertical();

        for (COSBase obj : array)
            if (obj instanceof COSNumber)
                float tj = ((COSNumber)obj).floatValue();

                // calculate the combined displacements
                // NOTE(review): both pairs of assignments are listed back to back; they are
                // presumably the two arms of an if/else whose 'else' line was lost
                float tx, ty;
                if (isVertical)
                    tx = 0;
                    ty = -tj / 1000 * fontSize;
                    tx = -tj / 1000 * fontSize * horizontalScaling;
                    ty = 0;

                applyTextAdjustment(tx, ty);
            else if(obj instanceof COSString)
                byte[] string = ((COSString)obj).getBytes();
                // NOTE(review): no use of 'string' is visible, and the throw below reads
                // as the fallback for unknown element types - verify against upstream
                throw new IOException("Unknown type in array for TJ operation:" + obj);

     * Applies a text position adjustment from the TJ operator. May be overridden in subclasses.
     * @param tx x-translation
     * @param ty y-translation
    // Translates the text matrix by (tx, ty).
    // NOTE(review): textMatrix is used without a null check; initPage sets it to null.
    protected void applyTextAdjustment(float tx, float ty) throws IOException
        // update the text matrix
        textMatrix.concatenate(Matrix.getTranslatingInstance(tx, ty));

     * Process text from the PDF Stream. You should override this method if you want to
     * perform an action when encoded text is being processed.
     * @param string the encoded text
     * @throws IOException if there is an error processing the string
    // Decodes the string one character code at a time with the current font. For each
    // code it computes the text rendering matrix, hands the glyph to showGlyph, and then
    // advances the text matrix by the glyph's displacement plus character/word spacing.
    // NOTE(review): InputStream and ByteArrayInputStream are not among the imports
    // visible in this listing.
    protected void showText(byte[] string) throws IOException
        PDGraphicsState state = getGraphicsState();
        PDTextState textState = state.getTextState();

        // get the current font
        PDFont font = textState.getFont();
        if (font == null)
            // fall back to a default font rather than failing
            LOG.warn("No current font, will use default");
            font = PDFontFactory.createDefaultFont();

        float fontSize = textState.getFontSize();
        float horizontalScaling = textState.getHorizontalScaling() / 100f;
        float charSpacing = textState.getCharacterSpacing();

        // put the text state parameters into matrix form
        Matrix parameters = new Matrix(
                fontSize * horizontalScaling, 0, // 0
                0, fontSize,                     // 0
                0, textState.getRise());         // 1

        // read the stream until it is empty
        InputStream in = new ByteArrayInputStream(string);
        while (in.available() > 0)
            // decode a character
            // the number of bytes consumed by readCode is the code's length
            int before = in.available();
            int code = font.readCode(in);
            int codeLength = before - in.available();
            String unicode = font.toUnicode(code);

            // Word spacing shall be applied to every occurrence of the single-byte character code
            // 32 in a string when using a simple font or a composite font that defines code 32 as
            // a single-byte code.
            float wordSpacing = 0;
            if (codeLength == 1 && code == 32)
                wordSpacing += textState.getWordSpacing();

            // text rendering matrix (text space -> device space)
            Matrix ctm = state.getCurrentTransformationMatrix();
            Matrix textRenderingMatrix = parameters.multiply(textMatrix).multiply(ctm);

            // get glyph's position vector if this is vertical text
            // changes to vertical text should be tested with PDFBOX-2294 and PDFBOX-1422
            if (font.isVertical())
                // position vector, in text space
                Vector v = font.getPositionVector(code);

                // apply the position vector to the horizontal origin to get the vertical origin
                // NOTE(review): the statement applying 'v' is not visible in this listing

            // get glyph's horizontal and vertical displacements, in text space
            Vector w = font.getDisplacement(code);

            // process the decoded glyph
            showGlyph(textRenderingMatrix, font, code, unicode, w);

            // calculate the combined displacements
            // NOTE(review): both pairs of assignments are listed back to back; they are
            // presumably the two arms of an if/else whose 'else' line was lost
            float tx, ty;
            if (font.isVertical())
                tx = 0;
                ty = w.getY() * fontSize + charSpacing + wordSpacing;
                tx = (w.getX() * fontSize + charSpacing + wordSpacing) * horizontalScaling;
                ty = 0;

            // update the text matrix
            textMatrix.concatenate(Matrix.getTranslatingInstance(tx, ty));

     * Called when a glyph is to be processed. The default implementation passes glyphs of
     * Type 3 fonts to showType3Glyph and glyphs of other fonts to showFontGlyph.
     * @param textRenderingMatrix the current text rendering matrix, T<sub>rm</sub>
     * @param font the current font
     * @param code internal PDF character code for the glyph
     * @param unicode the Unicode text for this glyph, or null if the PDF does not provide it
     * @param displacement the displacement (i.e. advance) of the glyph in text space
     * @throws IOException if the glyph cannot be processed
    // Dispatches on the font type: Type 3 fonts go to showType3Glyph, others to showFontGlyph.
    // NOTE(review): the 'else' separating the two calls is not visible in this listing.
    protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode,
                             Vector displacement) throws IOException
        if (font instanceof PDType3Font)
            showType3Glyph(textRenderingMatrix, (PDType3Font)font, code, unicode, displacement);
            showFontGlyph(textRenderingMatrix, font, code, unicode, displacement);

     * Called when a glyph is to be processed. This method is intended for overriding in subclasses,
     * the default implementation does nothing.
     * @param textRenderingMatrix the current text rendering matrix, T<sub>rm</sub>
     * @param font the current font
     * @param code internal PDF character code for the glyph
     * @param unicode the Unicode text for this glyph, or null if the PDF does not provide it
     * @param displacement the displacement (i.e. advance) of the glyph in text space
     * @throws IOException if the glyph cannot be processed
    // Hook with no default behavior.
    protected void showFontGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode,
                                 Vector displacement) throws IOException
        // overridden in subclasses

     * Called when a Type 3 glyph is to be processed. The default implementation looks up the
     * glyph's character procedure and, if one exists, processes it as a Type 3 stream.
     * @param textRenderingMatrix the current text rendering matrix, T<sub>rm</sub>
     * @param font the current font
     * @param code internal PDF character code for the glyph
     * @param unicode the Unicode text for this glyph, or null if the PDF does not provide it
     * @param displacement the displacement (i.e. advance) of the glyph in text space
     * @throws IOException if the glyph cannot be processed
    // Looks up the character procedure for the code; codes without one are skipped.
    protected void showType3Glyph(Matrix textRenderingMatrix, PDType3Font font, int code,
                                  String unicode, Vector displacement) throws IOException
        PDType3CharProc charProc = font.getCharProc(code);
        if (charProc != null)
            processType3Stream(charProc, textRenderingMatrix);

     * This is used to handle an operation.
     * @param operation The operation to perform.
     * @param arguments The list of arguments.
     * @throws IOException If there is an error processing the operation.
    // Resolves the operator name to an Operator and delegates to the Operator overload.
    public void processOperator(String operation, List<COSBase> arguments) throws IOException
        Operator operator = Operator.getOperator(operation);
        processOperator(operator, arguments);

     * This is used to handle an operation.
     * @param operator The operation to perform.
     * @param arguments The list of arguments.
     * @throws IOException If there is an error processing the operation.
    // Looks up the processor registered under the operator's name and runs it;
    // unsupportedOperator handles operators with no registered processor.
    // NOTE(review): the 'else' separating the two calls is not visible in this listing.
    protected void processOperator(Operator operator, List<COSBase> arguments) throws IOException
        String name = operator.getName();
        OperatorProcessor processor = operators.get(name);
        if (processor != null)
            processor.process(operator, arguments);
            unsupportedOperator(operator, arguments);

     * Called when an unsupported operator is encountered.
     * @param operator The unknown operator.
     * @param arguments The list of arguments.
    // Hook with no default behavior.
    protected void unsupportedOperator(Operator operator, List<COSBase> arguments) throws IOException
        // overridden in subclasses

     * Pushes the current graphics state to the stack.
    // NOTE(review): no body is visible for this method in this listing.
    public void saveGraphicsState()

     * Pops the current graphics state from the stack.
    // NOTE(review): no body is visible for this method in this listing.
    public void restoreGraphicsState()

     * @return Returns the size of the graphicsStack.
    // Number of graphics states currently on the stack.
    public int getGraphicsStackSize()
        return graphicsStack.size();

     * @return Returns the graphicsState.
    // Top of the graphics stack, left in place. Stack.peek() throws EmptyStackException
    // when the stack is empty.
    public PDGraphicsState getGraphicsState()
        return graphicsStack.peek();

     * @return Returns the textLineMatrix.
    // May be null: initPage resets the text line matrix to null.
    public Matrix getTextLineMatrix()
        return textLineMatrix;

     * @param value The textLineMatrix to set.
    // Stores the given matrix reference as the text line matrix (no copy is made).
    public void setTextLineMatrix(Matrix value)
        textLineMatrix = value;

     * @return Returns the textMatrix.
    // May be null: initPage resets the text matrix to null.
    public Matrix getTextMatrix()
        return textMatrix;

     * @param value The textMatrix to set.
    // Stores the given matrix reference as the text matrix (no copy is made).
    public void setTextMatrix(Matrix value)
        textMatrix = value;

     * Returns the subStreamMatrix.
    // Returns the matrix reference held in the subStreamMatrix field.
    protected Matrix getSubStreamMatrix()
        return subStreamMatrix;
     * Returns the stream's resources.
    // May be null: initPage resets the resources to null until pushResources sets them.
    public PDResources getResources()
        return resources;

     * Returns the current page.
    // May be null: processChildStream(PDContentStream, PDPage) resets the current page.
    public PDPage getCurrentPage()
        return currentPage;

     * Uses the current transformation matrix to transform a single point.
     * @param x x-coordinate of the point to be transformed
     * @param y y-coordinate of the point to be transformed
     * @return the transformed coordinates as Point2D.Double
    // Packs the point into a two-element array, transforms it in place, and wraps the
    // result in a new Point2D.Double.
    public Point2D.Double transformedPoint(double x, double y)
        double[] position = { x, y };
        // NOTE(review): the receiver of this transform call is missing in this listing;
        // the Javadoc indicates it is derived from the current transformation matrix
                .transform(position, 0, position, 0, 1);
        return new Point2D.Double(position[0], position[1]);
    // transforms a width using the CTM
    // The scale factor is sqrt((x*x + y*y) / 2), where x sums CTM entries (0,0) and (1,0)
    // and y sums entries (0,1) and (1,1).
    protected float transformWidth(float width)
        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
        float x = ctm.getValue(0, 0) + ctm.getValue(1, 0);
        float y = ctm.getValue(0, 1) + ctm.getValue(1, 1);
        return width * (float)Math.sqrt((x * x + y * y) * 0.5);

Related Classes of org.apache.pdfbox.contentstream.PDFStreamEngine

Copyright © 2018 All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact