Source Code of org.apache.pdfbox.pdmodel.font.PDFont

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.font;


import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.afm.AFMParser;
import org.apache.fontbox.afm.FontMetric;
import org.apache.fontbox.cmap.CMapParser;
import org.apache.fontbox.cmap.CMap;


import org.apache.pdfbox.encoding.AFMEncoding;
import org.apache.pdfbox.encoding.DictionaryEncoding;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.encoding.EncodingManager;
import org.apache.pdfbox.encoding.Type1Encoding;
import org.apache.pdfbox.encoding.conversion.CMapSubstitution;


import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;


import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDMatrix;
import org.apache.pdfbox.pdmodel.common.PDRectangle;


import org.apache.pdfbox.util.ResourceLoader;


import java.awt.Graphics;
import java.awt.geom.AffineTransform;


import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.UnsupportedEncodingException;


import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;


/**
 * This is the base class for all PDF fonts.
 *
 * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
 * @version $Revision: 1.46 $
 */
public abstract class PDFont implements COSObjectable
{


    /**
     * Log instance shared by all fonts.
     */
    private static final Log log = LogFactory.getLog(PDFont.class);


    /**
     * The cos dictionary for this font.  It is either created by the no-arg
     * constructor or handed in by the caller, and is what
     * {@link #getCOSObject()} returns.
     */
    protected COSDictionary font;


    /**
     * This is only used if this is a font object and it has an encoding.
     * It is assigned while the encoding is determined in the dictionary
     * constructor and by {@link #setEncoding(Encoding)}, and is returned by
     * {@link #getEncoding()}.
     */
    private Encoding fontEncoding = null;


    /**
     * This is only used if this is a font object and it has an encoding and it is
     * a type0 font with a cmap.  When present it is consulted first by
     * {@link #encode(byte[], int, int)}.
     */
    protected CMap cmap = null;


    /**
     * Process-wide cache of parsed CMaps keyed by name.  Entries are added when
     * a CMap is parsed together with an encoding name, looked up while the
     * encoding of a font is determined, and dropped by {@link #clearResources()}.
     * The map is wrapped with {@link Collections#synchronizedMap(Map)}.
     */
    private static Map<String, CMap> cmapObjects =
        Collections.synchronizedMap( new HashMap<String, CMap>() );


    /**
     * The static map of the default Adobe font metrics, keyed by font name.
     * It is filled once during class initialization and is unmodifiable afterwards.
     */
    private static final Map<String, FontMetric> afmObjects =
        Collections.unmodifiableMap( getAdobeFontMetrics() );


    private static Map<String, FontMetric> getAdobeFontMetrics() {
        Map<String, FontMetric> metrics = new HashMap<String, FontMetric>();
        addAdobeFontMetric( metrics, "Courier-Bold" );
        addAdobeFontMetric( metrics, "Courier-BoldOblique" );
        addAdobeFontMetric( metrics, "Courier" );
        addAdobeFontMetric( metrics, "Courier-Oblique" );
        addAdobeFontMetric( metrics, "Helvetica" );
        addAdobeFontMetric( metrics, "Helvetica-Bold" );
        addAdobeFontMetric( metrics, "Helvetica-BoldOblique" );
        addAdobeFontMetric( metrics, "Helvetica-Oblique" );
        addAdobeFontMetric( metrics, "Symbol" );
        addAdobeFontMetric( metrics, "Times-Bold" );
        addAdobeFontMetric( metrics, "Times-BoldItalic" );
        addAdobeFontMetric( metrics, "Times-Italic" );
        addAdobeFontMetric( metrics, "Times-Roman" );
        addAdobeFontMetric( metrics, "ZapfDingbats" );
        return metrics;
    }


    /**
     * Classpath prefix under which the predefined CMap resources are looked up.
     * It is prepended to the CMap name before the resource is loaded.
     */
    private final static String resourceRootCMAP = "org/apache/pdfbox/resources/cmap/";
    /**
     * Classpath prefix under which the bundled AFM files are looked up.
     * The resource name is this prefix, the font name and the suffix ".afm".
     */
    private final static String resourceRootAFM = "org/apache/pdfbox/resources/afm/";


    private static void addAdobeFontMetric(
            Map<String, FontMetric> metrics, String name ) {
        try {
            String resource = resourceRootAFM + name + ".afm";
            InputStream afmStream = ResourceLoader.loadResource( resource );
            if( afmStream != null )
            {
                try {
                    AFMParser parser = new AFMParser( afmStream );
                    parser.parse();
                    metrics.put( name, parser.getResult() );
                } finally {
                    afmStream.close();
                }
            }
        } catch (Exception e) {
            // ignore
        }
    }


    /**
     * This will clear the cache of parsed CMaps that is stored statically.
     * This is usually not a problem unless you want to reclaim
     * resources for a long running process.
     *
     * Only the CMap cache is emptied; the map of default Adobe font metrics
     * is unmodifiable and is not touched by this method.
     *
     * SPECIAL NOTE: THIS METHOD WILL BE MOVED OR REMOVED
     * TO ANOTHER LOCATION IN A FUTURE VERSION OF PDFBOX.
     */
    public static void clearResources()
    {
        cmapObjects.clear();
    }


    /**
     * Constructor.
     */
    public PDFont()
    {
        font = new COSDictionary();
        font.setItem( COSName.TYPE, COSName.FONT );
    }


    /**
     * Constructor.
     *
     * @param fontDictionary The font dictionary according to the PDF specification.
     */
    public PDFont( COSDictionary fontDictionary )
    {
        font = fontDictionary;
        determineEncoding();
    }


     private void determineEncoding()
    {
        String cmapName = null;
        COSName encodingName = null;
        COSBase toUnicode = font.getDictionaryObject( COSName.TO_UNICODE );
        COSBase encoding = getEncodingObject(); 
        if( toUnicode != null )
        {
            if ( toUnicode instanceof COSStream )
            {
                try {
                    parseCmap(null, ((COSStream)toUnicode).getUnfilteredStream(), null);
                }
                catch(IOException exception) 
                {
                    log.error("Error: Could not load embedded CMAP" );
                }
            }
            else if ( toUnicode instanceof COSName)
            {
                encodingName = (COSName)toUnicode;
                cmap = cmapObjects.get( encodingName.getName() );
                if (cmap == null) 
                {
                    cmapName = encodingName.getName();
                }
            }
        }
        if (encoding != null) 
        {
            if (encoding instanceof COSName) 
            {
                if (cmap == null)
                {
                    encodingName = (COSName)encoding;
                    cmap = cmapObjects.get( encodingName.getName() );
                    if (cmap == null) 
                    {
                        cmapName = encodingName.getName();
                    }
                }
                if (cmap == null && cmapName != null)
                {
                    try 
                    {
                        fontEncoding =
                            EncodingManager.INSTANCE.getEncoding(encodingName);
                    }
                    catch(IOException exception) 
                    {
                        log.debug("Debug: Could not find encoding for " + encodingName );
                    }
                }
            }
            else if (encoding instanceof COSDictionary) 
            {
                try 
                {
                    fontEncoding = new DictionaryEncoding((COSDictionary)encoding);
                }
                catch(IOException exception) 
                {
                    log.error("Error: Could not create the DictionaryEncoding" );
                }
            }
            else if(encoding instanceof COSStream )
            {
                if (cmap == null)
                {
                    COSStream encodingStream = (COSStream)encoding;
                    try 
                    {
                        parseCmap( null, encodingStream.getUnfilteredStream(), null );
                    }
                    catch(IOException exception) 
                    {
                        log.error("Error: Could not parse the embedded CMAP" );
                    }
                }
            }
        }
        COSDictionary cidsysteminfo = (COSDictionary)font.getDictionaryObject(COSName.CIDSYSTEMINFO);
        if (cidsysteminfo != null) 
        {
            String ordering = cidsysteminfo.getString(COSName.ORDERING);
            String registry = cidsysteminfo.getString(COSName.REGISTRY);
            int supplement = cidsysteminfo.getInt(COSName.SUPPLEMENT);
            cmapName = registry + "-" + ordering+ "-" + supplement;
            cmapName = CMapSubstitution.substituteCMap( cmapName );
            cmap = cmapObjects.get( cmapName );
        }
        FontMetric metric = getAFM();
        if( metric != null )
        {
            fontEncoding = new AFMEncoding( metric );
        }
        
        if (cmap == null && cmapName != null) 
        {
            String resourceName = resourceRootCMAP + cmapName;
            try {
                parseCmap( resourceRootCMAP, ResourceLoader.loadResource( resourceName ), encodingName );
                if( cmap == null && encodingName == null)
                {
                    log.error("Error: Could not parse predefined CMAP file for '" + cmapName + "'" );
                }
            }
            catch(IOException exception) 
            {
                log.error("Error: Could not find predefined CMAP file for '" + cmapName + "'" );
            }
        }
        getEncodingFromFont();
    }


    /**
     * {@inheritDoc}
     */
    public COSBase getCOSObject()
    {
        return font;
    }


    /**
     * This will get the font width for a character.
     *
     * @param c The array that holds the character code to get the width for.
     * @param offset The offset into the array at which the code starts.
     * @param length The number of bytes that make up the code.
     *
     * @return The width is in 1000 unit of text space, ie 333 or 777
     *
     * @throws IOException If an error occurs while parsing.
     */
    public abstract float getFontWidth( byte[] c, int offset, int length ) throws IOException;


    /**
     * This will get the font height for a character.
     *
     * @param c The array that holds the character code to get the height for.
     * @param offset The offset into the array at which the code starts.
     * @param length The number of bytes that make up the code.
     *
     * @return The height of the character; presumably in 1000 unit of text space
     * like the width, to be confirmed against the implementations.
     *
     * @throws IOException If an error occurs while parsing.
     */
    public abstract float getFontHeight( byte[] c, int offset, int length ) throws IOException;


    /**
     * This will get the width of this string for this font.
     *
     * @param string The string to get the width of.
     *
     * @return The width of the string in 1000 units of text space, ie 333 567...
     *
     * @throws IOException If there is an error getting the width information.
     */
    public float getStringWidth( String string ) throws IOException
    {
        byte[] data = string.getBytes("ISO-8859-1");
        float totalWidth = 0;
        for( int i=0; i<data.length; i++ )
        {
            totalWidth+=getFontWidth( data, i, 1 );
        }
        return totalWidth;
    }


    /**
     * This will get the average font width for all characters.
     *
     * @return The average width, in 1000 unit of text space, ie 333 or 777
     *
     * @throws IOException If an error occurs while parsing.
     */
    public abstract float getAverageFontWidth() throws IOException;


    /**
     * This will draw a string on a canvas using the font.
     *
     * @param string The string to draw.
     * @param g The graphics to draw onto.
     * @param fontSize The size of the font to draw.
     * @param at The transformation matrix with all the information for scaling and shearing of the font.
     * @param x The x coordinate to draw at.
     * @param y The y coordinate to draw at.
     *
     * @throws IOException If there is an error drawing the specific string.
     */
    public abstract void drawString( String string, Graphics g, float fontSize,
        AffineTransform at, float x, float y ) throws IOException;


    /**
     * Used for multibyte encodings.
     *
     * @param data The array of data.
     * @param offset The offset into the array.
     * @param length The number of bytes to use.
     *
     * @return The int value of data from the array.
     */
    protected int getCodeFromArray( byte[] data, int offset, int length )
    {
        int code = 0;
        for( int i=0; i<length; i++ )
        {
            code <<= 8;
            code |= (data[offset+i]+256)%256;
        }
        return code;
    }


    /**
     * This will attempt to get the font width from an AFM file.
     *
     * @param code The character code we are trying to get.
     *
     * @return The font width from the AFM file.
     *
     * @throws IOException if we cannot find the width.
     */
    protected float getFontWidthFromAFMFile( int code ) throws IOException
    {
        float retval = 0;
        FontMetric metric = getAFM();
        if( metric != null )
        {
            Encoding encoding = getEncoding();
            String characterName = encoding.getName( code );
            retval = metric.getCharacterWidth( characterName );
        }
        return retval;
    }


    /**
     * This will attempt to get the average font width from an AFM file.
     *
     * @return The average font width from the AFM file.
     *
     * @throws IOException if we cannot find the width.
     */
    protected float getAverageFontWidthFromAFMFile() throws IOException
    {
        float retval = 0;
        FontMetric metric = getAFM();
        if( metric != null )
        {
            retval = metric.getAverageCharacterWidth();
        }
        return retval;
    }


    /**
     * This will get an AFM object if one exists.
     *
     * @return The afm object from the name.
     *
     */
    protected FontMetric getAFM()
    {
        if(afm==null){
            COSBase baseFont = font.getDictionaryObject( COSName.BASE_FONT );
            String name = null;
            if( baseFont instanceof COSName )
            {
                name = ((COSName)baseFont).getName();
                if (name.indexOf("+") > -1)
                {
                    name = name.substring(name.indexOf("+")+1);
                }


            }
            else if( baseFont instanceof COSString )
            {
                COSString string = (COSString)baseFont;
                name = string.getString();
            }
            if( name != null )
            {
                afm = afmObjects.get( name );
            }
        }
        return afm;
    }


    private FontMetric afm = null;
    
    private COSBase encodingObject = null;
    /**
     * cache the {@link COSName#ENCODING} object from
     * the font's dictionary since it is called so often.
     * <p>
     * Use this method instead of
     * <pre>
     *   font.getDictionaryObject(COSName.ENCODING);
     * </pre>
     * @return
     */
    private COSBase getEncodingObject(){
      if(encodingObject==null){
        encodingObject = font.getDictionaryObject( COSName.ENCODING );
      }
      return encodingObject;
    }
    
    /**
     * This will perform the encoding of a character if needed.
     *
     * @param c The character to encode.
     * @param offset The offset into the array to get the data
     * @param length The number of bytes to read.
     *
     * @return The value of the encoded character.
     *
     * @throws IOException If there is an error during the encoding.
     */
    public String encode( byte[] c, int offset, int length ) throws IOException
    {
        String retval = null;
        if( cmap != null )
        {
            if (length == 1 && cmap.hasOneByteMappings()) 
            {
                retval = cmap.lookup( c, offset, length );
            }
            else if (length == 2 && cmap.hasTwoByteMappings())
            {
                retval = cmap.lookup( c, offset, length );
            }
        }
        
        // there is no cmap but probably an encoding with a suitable mapping
        if( retval == null )
        {
            Encoding encoding = getEncoding();
            if( encoding != null )
            {
                retval = encoding.getCharacter( getCodeFromArray( c, offset, length ) );
            }
            if( retval == null && (cmap == null || length == 2))
            {
                retval = getStringFromArray( c, offset, length );
            }
        }
        return retval;
    }


    private static final String[] SINGLE_CHAR_STRING = new String[256];
    private static final String[][] DOUBLE_CHAR_STRING = new String[256][256];
    static
    {
        for( int i=0; i<256; i++ )
        {
            SINGLE_CHAR_STRING[i] = new String( new byte[] {(byte)i} );
            for( int j=0; j<256; j++ )
            {
                try
                {
                    DOUBLE_CHAR_STRING[i][j] = new String( new byte[] {(byte)i, (byte)j}, "UTF-16BE" );
                }
                catch (UnsupportedEncodingException e)
                {
                    // Nothing should happen here
                    e.printStackTrace();
                }
            }
        }
    }


    private static String getStringFromArray( byte[] c, int offset, int length ) throws IOException
    {
        String retval = null;
        if( length == 1 )
        {
            retval = SINGLE_CHAR_STRING[(c[offset]+256)%256];
        }
        else if( length == 2 )
        {
            retval = DOUBLE_CHAR_STRING[(c[offset]+256)%256][(c[offset+1]+256)%256];
        }
        else
        {
            throw new IOException( "Error:Unknown character length:" + length );
        }
        return retval;
    }


    private void parseCmap( String cmapRoot, InputStream cmapStream, COSName encodingName )
    {
        if( cmapStream != null )
        {
            CMapParser parser = new CMapParser();
            try 
            {
                cmap = parser.parse( cmapRoot, cmapStream );
                if( encodingName != null )
                {
                    cmapObjects.put( encodingName.getName(), cmap );
                }
            }
            catch (IOException exception) {}
        }
    }


    /**
     * The will set the encoding for this font.
     *
     * @param enc The font encoding.
     */
    public void setEncoding( Encoding enc )
    {
        font.setItem( COSName.ENCODING, enc );
        fontEncoding = enc;
    }


    /**
     * This will get or create the encoder.
     *
     * modified by Christophe Huault : DGBS Strasbourg huault@free.fr october 2004
     *
     * @return The encoding to use.
     *
     * @throws IOException If there is an error getting the encoding.
     */
    public Encoding getEncoding() throws IOException
    {
        return fontEncoding;
    }


    /**
     * This will always return "Font" for fonts.
     *
     * @return The type of object that this is.
     */
    public String getType()
    {
        return font.getNameAsString( COSName.TYPE );
    }


    // Memorized values to avoid repeated dictionary lookups
    private String subtype = null;
    private boolean type1Font;
    private boolean trueTypeFont;
    private boolean typeFont;


    /**
     * This will get the subtype of font, Type1, Type3, ...
     *
     * @return The type of font that this is.
     */
    public String getSubType()
    {
        if (subtype == null) {
            subtype = font.getNameAsString( COSName.SUBTYPE );
            type1Font = "Type1".equals(subtype);
            trueTypeFont = "TrueType".equals(subtype);
            typeFont = type1Font || "Type0".equals(subtype) || trueTypeFont;
        }
        return subtype;
    }


    private boolean isType1Font() {
        getSubType();
        return type1Font;
    }


    private boolean isTrueTypeFont() {
        getSubType();
        return trueTypeFont;
    }


    private boolean isTypeFont() {
        getSubType();
        return typeFont;
    }


    /**
     * The PostScript name of the font.
     *
     * @return The postscript name of the font.
     */
    public String getBaseFont()
    {
        return font.getNameAsString( COSName.BASE_FONT );
    }


    /**
     * Set the PostScript name of the font.
     *
     * @param baseFont The postscript name for the font.
     */
    public void setBaseFont( String baseFont )
    {
        font.setName( COSName.BASE_FONT, baseFont );
    }


    /**
     * The code for the first char or -1 if there is none.
     *
     * @return The code for the first character.
     */
    public int getFirstChar()
    {
        return font.getInt( COSName.FIRST_CHAR, -1 );
    }


    /**
     * Set the first character this font supports.
     *
     * @param firstChar The first character.
     */
    public void setFirstChar( int firstChar )
    {
        font.setInt( COSName.FIRST_CHAR, firstChar );
    }


    /**
     * The code for the last char or -1 if there is none.
     *
     * @return The code for the last character.
     */
    public int getLastChar()
    {
        return font.getInt( COSName.LAST_CHAR, -1 );
    }


    /**
     * Set the last character this font supports.
     *
     * @param lastChar The last character.
     */
    public void setLastChar( int lastChar )
    {
        font.setInt( COSName.LAST_CHAR, lastChar );
    }


    /**
     * The widths of the characters.  This will be null for the standard 14 fonts.
     *
     * @return The widths of the characters.
     */
    public List getWidths()
    {
        COSArray array = (COSArray)font.getDictionaryObject( COSName.WIDTHS );
        return COSArrayList.convertFloatCOSArrayToList( array );
    }


    /**
     * Set the widths of the characters code.
     *
     * @param widths The widths of the character codes.
     */
    public void setWidths( List widths )
    {
        font.setItem( COSName.WIDTHS, COSArrayList.converterToCOSArray( widths ) );
    }


    /**
     * This will get the matrix that is used to transform glyph space to
     * text space.  By default there are 1000 glyph units to 1 text space
     * unit, but type3 fonts can use any value.
     *
     * Note:If this is a type3 font then it can be modified via the PDType3Font.setFontMatrix, otherwise this
     * is a read-only property.
     *
     * @return The matrix to transform from glyph space to text space.
     */
    public PDMatrix getFontMatrix()
    {
        PDMatrix matrix = null;
        COSArray array = (COSArray)font.getDictionaryObject( COSName.FONT_MATRIX );
        if( array == null )
        {
            array = new COSArray();
            array.add( new COSFloat( 0.001f ) );
            array.add( COSInteger.ZERO );
            array.add( COSInteger.ZERO );
            array.add( new COSFloat( 0.001f ) );
            array.add( COSInteger.ZERO );
            array.add( COSInteger.ZERO );
        }
        matrix = new PDMatrix(array);


        return matrix;
    }


    /**
     * Tries to get the encoding for the type1 font.
     *
     */
    private void getEncodingFromFont()
    {
        // This whole section of code needs to be replaced with an actual type1 font parser!!
        // Get the font program from the embedded type font.
        if (isType1Font()) {
            COSDictionary fontDescriptor = (COSDictionary) font.getDictionaryObject(
                COSName.FONT_DESC);
            if( fontDescriptor != null )
            {
                COSStream fontFile = (COSStream) fontDescriptor.getDictionaryObject(
                    COSName.FONT_FILE);
                if( fontFile != null )
                {
                    try 
                    {
                        BufferedReader in =
                                new BufferedReader(new InputStreamReader(fontFile.getUnfilteredStream()));
                        
                        // this section parses the font program stream searching for a /Encoding entry
                        // if it contains an array of values a Type1Encoding will be returned
                        // if it encoding contains an encoding name the corresponding Encoding will be returned
                        String line = "";
                        Type1Encoding encoding = null;
                        while( (line = in.readLine()) != null)
                        {
                            if (line.startsWith("currentdict end")) {
                                if (encoding != null)
                                    fontEncoding = encoding;
                                break;
                            }
                            if (line.startsWith("/Encoding")) 
                            {
                                if(line.endsWith("array")) 
                                {
                                    StringTokenizer st = new StringTokenizer(line);
                                    // ignore the first token
                                    st.nextElement();
                                    int arraySize = Integer.parseInt(st.nextToken());
                                    encoding = new Type1Encoding(arraySize);
                                }
                                // if there is already an encoding, we don't need to
                                // assign another one
                                else if (fontEncoding == null)
                                {
                                    StringTokenizer st = new StringTokenizer(line);
                                    // ignore the first token
                                    st.nextElement();
                                    String type1Encoding = st.nextToken();
                                    fontEncoding =
                                        EncodingManager.INSTANCE.getEncoding(
                                                COSName.getPDFName(type1Encoding));
                                    break;
                                }
                            }
                            else if (line.startsWith("dup")) {
                                StringTokenizer st = new StringTokenizer(line);
                                // ignore the first token
                                st.nextElement();
                                int index = Integer.parseInt(st.nextToken());
                                String name = st.nextToken();
                                encoding.addCharacterEncoding(index, name.replace("/", ""));
                            }
                        }
                        in.close();
                    }
                    catch(IOException exception) 
                    {
                        log.error("Error: Could not extract the encoding from the embedded type1 font.");
                    }
                }
            }
        }
    }


    /**
     * This will get the font's bounding box.
     *
     * @return The font's bounding box.
     *
     * @throws IOException If there is an error getting the bounding box.
     */
    public abstract PDRectangle getFontBoundingBox() throws IOException;


    /**
     * {@inheritDoc}
     */
    public boolean equals( Object other )
    {
        return other instanceof PDFont && ((PDFont)other).getCOSObject() == this.getCOSObject();
    }


    /**
     * {@inheritDoc}
     */
    public int hashCode()
    {
        return this.getCOSObject().hashCode();
    }


}
Source Code of org.apache.pdfbox.pdmodel.font.PDFont

Related Classes of org.apache.pdfbox.pdmodel.font.PDFont