/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.font;
import org.apache.fontbox.afm.AFMParser;
import org.apache.fontbox.afm.FontMetric;
import org.apache.fontbox.cmap.CMapParser;
import org.apache.fontbox.cmap.CMap;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDMatrix;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.util.ResourceLoader;
import java.awt.Graphics;
import java.awt.geom.AffineTransform;
import java.io.InputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* This is the base class for all PDF fonts.
*
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.46 $
*/
public abstract class PDFont implements COSObjectable
{
/**
* The cos dictionary for this font.
*/
protected COSDictionary font;
/**
* This is only used if this is a font object and it has an encoding.
*/
private Encoding fontEncoding = null;
/**
* The descriptor of the font.
*/
private PDFontDescriptor fontDescriptor = null;
/**
* The font matrix.
*/
protected PDMatrix fontMatrix = null;
/**
* This is only used if this is a font object and it has an encoding and it is
* a type0 font with a cmap.
*/
protected CMap cmap = null;
/**
* The CMap holding the ToUnicode mapping
*/
protected CMap toUnicodeCmap = null;
private boolean hasToUnicode = false;
protected static Map<String, CMap> cmapObjects =
Collections.synchronizedMap( new HashMap<String, CMap>() );
/**
* A list a floats representing the widths.
*/
private List<Float> widths = null;
/**
* The static map of the default Adobe font metrics.
*/
private static final Map<String, FontMetric> afmObjects =
Collections.unmodifiableMap( getAdobeFontMetrics() );
// TODO move the Map to PDType1Font as these are the 14 Standard fonts
// which are definitely Type 1 fonts
private static Map<String, FontMetric> getAdobeFontMetrics()
{
Map<String, FontMetric> metrics = new HashMap<String, FontMetric>();
addAdobeFontMetric( metrics, "Courier-Bold" );
addAdobeFontMetric( metrics, "Courier-BoldOblique" );
addAdobeFontMetric( metrics, "Courier" );
addAdobeFontMetric( metrics, "Courier-Oblique" );
addAdobeFontMetric( metrics, "Helvetica" );
addAdobeFontMetric( metrics, "Helvetica-Bold" );
addAdobeFontMetric( metrics, "Helvetica-BoldOblique" );
addAdobeFontMetric( metrics, "Helvetica-Oblique" );
addAdobeFontMetric( metrics, "Symbol" );
addAdobeFontMetric( metrics, "Times-Bold" );
addAdobeFontMetric( metrics, "Times-BoldItalic" );
addAdobeFontMetric( metrics, "Times-Italic" );
addAdobeFontMetric( metrics, "Times-Roman" );
addAdobeFontMetric( metrics, "ZapfDingbats" );
return metrics;
}
protected final static String resourceRootCMAP = "org/apache/pdfbox/resources/cmap/";
private final static String resourceRootAFM = "org/apache/pdfbox/resources/afm/";
private static void addAdobeFontMetric(
Map<String, FontMetric> metrics, String name )
{
try
{
String resource = resourceRootAFM + name + ".afm";
InputStream afmStream = ResourceLoader.loadResource( resource );
if( afmStream != null )
{
try
{
AFMParser parser = new AFMParser( afmStream );
parser.parse();
metrics.put( name, parser.getResult() );
}
finally
{
afmStream.close();
}
}
}
catch (Exception e)
{
// ignore
}
}
/**
* This will clear AFM resources that are stored statically.
* This is usually not a problem unless you want to reclaim
* resources for a long running process.
*
* SPECIAL NOTE: The font calculations are currently in COSObject, which
* is where they will reside until PDFont is mature enough to take them over.
* PDFont is the appropriate place for them and not in COSObject but we need font
* calculations for text extraction. THIS METHOD WILL BE MOVED OR REMOVED
* TO ANOTHER LOCATION IN A FUTURE VERSION OF PDFBOX.
*/
public static void clearResources()
{
cmapObjects.clear();
}
/**
* Constructor.
*/
public PDFont()
{
font = new COSDictionary();
font.setItem( COSName.TYPE, COSName.FONT );
}
/**
* Constructor.
*
* @param fontDictionary The font dictionary according to the PDF specification.
*/
public PDFont( COSDictionary fontDictionary )
{
font = fontDictionary;
determineEncoding();
}
/**
* This will get the font descriptor for this font.
*
* @return The font descriptor for this font.
*
*/
public PDFontDescriptor getFontDescriptor()
{
if(fontDescriptor == null)
{
COSDictionary fd = (COSDictionary)font.getDictionaryObject( COSName.FONT_DESC );
if (fd != null)
{
fontDescriptor = new PDFontDescriptorDictionary( fd );
}
else
{
getAFM();
if( afm != null )
{
fontDescriptor = new PDFontDescriptorAFM( afm );
}
}
}
return fontDescriptor;
}
/**
* This will set the font descriptor.
*
* @param fdDictionary The font descriptor.
*/
public void setFontDescriptor( PDFontDescriptorDictionary fdDictionary )
{
COSDictionary dic = null;
if( fdDictionary != null )
{
dic = fdDictionary.getCOSDictionary();
}
font.setItem( COSName.FONT_DESC, dic );
fontDescriptor = fdDictionary;
}
/**
* Determines the encoding for the font.
* This method as to be overwritten, as there are different
* possibilities to define a mapping.
*/
protected abstract void determineEncoding();
/**
* {@inheritDoc}
*/
public COSBase getCOSObject()
{
return font;
}
/**
* This will get the font width for a character.
*
* @param c The character code to get the width for.
* @param offset The offset into the array.
* @param length The length of the data.
*
* @return The width is in 1000 unit of text space, ie 333 or 777
*
* @throws IOException If an error occurs while parsing.
*/
public abstract float getFontWidth( byte[] c, int offset, int length ) throws IOException;
/**
* This will get the font width for a character.
*
* @param c The character code to get the width for.
* @param offset The offset into the array.
* @param length The length of the data.
*
* @return The width is in 1000 unit of text space, ie 333 or 777
*
* @throws IOException If an error occurs while parsing.
*/
public abstract float getFontHeight( byte[] c, int offset, int length ) throws IOException;
/**
* This will get the width of this string for this font.
*
* @param string The string to get the width of.
*
* @return The width of the string in 1000 units of text space, ie 333 567...
*
* @throws IOException If there is an error getting the width information.
*/
public float getStringWidth( String string ) throws IOException
{
byte[] data = string.getBytes("ISO-8859-1");
float totalWidth = 0;
for( int i=0; i<data.length; i++ )
{
totalWidth+=getFontWidth( data, i, 1 );
}
return totalWidth;
}
/**
* This will get the average font width for all characters.
*
* @return The width is in 1000 unit of text space, ie 333 or 777
*
* @throws IOException If an error occurs while parsing.
*/
public abstract float getAverageFontWidth() throws IOException;
/**
* This will draw a string on a canvas using the font.
*
* @param string The string to draw.
* @param g The graphics to draw onto.
* @param fontSize The size of the font to draw.
* @param at The transformation matrix with all information for scaling and shearing of the font.
* @param x The x coordinate to draw at.
* @param y The y coordinate to draw at.
*
* @throws IOException If there is an error drawing the specific string.
* @deprecated use {@link PDFont#drawString(String, int[], Graphics, float, AffineTransform, float, float)} instead
*/
public void drawString( String string, Graphics g, float fontSize, AffineTransform at, float x, float y )
throws IOException
{
drawString(string, null, g, fontSize, at, x, y);
}
/**
* This will draw a string on a canvas using the font.
*
* @param string The string to draw.
* @param codePoints The codePoints of the given string.
* @param g The graphics to draw onto.
* @param fontSize The size of the font to draw.
* @param at The transformation matrix with all information for scaling and shearing of the font.
* @param x The x coordinate to draw at.
* @param y The y coordinate to draw at.
*
* @throws IOException If there is an error drawing the specific string.
*/
public abstract void drawString( String string, int[] codePoints, Graphics g, float fontSize,
AffineTransform at, float x, float y ) throws IOException;
/**
* Used for multibyte encodings.
*
* @param data The array of data.
* @param offset The offset into the array.
* @param length The number of bytes to use.
*
* @return The int value of data from the array.
*/
public int getCodeFromArray( byte[] data, int offset, int length )
{
int code = 0;
for( int i=0; i<length; i++ )
{
code <<= 8;
code |= (data[offset+i]+256)%256;
}
return code;
}
/**
* This will attempt to get the font width from an AFM file.
*
* @param code The character code we are trying to get.
*
* @return The font width from the AFM file.
*
* @throws IOException if we cannot find the width.
*/
protected float getFontWidthFromAFMFile( int code ) throws IOException
{
float retval = 0;
FontMetric metric = getAFM();
if( metric != null )
{
String characterName = fontEncoding.getName( code );
retval = metric.getCharacterWidth( characterName );
}
return retval;
}
/**
* This will attempt to get the average font width from an AFM file.
*
* @return The average font width from the AFM file.
*
* @throws IOException if we cannot find the width.
*/
protected float getAverageFontWidthFromAFMFile() throws IOException
{
float retval = 0;
FontMetric metric = getAFM();
if( metric != null )
{
retval = metric.getAverageCharacterWidth();
}
return retval;
}
/**
* This will get an AFM object if one exists.
*
* @return The afm object from the name.
*
*/
protected FontMetric getAFM()
{
if (isType1Font() && afm == null)
{
COSBase baseFont = font.getDictionaryObject( COSName.BASE_FONT );
String name = null;
if( baseFont instanceof COSName )
{
name = ((COSName)baseFont).getName();
if (name.indexOf("+") > -1)
{
name = name.substring(name.indexOf("+")+1);
}
}
else if ( baseFont instanceof COSString )
{
COSString string = (COSString)baseFont;
name = string.getString();
}
if ( name != null )
{
afm = afmObjects.get( name );
}
}
return afm;
}
private FontMetric afm = null;
private COSBase encoding = null;
/**
* cache the {@link COSName#ENCODING} object from
* the font's dictionary since it is called so often.
* <p>
* Use this method instead of
* <pre>
* font.getDictionaryObject(COSName.ENCODING);
* </pre>
* @return the encoding
*/
protected COSBase getEncoding()
{
if (encoding == null)
{
encoding = font.getDictionaryObject( COSName.ENCODING );
}
return encoding;
}
/**
* Set the encoding object from the fonts dictionary.
* @param encoding the given encoding.
*/
protected void setEncoding(COSBase encodingValue)
{
font.setItem( COSName.ENCODING, encodingValue );
encoding = encodingValue;
}
/**
* Encode the given value using the CMap of the font.
*
* @param code the code to encode.
* @param length the byte length of the given code.
* @param isCIDFont indicates that the used font is a CID font.
*
* @return The value of the encoded character.
* @throws IOException if something went wrong
*/
protected String cmapEncoding( int code, int length, boolean isCIDFont, CMap sourceCmap ) throws IOException
{
String retval = null;
if (sourceCmap != null)
{
retval = sourceCmap.lookup(code, length);
if (retval == null && isCIDFont)
{
retval = sourceCmap.lookupCID(code);
}
}
return retval;
}
/**
* This will perform the encoding of a character if needed.
*
* @param c The character to encode.
* @param offset The offset into the array to get the data
* @param length The number of bytes to read.
*
* @return The value of the encoded character.
*
* @throws IOException If there is an error during the encoding.
*/
public String encode( byte[] c, int offset, int length ) throws IOException
{
String retval = null;
int code = getCodeFromArray( c, offset, length );
if( toUnicodeCmap != null )
{
retval = cmapEncoding(code, length, false, toUnicodeCmap);
}
if( retval == null && cmap != null )
{
retval = cmapEncoding(code, length, false, cmap);
}
// there is no cmap but probably an encoding with a suitable mapping
if( retval == null )
{
if( fontEncoding != null )
{
retval = fontEncoding.getCharacter( code );
}
if( retval == null && (cmap == null || length == 2))
{
retval = getStringFromArray( c, offset, length );
}
}
return retval;
}
public int encodeToCID( byte[] c, int offset, int length ) throws IOException
{
int code = -1;
if (encode(c, offset, length) != null) {
code = getCodeFromArray( c, offset, length );
}
return code;
}
private static final String[] SINGLE_CHAR_STRING = new String[256];
private static final String[][] DOUBLE_CHAR_STRING = new String[256][256];
static
{
for( int i=0; i<256; i++ )
{
try
{
SINGLE_CHAR_STRING[i] = new String( new byte[] {(byte)i}, "ISO-8859-1" );
}
catch (UnsupportedEncodingException e)
{
// Nothing should happen here
e.printStackTrace();
}
for( int j=0; j<256; j++ )
{
try
{
DOUBLE_CHAR_STRING[i][j] = new String( new byte[] {(byte)i, (byte)j}, "UTF-16BE" );
}
catch (UnsupportedEncodingException e)
{
// Nothing should happen here
e.printStackTrace();
}
}
}
}
private static String getStringFromArray( byte[] c, int offset, int length ) throws IOException
{
String retval = null;
if( length == 1 )
{
retval = SINGLE_CHAR_STRING[(c[offset]+256)%256];
}
else if( length == 2 )
{
retval = DOUBLE_CHAR_STRING[(c[offset]+256)%256][(c[offset+1]+256)%256];
}
else
{
throw new IOException( "Error:Unknown character length:" + length );
}
return retval;
}
protected CMap parseCmap( String cmapRoot, InputStream cmapStream)
{
CMap targetCmap = null;
if( cmapStream != null )
{
CMapParser parser = new CMapParser();
try
{
targetCmap = parser.parse( cmapRoot, cmapStream );
// limit the cache to external CMaps
if (cmapRoot != null)
{
cmapObjects.put( targetCmap.getName(), targetCmap );
}
}
catch (IOException exception) {}
}
return targetCmap;
}
/**
* The will set the encoding for this font.
*
* @param enc The font encoding.
*/
public void setFontEncoding( Encoding enc )
{
fontEncoding = enc;
}
/**
* This will get or create the encoder.
*
* @return The encoding to use.
*/
public Encoding getFontEncoding()
{
return fontEncoding;
}
/**
* This will always return "Font" for fonts.
*
* @return The type of object that this is.
*/
public String getType()
{
return font.getNameAsString( COSName.TYPE );
}
// Memorized values to avoid repeated dictionary lookups
private String subtype = null;
private boolean type1Font;
private boolean trueTypeFont;
private boolean typeFont;
private boolean type0Font;
/**
* This will get the subtype of font, Type1, Type3, ...
*
* @return The type of font that this is.
*/
public String getSubType()
{
if (subtype == null)
{
subtype = font.getNameAsString( COSName.SUBTYPE );
type1Font = "Type1".equals(subtype);
trueTypeFont = "TrueType".equals(subtype);
type0Font = "Type0".equals(subtype);
typeFont = type1Font || "Type0".equals(subtype) || trueTypeFont;
}
return subtype;
}
/**
* Determines if the font is a type 1 font.
* @return returns true if the font is a type 1 font
*/
protected boolean isType1Font()
{
getSubType();
return type1Font;
}
/**
* Determines if the font is a type 0 font.
* @return returns true if the font is a type 0 font
*/
protected boolean isType0Font()
{
getSubType();
return type0Font;
}
private boolean isTrueTypeFont()
{
getSubType();
return trueTypeFont;
}
private boolean isTypeFont()
{
getSubType();
return typeFont;
}
/**
* The PostScript name of the font.
*
* @return The postscript name of the font.
*/
public String getBaseFont()
{
return font.getNameAsString( COSName.BASE_FONT );
}
/**
* Set the PostScript name of the font.
*
* @param baseFont The postscript name for the font.
*/
public void setBaseFont( String baseFont )
{
font.setName( COSName.BASE_FONT, baseFont );
}
/**
* The code for the first char or -1 if there is none.
*
* @return The code for the first character.
*/
public int getFirstChar()
{
return font.getInt( COSName.FIRST_CHAR, -1 );
}
/**
* Set the first character this font supports.
*
* @param firstChar The first character.
*/
public void setFirstChar( int firstChar )
{
font.setInt( COSName.FIRST_CHAR, firstChar );
}
/**
* The code for the last char or -1 if there is none.
*
* @return The code for the last character.
*/
public int getLastChar()
{
return font.getInt( COSName.LAST_CHAR, -1 );
}
/**
* Set the last character this font supports.
*
* @param lastChar The last character.
*/
public void setLastChar( int lastChar )
{
font.setInt( COSName.LAST_CHAR, lastChar );
}
/**
* The widths of the characters. This will be null for the standard 14 fonts.
*
* @return The widths of the characters.
*/
public List<Float> getWidths()
{
if (widths == null)
{
COSArray array = (COSArray)font.getDictionaryObject( COSName.WIDTHS );
if (array != null)
{
widths = COSArrayList.convertFloatCOSArrayToList( array );
}
}
return widths;
}
/**
* Set the widths of the characters code.
*
* @param widths The widths of the character codes.
*/
public void setWidths( List<Float> widthsList )
{
widths = widthsList;
font.setItem( COSName.WIDTHS, COSArrayList.converterToCOSArray( widths ) );
}
/**
* This will get the matrix that is used to transform glyph space to
* text space. By default there are 1000 glyph units to 1 text space
* unit, but type3 fonts can use any value.
*
* Note:If this is a type3 font then it can be modified via the PDType3Font.setFontMatrix, otherwise this
* is a read-only property.
*
* @return The matrix to transform from glyph space to text space.
*/
public PDMatrix getFontMatrix()
{
if (fontMatrix == null)
{
COSArray array = (COSArray)font.getDictionaryObject( COSName.FONT_MATRIX );
if( array == null )
{
array = new COSArray();
array.add( new COSFloat( 0.001f ) );
array.add( COSInteger.ZERO );
array.add( COSInteger.ZERO );
array.add( new COSFloat( 0.001f ) );
array.add( COSInteger.ZERO );
array.add( COSInteger.ZERO );
}
fontMatrix = new PDMatrix(array);
}
return fontMatrix;
}
/**
* This will get the fonts bounding box.
*
* @return The fonts bounding box.
*
* @throws IOException If there is an error getting the bounding box.
*/
public abstract PDRectangle getFontBoundingBox() throws IOException;
/**
* {@inheritDoc}
*/
public boolean equals( Object other )
{
return other instanceof PDFont && ((PDFont)other).getCOSObject() == this.getCOSObject();
}
/**
* {@inheritDoc}
*/
public int hashCode()
{
return this.getCOSObject().hashCode();
}
/**
* Determines the width of the given character.
* @param charCode the code of the given character
* @return the width of the character
*/
public float getFontWidth( int charCode )
{
float width = -1;
int firstChar = getFirstChar();
int lastChar = getLastChar();
if (charCode >= firstChar && charCode <= lastChar)
{
// maybe the font doesn't provide any widths
getWidths();
if (widths != null)
{
width = widths.get(charCode-firstChar).floatValue();
}
}
else
{
PDFontDescriptor fd = getFontDescriptor();
if (fd instanceof PDFontDescriptorDictionary)
{
width = fd.getMissingWidth();
}
}
return width;
}
/**
* Determines if a font as a ToUnicode entry.
* @return true if the font has a ToUnicode entry
*/
protected boolean hasToUnicode()
{
return hasToUnicode;
}
/**
* Sets hasToUnicode to the given value.
* @param hasToUnicode the given value for hasToUnicode
*/
protected void setHasToUnicode(boolean hasToUnicodeValue)
{
hasToUnicode = hasToUnicodeValue;
}
}