}
}
/**
 * Transfers the information dictionary of a PDF document into the given
 * metadata object.
 * <p>
 * The page count is stored under {@link PagedText#N_PAGES}. The standard
 * information entries (title, author, creator, keywords, producer, subject,
 * trapped, creation date and modification date) are each passed to
 * {@code addMetadata} under a dedicated metadata name. Every other entry
 * found in the information dictionary is passed on under its own key name.
 * <p>
 * If reading the creation or modification date raises an
 * {@link IOException}, that date is skipped and processing continues.
 *
 * @param document the PDF document whose information dictionary is read
 * @param metadata the metadata object that receives the values
 * @throws TikaException declared by the signature; presumably raised by an
 *         {@code addMetadata} overload, which is not visible here (to be
 *         confirmed against the helper)
 */
private void extractMetadata(PDDocument document, Metadata metadata)
        throws TikaException {
    // Information dictionary keys that are covered by the dedicated
    // handling below; the generic copy loop at the end skips them.
    final List<String> standardKeys = Arrays.asList(
            "Author", "Creator", "CreationDate", "ModDate",
            "Keywords", "Producer", "Subject", "Title", "Trapped");

    final PDDocumentInformation docInfo = document.getDocumentInformation();

    metadata.set(PagedText.N_PAGES, document.getNumberOfPages());

    // Plain text entries
    addMetadata(metadata, Metadata.TITLE, docInfo.getTitle());
    addMetadata(metadata, Metadata.AUTHOR, docInfo.getAuthor());
    addMetadata(metadata, Metadata.CREATOR, docInfo.getCreator());
    addMetadata(metadata, Metadata.KEYWORDS, docInfo.getKeywords());
    addMetadata(metadata, "producer", docInfo.getProducer());
    addMetadata(metadata, Metadata.SUBJECT, docInfo.getSubject());
    addMetadata(metadata, "trapped", docInfo.getTrapped());

    // Creation date is published under two names: "created" and
    // Metadata.CREATION_DATE.
    try {
        addMetadata(metadata, "created", docInfo.getCreationDate());
        addMetadata(metadata, Metadata.CREATION_DATE, docInfo.getCreationDate());
    } catch (IOException ignored) {
        // The date could not be read (invalid format): leave it out.
    }

    try {
        Calendar lastModified = docInfo.getModificationDate();
        addMetadata(metadata, Metadata.LAST_MODIFIED, lastModified);
    } catch (IOException ignored) {
        // The date could not be read (invalid format): leave it out.
    }

    // Whatever is left in the dictionary is custom metadata; copy it
    // over as-is under its own key name.
    for (COSName cosName : docInfo.getDictionary().keySet()) {
        String customKey = cosName.getName();
        if (standardKeys.contains(customKey)) {
            continue;
        }
        addMetadata(metadata, customKey,
                docInfo.getDictionary().getDictionaryObject(cosName));
    }
}