// Expose the contents byte array as a character Reader.
// NOTE(review): no charset is given here, so the bytes are decoded with the
// platform default charset -- confirm this matches how contents was encoded.
InputStreamReader input = new InputStreamReader( new ByteArrayInputStream( contents ) );
// Add the contents as a Reader-valued Text field so it will
// get tokenized and indexed.
document.add(Field.Text("contents", input ));
// Copy the PDF's document-information entries into the Lucene document.
// A Text field is added only for entries that are present (non-null).
PDDocumentInformation info = pdfDocument.getDocumentInformation();

String pdfAuthor = info.getAuthor();
if( pdfAuthor != null )
{
    document.add( Field.Text( "Author", pdfAuthor ) );
}

if( info.getCreationDate() != null )
{
    Date created = info.getCreationDate().getTime();
    // Lucene cannot handle dates before the epoch and throws a
    // RuntimeException, so the field is skipped for negative timestamps.
    boolean createdSinceEpoch = created.getTime() >= 0;
    if( createdSinceEpoch )
    {
        String createdText = DateField.dateToString( created );
        document.add( Field.Text( "CreationDate", createdText ) );
    }
}

String pdfCreator = info.getCreator();
if( pdfCreator != null )
{
    document.add( Field.Text( "Creator", pdfCreator ) );
}

String pdfKeywords = info.getKeywords();
if( pdfKeywords != null )
{
    document.add( Field.Text( "Keywords", pdfKeywords ) );
}

if( info.getModificationDate() != null )
{
    Date modified = info.getModificationDate().getTime();
    // Same pre-epoch guard as for the creation date.
    boolean modifiedSinceEpoch = modified.getTime() >= 0;
    if( modifiedSinceEpoch )
    {
        String modifiedText = DateField.dateToString( modified );
        document.add( Field.Text( "ModificationDate", modifiedText ) );
    }
}

String pdfProducer = info.getProducer();
if( pdfProducer != null )
{
    document.add( Field.Text( "Producer", pdfProducer ) );
}

String pdfSubject = info.getSubject();
if( pdfSubject != null )
{
    document.add( Field.Text( "Subject", pdfSubject ) );
}

String pdfTitle = info.getTitle();
if( pdfTitle != null )
{
    document.add( Field.Text( "Title", pdfTitle ) );
}

String pdfTrapped = info.getTrapped();
if( pdfTrapped != null )
{
    document.add( Field.Text( "Trapped", pdfTrapped ) );
}
// Cap the summary size at 500, or at contents.length when the byte array is shorter.
int summarySize = Math.min( contents.length, 500 );
// Add the summary as an UnIndexed field, so that it is stored and returned
// with hit documents for display.