Package org.pdfbox.pdmodel

Examples of org.pdfbox.pdmodel.PDDocumentInformation


     *
     * @throws IOException If there is an error getting the page count.
     */
    public void printMetadata( PDDocument document ) throws IOException
    {
        PDDocumentInformation info = document.getDocumentInformation();
        System.out.println( "Page Count=" + document.getPageCount() );
        System.out.println( "Title=" + info.getTitle() );
        System.out.println( "Author=" + info.getAuthor() );
        System.out.println( "Subject=" + info.getSubject() );
        System.out.println( "Keywords=" + info.getKeywords() );
        System.out.println( "Creator=" + info.getCreator() );
        System.out.println( "Producer=" + info.getProducer() );
        System.out.println( "Creation Date=" + formatDate( info.getCreationDate() ) );
        System.out.println( "Modification Date=" + formatDate( info.getModificationDate() ) );
        System.out.println( "Trapped=" + info.getTrapped() );
    }
View Full Code Here


            InputStreamReader input = new InputStreamReader( new ByteArrayInputStream( contents ) );
            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            document.add(Field.Text("contents", input ));

            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info.getAuthor() != null )
            {
                document.add(Field.Text( "Author", info.getAuthor() ) );
            }
            if( info.getCreationDate() != null )
            {
                Date date = info.getCreationDate().getTime();
                //for some reason lucene cannot handle dates before the epoch
                //and throws a nasty RuntimeException, so we will check and
                //verify that this does not happen
                if( date.getTime() >= 0 )
                {
                    document.add(Field.Text("CreationDate", DateField.dateToString( date ) ) );
                }
            }
            if( info.getCreator() != null )
            {
                document.add( Field.Text( "Creator", info.getCreator() ) );
            }
            if( info.getKeywords() != null )
            {
                document.add( Field.Text( "Keywords", info.getKeywords() ) );
            }
            if( info.getModificationDate() != null )
            {
                Date date = info.getModificationDate().getTime();
                //for some reason lucene cannot handle dates before the epoch
                //and throws a nasty RuntimeException, so we will check and
                //verify that this does not happen
                if( date.getTime() >= 0 )
                {
                    document.add(Field.Text("ModificationDate", DateField.dateToString( date ) ) );
                }
            }
            if( info.getProducer() != null )
            {
                document.add( Field.Text( "Producer", info.getProducer() ) );
            }
            if( info.getSubject() != null )
            {
                document.add( Field.Text( "Subject", info.getSubject() ) );
            }
            if( info.getTitle() != null )
            {
                document.add( Field.Text( "Title", info.getTitle() ) );
            }
            if( info.getTrapped() != null )
            {
                document.add( Field.Text( "Trapped", info.getTrapped() ) );
            }

            int summarySize = Math.min( contents.length, 500 );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.
View Full Code Here

TOP

Related Classes of org.pdfbox.pdmodel.PDDocumentInformation

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.