Package org.pdfbox.util

Examples of org.pdfbox.util.PDFTextStripperByArea.extractRegions()


                stripper.setSortByPosition( true );
                Rectangle rect = new Rectangle( 10, 280, 275, 60 );
                stripper.addRegion( "class1", rect );
                List allPages = document.getDocumentCatalog().getAllPages();
                PDPage firstPage = (PDPage)allPages.get( 0 );
                stripper.extractRegions( firstPage );
                System.out.println( "Text in the area:" + rect );
                System.out.println( stripper.getTextForRegion( "class1" ) );
               
            }
            finally
View Full Code Here


                            Rectangle2D.Float awtRect = new Rectangle2D.Float( x,y,width,height );
                            stripper.addRegion( "" + j, awtRect );
                        }
                    }
                   
                    stripper.extractRegions( page );
                   
                    for( int j=0; j<annotations.size(); j++ )
                    {
                        PDAnnotation annot = (PDAnnotation)annotations.get( j );
                        if( annot instanceof PDAnnotationLink )
View Full Code Here

        try {
          final PDFTextStripperByArea stripper = new PDFTextStripperByArea();
          final List linkAnnotations = new ArrayList();
          final List linkRegions = new ArrayList();
            extractAnnotations(page, stripper, linkAnnotations, linkRegions);
            stripper.extractRegions(page);
            final Map uriMap = new HashMap();
            final Map textMap = new HashMap();
            collateLinks(linkAnnotations, linkRegions, uriMap, textMap, stripper);
            final Iterator it = uriMap.keySet().iterator();
            while (it.hasNext()) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.