Package org.pdfclown.samples.cli

Source Code of org.pdfclown.samples.cli.ContentScanningSample

package org.pdfclown.samples.cli;

import org.pdfclown.documents.Document;
import org.pdfclown.documents.Page;
import org.pdfclown.documents.contents.ContentScanner;
import org.pdfclown.documents.contents.objects.ContainerObject;
import org.pdfclown.documents.contents.objects.ContentObject;
import org.pdfclown.documents.contents.objects.InlineImage;
import org.pdfclown.documents.contents.xObjects.ImageXObject;
import org.pdfclown.documents.contents.xObjects.XObject;
import org.pdfclown.files.File;

import java.awt.geom.Dimension2D;
import java.awt.geom.Rectangle2D;

/**
  This sample demonstrates <b>how to retrieve the precise position (page and coordinates)
  of each image</b> within a PDF document, using the page content scanning functionality.
  <h3>Remarks</h3>
  <p>This sample leverages the ContentScanner class, a powerful device for accessing
  each single content object within a page.</p>

  @author Stefano Chizzolini (http://www.stefanochizzolini.it)
  @version 0.1.0
*/
public class ContentScanningSample
  extends Sample
{
  @Override
  public boolean run(
    )
  {
    String filePath = promptPdfFileChoice("Please select a PDF file");

    // 1. Open the PDF file!
    File file;
    try
    {file = new File(filePath);}
    catch(Exception e)
    {throw new RuntimeException(filePath + " file access error.",e);}

    Document document = file.getDocument();

    // 2. Parsing the document...
    System.out.println("\nLooking for images...");
    for(Page page : document.getPages())
    {
      scan(
        new ContentScanner(page), // Wraps the page contents into the scanner.
        page
        );
    }
   
    return true;
  }

  /**
    Scans a content level looking for images.
  */
  /*
    NOTE: Page contents are represented by a sequence of content objects,
    possibly nested into multiple levels.
  */
  private void scan(
    ContentScanner level,
    Page page
    )
  {
    if(level == null)
      return;

    while(level.moveNext())
    {
      ContentObject object = level.getCurrent();
      if(object instanceof ContainerObject)
      {
        // Scan the inner level!
        scan(
          level.getChildLevel(),
          page
          );
      }
      else
      {
        ContentScanner.GraphicsObjectWrapper<?> objectWrapper = level.getCurrentWrapper();
        if(objectWrapper == null)
          continue;

        /*
          NOTE: Images can be represented on a page either as
          external objects (XObject) or inline objects.
        */
        Dimension2D imageSize = null; // Image native size.
        if(objectWrapper instanceof ContentScanner.XObjectWrapper)
        {
          ContentScanner.XObjectWrapper xObjectWrapper = (ContentScanner.XObjectWrapper)objectWrapper;
          XObject xObject = xObjectWrapper.getXObject();
          // Is the external object an image?
          if(xObject instanceof ImageXObject)
          {
            System.out.print(
              "External Image '" + xObjectWrapper.getName() + "' (" + xObject.getBaseObject() + ")" // Image key and indirect reference.
              );
            imageSize = xObject.getSize(); // Image native size.
          }
        }
        else if(objectWrapper instanceof ContentScanner.InlineImageWrapper)
        {
          System.out.print("Inline Image");
          InlineImage inlineImage = ((ContentScanner.InlineImageWrapper)objectWrapper).getInlineImage();
          imageSize = inlineImage.getSize(); // Image native size.
        }

        if(imageSize != null)
        {
          Rectangle2D box = objectWrapper.getBox(); // Image position (location and size) on the page.
          System.out.println(
            " on page " + (page.getIndex() + 1) + " (" + page.getBaseObject() + ")" // Page index and indirect reference.
            );
          System.out.println("  Coordinates:");
          System.out.println("     x: " + Math.round(box.getX()));
          System.out.println("     y: " + Math.round(box.getY()));
          System.out.println("     width: " + Math.round(box.getWidth()) + " (native: " + Math.round(imageSize.getWidth()) + ")");
          System.out.println("     height: " + Math.round(box.getHeight()) + " (native: " + Math.round(imageSize.getHeight()) + ")");
        }
      }
    }
  }
}
TOP

Related Classes of org.pdfclown.samples.cli.ContentScanningSample

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.