Package org.apache.poi.hslf.extractor

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor


        try {
            /*
             * create new PowerPointExtractor and extract text and notes
             * of the document
             */
            final PowerPointExtractor pptExtractor = new PowerPointExtractor(new BufferedInputStream(source));
            final String contents = pptExtractor.getText(true, true).trim();
            String title = contents.replaceAll("\r"," ").replaceAll("\n"," ").replaceAll("\t"," ").trim();
            if (title.length() > 80) title = title.substring(0, 80);
            int l = title.length();
            while (true) {
                title = title.replaceAll("  ", " ");
                if (title.length() == l) break;
                l = title.length();
            }
           
            /*
             * create the plasmaParserDocument for the database
             * and set shortText and bodyText properly
             */
            final Document[] docs = new Document[]{new Document(
                    location,
                    mimeType,
                    "UTF-8",
                    this,
                    null,
                    null,
                    title,
                    "", // TODO: AUTHOR
                    pptExtractor.getDocSummaryInformation().getCompany(),
                    null,
                    null,
                    0.0f, 0.0f,
                    UTF8.getBytes(contents),
                    null,
View Full Code Here


                }

                addTextIfAny(xhtml, "footer", extractor.getFooterText());
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
                PowerPointExtractor extractor =
                    new PowerPointExtractor(filesystem);
                xhtml.element("p", extractor.getText(true, true));
            } else if ("Workbook".equals(name)) {
                setType(metadata, "application/vnd.ms-excel");
                Locale locale = context.get(Locale.class, Locale.getDefault());
                new ExcelExtractor().parse(filesystem, xhtml, locale);
            } else if ("VisioDocument".equals(name)) {
                setType(metadata, "application/vnd.visio");
                VisioTextExtractor extractor =
                    new VisioTextExtractor(filesystem);
                for (String text : extractor.getAllText()) {
                    xhtml.element("p", text);
                }
            } else if (!outlookExtracted && name.startsWith("__substg1.0_")) {
                // TODO: Cleaner mechanism for detecting Outlook
                outlookExtracted = true;
View Full Code Here

      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
    }
View Full Code Here

                   }

                   addTextIfAny(xhtml, "footer", extractor.getFooterText());
               } else if ("PowerPoint Document".equals(name)) {
                   setType(metadata, "application/vnd.ms-powerpoint");
                   PowerPointExtractor extractor =
                       new PowerPointExtractor(filesystem);
                   xhtml.element("p", extractor.getText(true, true));
               } else if ("Workbook".equals(name)) {
                   setType(metadata, "application/vnd.ms-excel");
                   Locale locale = context.get(Locale.class, Locale.getDefault());
                   new ExcelExtractor().parse(filesystem, xhtml, locale);
               } else if ("VisioDocument".equals(name)) {
                   setType(metadata, "application/vnd.visio");
                   VisioTextExtractor extractor =
                       new VisioTextExtractor(filesystem);
                   for (String text : extractor.getAllText()) {
                       xhtml.element("p", text);
                   }
               } else if (!outlookExtracted && name.startsWith("__substg1.0_")) {
                   // TODO: Cleaner mechanism for detecting Outlook
                   outlookExtracted = true;
View Full Code Here

         if (is.available() == 0)
         {
            return "";
         }
        
         PowerPointExtractor ppe;
         try
         {
            ppe = new PowerPointExtractor(is);
         }
         catch (IOException e)
         {
            throw new DocumentReadException("Can't open presentation.", e);
         }
         return ppe.getText(true, true);
      }
      finally
      {
         if (is != null)
         {
View Full Code Here

      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
    }
View Full Code Here

         if (is.available() == 0)
         {
            return "";
         }
        
         PowerPointExtractor ppe;
         try
         {
            ppe = new PowerPointExtractor(is);
         }
         catch (IOException e)
         {
            throw new DocumentReadException("Can't open presentation.", e);
         }
         return ppe.getText(true, true);
      }
      finally
      {
         if (is != null)
         {
View Full Code Here

      {
         throw new NullPointerException("InputStream is null.");
      }
      try
      {
         PowerPointExtractor ppe;
         try
         {
            ppe = new PowerPointExtractor(is);
         }
         catch (IOException e)
         {
            return "";
         }
         return ppe.getText(true, true);
      }
      finally
      {
         if (is != null)
            try
View Full Code Here

                }

                addTextIfAny(xhtml, "footer", extractor.getFooterText());
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
                PowerPointExtractor extractor =
                    new PowerPointExtractor(filesystem);
                xhtml.element("p", extractor.getText(true, true));
            } else if ("Workbook".equals(name)) {
                setType(metadata, "application/vnd.ms-excel");
                new ExcelExtractor().parse(filesystem, xhtml);
            } else if ("VisioDocument".equals(name)) {
                setType(metadata, "application/vnd.visio");
                VisioTextExtractor extractor =
                    new VisioTextExtractor(filesystem);
                for (String text : extractor.getAllText()) {
                    xhtml.element("p", text);
                }
            } else if (!outlookExtracted && name.startsWith("__substg1.0_")) {
                // TODO: Cleaner mechanism for detecting Outlook
                outlookExtracted = true;
View Full Code Here

                    break;
                case WORDDOCUMENT:
                    new WordExtractor(context).parse(filesystem, xhtml);
                    break;
                case POWERPOINT:
                    PowerPointExtractor powerPointExtractor =
                        new PowerPointExtractor(filesystem);
                    xhtml.element("p", powerPointExtractor.getText(true, true));
                    break;
                case WORKBOOK:
                    Locale locale = context.get(Locale.class, Locale.getDefault());
                    new ExcelExtractor(context).parse(filesystem, xhtml, locale);
                    break;
View Full Code Here

TOP

Related Classes of org.apache.poi.hslf.extractor.PowerPointExtractor

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.