Package org.apache.tika.sax

Examples of org.apache.tika.sax.XHTMLContentHandler.element()


            new RTFEditorKit().read(stream, sd, 0);

            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();
            xhtml.element("p", sd.getText(0, sd.getLength()));
            xhtml.endDocument();
        } catch (BadLocationException e) {
            throw new TikaException("Error parsing an RTF document", e);
        } catch (InternalError e) {
            throw new TikaException(
View Full Code Here


                WordExtractor extractor = new WordExtractor(filesystem);

                addTextIfAny(xhtml, "header", extractor.getHeaderText());

                for (String paragraph : extractor.getParagraphText()) {
                    xhtml.element("p", paragraph);
                }

                for (String paragraph : extractor.getFootnoteText()) {
                    xhtml.element("p", paragraph);
                }
View Full Code Here

                for (String paragraph : extractor.getParagraphText()) {
                    xhtml.element("p", paragraph);
                }

                for (String paragraph : extractor.getFootnoteText()) {
                    xhtml.element("p", paragraph);
                }

                for (String paragraph : extractor.getCommentsText()) {
                    xhtml.element("p", paragraph);
                }
View Full Code Here

                for (String paragraph : extractor.getFootnoteText()) {
                    xhtml.element("p", paragraph);
                }

                for (String paragraph : extractor.getCommentsText()) {
                    xhtml.element("p", paragraph);
                }

                for (String paragraph : extractor.getEndnoteText()) {
                    xhtml.element("p", paragraph);
                }
View Full Code Here

                for (String paragraph : extractor.getCommentsText()) {
                    xhtml.element("p", paragraph);
                }

                for (String paragraph : extractor.getEndnoteText()) {
                    xhtml.element("p", paragraph);
                }

                addTextIfAny(xhtml, "footer", extractor.getFooterText());
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
View Full Code Here

                addTextIfAny(xhtml, "footer", extractor.getFooterText());
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
                PowerPointExtractor extractor =
                    new PowerPointExtractor(filesystem);
                xhtml.element("p", extractor.getText(true, true));
            } else if ("Workbook".equals(name)) {
                setType(metadata, "application/vnd.ms-excel");
                Locale locale = context.get(Locale.class, Locale.getDefault());
                new ExcelExtractor().parse(filesystem, xhtml, locale);
            } else if ("VisioDocument".equals(name)) {
View Full Code Here

            } else if ("VisioDocument".equals(name)) {
                setType(metadata, "application/vnd.visio");
                VisioTextExtractor extractor =
                    new VisioTextExtractor(filesystem);
                for (String text : extractor.getAllText()) {
                    xhtml.element("p", text);
                }
            } else if (!outlookExtracted && name.startsWith("__substg1.0_")) {
                // TODO: Cleaner mechanism for detecting Outlook
                outlookExtracted = true;
                setType(metadata, "application/vnd.ms-outlook");
View Full Code Here

            if (entry instanceof DirectoryEntry) {
               if ("Quill".equals(name)) {
                  setType(metadata, "application/x-mspublisher");
                  PublisherTextExtractor extractor =
                      new PublisherTextExtractor(filesystem);
                  xhtml.element("p", extractor.getText());
               }
            } else if (entry instanceof DocumentEntry) {
               if ("WordDocument".equals(name)) {
                   setType(metadata, "application/msword");
                   WordExtractor extractor = new WordExtractor(filesystem);
View Full Code Here

                   WordExtractor extractor = new WordExtractor(filesystem);

                   addTextIfAny(xhtml, "header", extractor.getHeaderText());

                   for (String paragraph : extractor.getParagraphText()) {
                       xhtml.element("p", paragraph);
                   }

                   for (String paragraph : extractor.getFootnoteText()) {
                       xhtml.element("p", paragraph);
                   }
View Full Code Here

                   for (String paragraph : extractor.getParagraphText()) {
                       xhtml.element("p", paragraph);
                   }

                   for (String paragraph : extractor.getFootnoteText()) {
                       xhtml.element("p", paragraph);
                   }

                   for (String paragraph : extractor.getCommentsText()) {
                       xhtml.element("p", paragraph);
                   }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.