Examples of org.apache.tika.sax.XHTMLContentHandler.element()

Class org.apache.tika.sax.XHTMLContentHandler

Examples of org.apache.tika.sax.XHTMLContentHandler.element()

org.apache.tika.sax.XHTMLContentHandler.element()

            new RTFEditorKit().read(stream, sd, 0);


            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();
            xhtml.element("p", sd.getText(0, sd.getLength()));
            xhtml.endDocument();
        } catch (BadLocationException e) {
            throw new TikaException("Error parsing an RTF document", e);
        } catch (InternalError e) {
            throw new TikaException(

View Full Code Here

                WordExtractor extractor = new WordExtractor(filesystem);


                addTextIfAny(xhtml, "header", extractor.getHeaderText());


                for (String paragraph : extractor.getParagraphText()) {
                    xhtml.element("p", paragraph);
                }


                for (String paragraph : extractor.getFootnoteText()) {
                    xhtml.element("p", paragraph);
                }

View Full Code Here

                for (String paragraph : extractor.getParagraphText()) {
                    xhtml.element("p", paragraph);
                }


                for (String paragraph : extractor.getFootnoteText()) {
                    xhtml.element("p", paragraph);
                }


                for (String paragraph : extractor.getCommentsText()) {
                    xhtml.element("p", paragraph);
                }

View Full Code Here

                for (String paragraph : extractor.getFootnoteText()) {
                    xhtml.element("p", paragraph);
                }


                for (String paragraph : extractor.getCommentsText()) {
                    xhtml.element("p", paragraph);
                }


                for (String paragraph : extractor.getEndnoteText()) {
                    xhtml.element("p", paragraph);
                }

View Full Code Here

                for (String paragraph : extractor.getCommentsText()) {
                    xhtml.element("p", paragraph);
                }


                for (String paragraph : extractor.getEndnoteText()) {
                    xhtml.element("p", paragraph);
                }


                addTextIfAny(xhtml, "footer", extractor.getFooterText());
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");

View Full Code Here

                addTextIfAny(xhtml, "footer", extractor.getFooterText());
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
                PowerPointExtractor extractor =
                    new PowerPointExtractor(filesystem);
                xhtml.element("p", extractor.getText(true, true));
            } else if ("Workbook".equals(name)) {
                setType(metadata, "application/vnd.ms-excel");
                Locale locale = context.get(Locale.class, Locale.getDefault());
                new ExcelExtractor().parse(filesystem, xhtml, locale);
            } else if ("VisioDocument".equals(name)) {

View Full Code Here

            } else if ("VisioDocument".equals(name)) {
                setType(metadata, "application/vnd.visio");
                VisioTextExtractor extractor =
                    new VisioTextExtractor(filesystem);
                for (String text : extractor.getAllText()) {
                    xhtml.element("p", text);
                }
            } else if (!outlookExtracted && name.startsWith("__substg1.0_")) {
                // TODO: Cleaner mechanism for detecting Outlook
                outlookExtracted = true;
                setType(metadata, "application/vnd.ms-outlook");

View Full Code Here

            if (entry instanceof DirectoryEntry) {
               if ("Quill".equals(name)) {
                  setType(metadata, "application/x-mspublisher");
                  PublisherTextExtractor extractor =
                      new PublisherTextExtractor(filesystem);
                  xhtml.element("p", extractor.getText());
               }
            } else if (entry instanceof DocumentEntry) {
               if ("WordDocument".equals(name)) {
                   setType(metadata, "application/msword");
                   WordExtractor extractor = new WordExtractor(filesystem);

View Full Code Here

                   WordExtractor extractor = new WordExtractor(filesystem);


                   addTextIfAny(xhtml, "header", extractor.getHeaderText());


                   for (String paragraph : extractor.getParagraphText()) {
                       xhtml.element("p", paragraph);
                   }


                   for (String paragraph : extractor.getFootnoteText()) {
                       xhtml.element("p", paragraph);
                   }

View Full Code Here

                   for (String paragraph : extractor.getParagraphText()) {
                       xhtml.element("p", paragraph);
                   }


                   for (String paragraph : extractor.getFootnoteText()) {
                       xhtml.element("p", paragraph);
                   }


                   for (String paragraph : extractor.getCommentsText()) {
                       xhtml.element("p", paragraph);
                   }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.