Examples of XHTMLContentHandler


Examples of org.apache.tika.sax.XHTMLContentHandler

     */
    public void parse(
            final InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml =
            new XHTMLContentHandler(handler, metadata);

        Process process = Runtime.getRuntime().exec(command);
        try {
            sendInput(process, stream);
            ignoreError(process);
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

     * @see org.apache.tika.parser.microsoft.ooxml.OOXMLExtractor#getXHTML(org.xml.sax.ContentHandler,
     *      org.apache.tika.metadata.Metadata)
     */
    public void getXHTML(ContentHandler handler, Metadata metadata)
            throws SAXException, XmlException, IOException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        buildXHTML(xhtml);
        xhtml.endDocument();
    }
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws SAXException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

            int bom = reader.read();
            if (bom != '\ufeff') { // zero-width no-break space
                reader.reset();
            }

            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();

            xhtml.startElement("p");
            char[] buffer = new char[4096];
            int n = reader.read(buffer);
            while (n != -1) {
                xhtml.characters(buffer, 0, n);
                n = reader.read(buffer);
            }
            xhtml.endElement("p");

            xhtml.endDocument();
        } catch (UnsupportedEncodingException e) {
            throw new TikaException(
                    "Unsupported text encoding: " + encoding, e);
        }
    }
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

    private final XHTMLContentHandler handler;

    private PDF2XHTML(ContentHandler handler, Metadata metadata)
            throws IOException {
        this.handler = new XHTMLContentHandler(handler, metadata);

        // TIKA-292: Ignore unneeded PDF operators
        // TODO: Remove this once PDFBox is no longer so verbose
        OperatorProcessor ignore = new OperatorProcessor() {
            @Override @SuppressWarnings("unchecked")
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        final XHTMLContentHandler xhtml =
            new XHTMLContentHandler(handler,metadata);

        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setValidating(false);
            factory.setNamespaceAware(true);
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

        }
    }

    public HtmlHandler(
            HtmlParser parser, ContentHandler handler, Metadata metadata) {
        this(parser, new XHTMLContentHandler(handler, metadata), metadata);
    }
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

            throw new TikaException("Can't read JPEG metadata", e);
        } catch (MetadataException e) {
            throw new TikaException("Can't read JPEG metadata", e);
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

                metadata.set("width", Integer.toString(reader.getWidth(0)));
                reader.dispose();
            }
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }
View Full Code Here

Examples of org.apache.tika.sax.XHTMLContentHandler

            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "application/x-gzip");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // At the end we want to close the gzip stream to release any associated
        // resources, but the underlying document stream should not be closed
        InputStream gzip =
            new GZIPInputStream(new CloseShieldInputStream(stream));
        try {
            Metadata entrydata = new Metadata();
            String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
            if (name != null && name.length() > 0) {
                entrydata.set(
                        Metadata.RESOURCE_NAME_KEY,
                        GzipUtils.getUncompressedFilename(name));
            }
            // Use the delegate parser to parse the compressed document
            super.parse(
                    new CloseShieldInputStream(gzip),
                    new EmbeddedContentHandler(
                            new BodyContentHandler(xhtml)),
                    entrydata, context);
        } finally {
            gzip.close();
        }

        xhtml.endDocument();
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.