Examples of EmbeddedContentHandler


Examples of org.apache.tika.sax.EmbeddedContentHandler

          metadata.set(Metadata.CONTENT_TYPE, type);

       if (embeddedExtractor.shouldParseEmbedded(metadata)) {
         embeddedExtractor.parseEmbedded(
                 TikaInputStream.get(contents),
                 new EmbeddedContentHandler(handler),
                 metadata, false);
       }
    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

              if(data != null) {
                  // nocommit same problem here?
                 HtmlParser htmlParser = new HtmlParser();
                 htmlParser.parse(
                       new ByteArrayInputStream(data),
                       new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                       new Metadata(), new ParseContext()
                 );
                 doneBody = true;
              }
           }
           if(rtfChunk != null && !doneBody) {
              ByteChunk chunk = (ByteChunk)rtfChunk;
              MAPIRtfAttribute rtf = new MAPIRtfAttribute(
                    MAPIProperty.RTF_COMPRESSED, Types.BINARY, chunk.getValue()
              );
              RTFParser rtfParser = new RTFParser();
              rtfParser.parse(
                              new ByteArrayInputStream(rtf.getData()),
                              new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                              new Metadata(), new ParseContext());
              doneBody = true;
           }
           if(textChunk != null && !doneBody) {
              xhtml.element("p", ((StringChunk)textChunk).getValue());
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

               // TIKA-704: OLE 2.0 embedded non-Office document?
               stream = TikaInputStream.get(
                     fs.createDocumentInputStream("CONTENTS"));
               if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                  embeddedExtractor.parseEmbedded(
                        stream, new EmbeddedContentHandler(handler),
                        metadata, false);
               }
            } else if (POIFSDocumentType.OLE10_NATIVE == type) {
                // TIKA-704: OLE 1.0 embedded document
                Ole10Native ole =
                        Ole10Native.createFromEmbeddedOleObject(fs);
                metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
                byte[] data = ole.getDataBuffer();
                if (data != null) {
                    stream = TikaInputStream.get(data);
                }

                if (stream != null
                        && embeddedExtractor.shouldParseEmbedded(metadata)) {
                    embeddedExtractor.parseEmbedded(
                            stream, new EmbeddedContentHandler(handler),
                            metadata, false);
                }
            } else {
                handleEmbeddedFile(part, handler);
            }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        // Call the recursing handler
        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
            embeddedExtractor.parseEmbedded(
                    TikaInputStream.get(part.getInputStream()),
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

            throws IOException, SAXException, TikaException {
        // Because an EPub file is often made up of multiple XHTML files,
        //  we need explicit control over the start and end of the document
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        ContentHandler childHandler = new EmbeddedContentHandler(
              new BodyContentHandler(xhtml));
      
        ZipInputStream zip = new ZipInputStream(stream);
        ZipEntry entry = zip.getNextEntry();
        while (entry != null) {
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        submd.set(Metadata.CONTENT_TYPE, body.getMimeType());
        submd.set(Metadata.CONTENT_ENCODING, body.getCharset());

        try {
            BodyContentHandler bch = new BodyContentHandler(handler);
            parser.parse(is, new EmbeddedContentHandler(bch), submd);
        } catch (SAXException e) {
            throw new MimeException(e);
        } catch (TikaException e) {
            throw new MimeException(e);
        }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        submd.set(Metadata.CONTENT_TYPE, body.getMimeType());
        submd.set(Metadata.CONTENT_ENCODING, body.getCharset());

        try {
            BodyContentHandler bch = new BodyContentHandler(handler);
            parser.parse(is, new EmbeddedContentHandler(bch), submd, context);
        } catch (SAXException e) {
            throw new MimeException(e);
        } catch (TikaException e) {
            throw new MimeException(e);
        }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

                            throw new TikaException("Unable to process: document is encrypted");
                        }

                        OOXMLParser parser = new OOXMLParser();

                        parser.parse(d.getDataStream(filesystem), new EmbeddedContentHandler(
                                        new BodyContentHandler(xhtml)),
                                        metadata, context);
                    } catch (GeneralSecurityException ex) {
                        throw new TikaException("Unable to process encrypted document", ex);
                    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        submd.set(Metadata.CONTENT_TYPE, body.getMimeType());
        submd.set(Metadata.CONTENT_ENCODING, body.getCharset());

        try {
            BodyContentHandler bch = new BodyContentHandler(handler);
            parser.parse(is, new EmbeddedContentHandler(bch), submd);
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (TikaException e) {
            e.printStackTrace();
        }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        xhtml.startDocument();
        xhtml.startElement("p");

        context.getSAXParser().parse(
                new CloseShieldInputStream(stream),
                new OfflineContentHandler(new EmbeddedContentHandler(
                        getContentHandler(handler, metadata))));

        xhtml.endElement("p");
        xhtml.endDocument();
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.