Package org.apache.tika.sax

Examples of org.apache.tika.sax.EmbeddedContentHandler


                entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
            }
            // Use the delegate parser to parse the compressed document
            parser.parse(
                    new CloseShieldInputStream(stream),
                    new EmbeddedContentHandler(
                            new BodyContentHandler(xhtml)),
                    entrydata, context);
        } finally {
            stream.close();
        }
View Full Code Here


                    }
                    try {
                        // Use the delegate parser to parse this entry
                        parser.parse(
                                new CloseShieldInputStream(archive),
                                new EmbeddedContentHandler(
                                        new BodyContentHandler(xhtml)),
                                        entrydata, context);
                    } catch (TikaException e) {
                        // Could not parse the entry, just skip the content
                    }
View Full Code Here

              // Decrypt the OLE2 stream, and delegate the resulting OOXML
              //  file to the regular OOXML parser for normal handling
              OOXMLParser parser = new OOXMLParser();

              parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
                    new BodyContentHandler(xhtml)),
                    metadata, context);
           } catch (GeneralSecurityException ex) {
              throw new EncryptedDocumentException(ex);
           }
View Full Code Here

            if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                TikaInputStream stream = TikaInputStream.get(file.createInputStream());
                try {
                    embeddedExtractor.parseEmbedded(
                            stream,
                            new EmbeddedContentHandler(handler),
                            metadata, false);
                } finally {
                    stream.close();
                }
            }
View Full Code Here

                        GzipUtils.getUncompressedFilename(name));
            }
            // Use the delegate parser to parse the compressed document
            super.parse(
                    new CloseShieldInputStream(gzip),
                    new EmbeddedContentHandler(
                            new BodyContentHandler(xhtml)),
                    entrydata, context);
        } finally {
            gzip.close();
        }
View Full Code Here

                }
                try {
                    // Use the delegate parser to parse this entry
                    super.parse(
                            new CloseShieldInputStream(archive),
                            new EmbeddedContentHandler(
                                    new BodyContentHandler(xhtml)),
                            entrydata, context);
                } catch (TikaException e) {
                    // Could not parse the entry, just skip the content
                }
View Full Code Here

                entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
            }
            // Use the delegate parser to parse the compressed document
            super.parse(
                    new CloseShieldInputStream(bzip2),
                    new EmbeddedContentHandler(
                            new BodyContentHandler(xhtml)),
                    entrydata, context);
        } finally {
            bzip2.close();
        }
View Full Code Here

                            throw new TikaException("Unable to process: document is encrypted");
                        }

                        OOXMLParser parser = new OOXMLParser();

                        parser.parse(d.getDataStream(filesystem), new EmbeddedContentHandler(
                                        new BodyContentHandler(xhtml)),
                                        metadata, context);
                    } catch (GeneralSecurityException ex) {
                        throw new TikaException("Unable to process encrypted document", ex);
                    }
View Full Code Here

              // Decrypt the OLE2 stream, and delegate the resulting OOXML
              //  file to the regular OOXML parser for normal handling
              OOXMLParser parser = new OOXMLParser();

              parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
                    new BodyContentHandler(xhtml)),
                    metadata, context);
           } catch (GeneralSecurityException ex) {
              throw new EncryptedDocumentException(ex);
           }
View Full Code Here

              }
              if(data != null) {
                 HtmlParser htmlParser = new HtmlParser();
                 htmlParser.parse(
                       new ByteArrayInputStream(data),
                       new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                       new Metadata(), new ParseContext()
                 );
                 doneBody = true;
              }
           }
           if(rtfChunk != null && !doneBody) {
              ByteChunk chunk = (ByteChunk)rtfChunk;
              MAPIRtfAttribute rtf = new MAPIRtfAttribute(
                    MAPIProperty.RTF_COMPRESSED, Types.BINARY, chunk.getValue()
              );
              RTFParser rtfParser = new RTFParser();
              rtfParser.parse(
                              new ByteArrayInputStream(rtf.getData()),
                              new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                              new Metadata(), new ParseContext());
              doneBody = true;
           }
           if(textChunk != null && !doneBody) {
              xhtml.element("p", ((StringChunk)textChunk).getValue());
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.EmbeddedContentHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.