Examples of EmbeddedContentHandler


Examples of org.apache.tika.sax.EmbeddedContentHandler

                            throw new EncryptedDocumentException();
                        }

                        OOXMLParser parser = new OOXMLParser();

                        parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
                                        new BodyContentHandler(xhtml)),
                                        metadata, context);
                    } catch (GeneralSecurityException ex) {
                        throw new EncryptedDocumentException(ex);
                    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

                    newStream.setOpenContainer(container);
                }
            }
            DELEGATING_PARSER.parse(
                                    newStream,
                                    new EmbeddedContentHandler(new BodyContentHandler(handler)),
                                    metadata, context);
        } catch (TikaException e) {
            // TODO: can we log a warning somehow?
            // Could not parse the entry, just skip the content
        } finally {
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

                        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                            TikaInputStream stream = TikaInputStream.get(file.createInputStream());
                            try {
                                embeddedExtractor.parseEmbedded(
                                                                stream,
                                                                new EmbeddedContentHandler(handler),
                                                                metadata, false);
                            } finally {
                                stream.close();
                            }
                        }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

          metadata.set(Metadata.CONTENT_TYPE, type);

       if (embeddedExtractor.shouldParseEmbedded(metadata)) {
         embeddedExtractor.parseEmbedded(
                 TikaInputStream.get(contents),
                 new EmbeddedContentHandler(handler),
                 metadata, false);
       }
    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

              }
              if(data != null) {
                 HtmlParser htmlParser = new HtmlParser();
                 htmlParser.parse(
                       new ByteArrayInputStream(data),
                       new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                       new Metadata(), new ParseContext()
                 );
                 doneBody = true;
              }
           }
           if(rtfChunk != null && !doneBody) {
              ByteChunk chunk = (ByteChunk)rtfChunk;
              MAPIRtfAttribute rtf = new MAPIRtfAttribute(
                    MAPIProperty.RTF_COMPRESSED, Types.BINARY.getId(), chunk.getValue()
              );
              RTFParser rtfParser = new RTFParser();
              rtfParser.parse(
                              new ByteArrayInputStream(rtf.getData()),
                              new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                              new Metadata(), new ParseContext());
              doneBody = true;
           }
           if(textChunk != null && !doneBody) {
              xhtml.element("p", ((StringChunk)textChunk).getValue());
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

               // TIKA-704: OLE 2.0 embedded non-Office document?
               stream = TikaInputStream.get(
                     fs.createDocumentInputStream("CONTENTS"));
               if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                  embeddedExtractor.parseEmbedded(
                        stream, new EmbeddedContentHandler(handler),
                        metadata, false);
               }
            } else if (POIFSDocumentType.OLE10_NATIVE == type) {
                // TIKA-704: OLE 1.0 embedded document
                Ole10Native ole =
                        Ole10Native.createFromEmbeddedOleObject(fs);
                metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
                byte[] data = ole.getDataBuffer();
                if (data != null) {
                    stream = TikaInputStream.get(data);
                }

                if (stream != null
                        && embeddedExtractor.shouldParseEmbedded(metadata)) {
                    embeddedExtractor.parseEmbedded(
                            stream, new EmbeddedContentHandler(handler),
                            metadata, false);
                }
            } else {
                handleEmbeddedFile(part, handler, rel);
            }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        // Call the recursing handler
        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
            embeddedExtractor.parseEmbedded(
                    TikaInputStream.get(part.getInputStream()),
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

                    newStream.setOpenContainer(container);
                }
            }
            DELEGATING_PARSER.parse(
                                    newStream,
                                    new EmbeddedContentHandler(new BodyContentHandler(handler)),
                                    metadata, context);
        } catch (TikaException e) {
            // TODO: can we log a warning somehow?
            // Could not parse the entry, just skip the content
        } finally {
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

              // Decrypt the OLE2 stream, and delegate the resulting OOXML
              //  file to the regular OOXML parser for normal handling
              OOXMLParser parser = new OOXMLParser();

              parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
                    new BodyContentHandler(xhtml)),
                    metadata, context);
           } catch (GeneralSecurityException ex) {
              throw new EncryptedDocumentException(ex);
           }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        TaggedContentHandler tagged = new TaggedContentHandler(handler);
        try {
            context.getSAXParser().parse(
                    new CloseShieldInputStream(stream),
                    new OfflineContentHandler(new EmbeddedContentHandler(
                            getContentHandler(tagged, metadata, context))));
        } catch (SAXException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("XML parse error", e);
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.