Examples of EmbeddedContentHandler


Examples of org.apache.tika.sax.EmbeddedContentHandler

               // TIKA-704: OLE 2.0 embedded non-Office document?
               stream = TikaInputStream.get(
                     fs.createDocumentInputStream("CONTENTS"));
               if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                  embeddedExtractor.parseEmbedded(
                        stream, new EmbeddedContentHandler(handler),
                        metadata, false);
               }
            } else if (POIFSDocumentType.OLE10_NATIVE == type) {
                // TIKA-704: OLE 1.0 embedded document
                Ole10Native ole =
                        Ole10Native.createFromEmbeddedOleObject(fs);
                metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
                byte[] data = ole.getDataBuffer();
                if (data != null) {
                    stream = TikaInputStream.get(data);
                }

                if (stream != null
                        && embeddedExtractor.shouldParseEmbedded(metadata)) {
                    embeddedExtractor.parseEmbedded(
                            stream, new EmbeddedContentHandler(handler),
                            metadata, false);
                }
            } else {
                handleEmbeddedFile(part, handler, rel);
            }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        // Call the recursing handler
        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
            embeddedExtractor.parseEmbedded(
                    TikaInputStream.get(part.getInputStream()),
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        TaggedContentHandler tagged = new TaggedContentHandler(handler);
        try {
            context.getSAXParser().parse(
                    new CloseShieldInputStream(stream),
                    new OfflineContentHandler(new EmbeddedContentHandler(
                            getContentHandler(tagged, metadata, context))));
        } catch (SAXException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("XML parse error", e);
        }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

                        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                            TikaInputStream stream = TikaInputStream.get(file.createInputStream());
                            try {
                                embeddedExtractor.parseEmbedded(
                                                                stream,
                                                                new EmbeddedContentHandler(handler),
                                                                metadata, false);
                            } finally {
                                stream.close();
                            }
                        }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

       metadata.set(Metadata.CONTENT_TYPE, type);
      
       Parser parser = context.get(Parser.class, EmptyParser.INSTANCE);
       parser.parse(
               TikaInputStream.get(part.getInputStream()),
               new EmbeddedContentHandler(handler),
               metadata, context
       );
    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        // Use the delegate parser to parse this entry
        try {
            DELEGATING_PARSER.parse(
                    new CloseShieldInputStream(stream),
                    new EmbeddedContentHandler(new BodyContentHandler(handler)),
                    metadata, context);
        } catch (TikaException e) {
            // Could not parse the entry, just skip the content
        }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

                            throw new EncryptedDocumentException();
                        }

                        OOXMLParser parser = new OOXMLParser();

                        parser.parse(d.getDataStream(filesystem), new EmbeddedContentHandler(
                                        new BodyContentHandler(xhtml)),
                                        metadata, context);
                    } catch (GeneralSecurityException ex) {
                        throw new EncryptedDocumentException(ex);
                    }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

        }

        try {
            getParser().parse(
                    new CloseShieldInputStream(stream),
                    new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                    metadata);
        } catch (TikaException e) {
            // Could not parse the entry, just skip the content
        }
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

              }
              if(data != null) {
                 HtmlParser htmlParser = new HtmlParser();
                 htmlParser.parse(
                       new ByteArrayInputStream(data),
                       new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                       new Metadata(), new ParseContext()
                 );
                 doneBody = true;
              }
           }
           if(rtfChunk != null && !doneBody) {
              ByteChunk chunk = (ByteChunk)rtfChunk;
              MAPIRtfAttribute rtf = new MAPIRtfAttribute(
                    MAPIProperty.RTF_COMPRESSED, Types.BINARY, chunk.getValue()
              );
              RTFParser rtfParser = new RTFParser();
              rtfParser.parse(
                              new ByteArrayInputStream(rtf.getData()),
                              new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                              new Metadata(), new ParseContext());
              doneBody = true;
           }
           if(textChunk != null && !doneBody) {
              xhtml.element("p", ((StringChunk)textChunk).getValue());
View Full Code Here

Examples of org.apache.tika.sax.EmbeddedContentHandler

               // TIKA-704: OLE 2.0 embedded non-Office document?
               stream = TikaInputStream.get(
                     fs.createDocumentInputStream("CONTENTS"));
               if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                  embeddedExtractor.parseEmbedded(
                        stream, new EmbeddedContentHandler(handler),
                        metadata, false);
               }
            } else if (POIFSDocumentType.OLE10_NATIVE == type) {
                // TIKA-704: OLE 1.0 embedded document
                Ole10Native ole =
                        Ole10Native.createFromEmbeddedOleObject(fs);
                metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
                byte[] data = ole.getDataBuffer();
                if (data != null) {
                    stream = TikaInputStream.get(data);
                }

                if (stream != null
                        && embeddedExtractor.shouldParseEmbedded(metadata)) {
                    embeddedExtractor.parseEmbedded(
                            stream, new EmbeddedContentHandler(handler),
                            metadata, false);
                }
            } else {
                handleEmbeddedFile(part, handler);
            }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.