Package org.apache.tika.sax

Examples of org.apache.tika.sax.EmbeddedContentHandler


       metadata.set(Metadata.CONTENT_TYPE, type);

       if (embeddedExtractor.shouldParseEmbedded(metadata)) {
         embeddedExtractor.parseEmbedded(
                 TikaInputStream.get(part.getInputStream()),
                 new EmbeddedContentHandler(handler),
                 metadata, false);
       }
    }
View Full Code Here


        // Use the delegate parser to parse this entry
        TemporaryFiles tmp = new TemporaryFiles();
        try {
            DELEGATING_PARSER.parse(
                    TikaInputStream.get(new CloseShieldInputStream(stream), tmp),
                    new EmbeddedContentHandler(new BodyContentHandler(handler)),
                    metadata, context);
        } catch (TikaException e) {
            // Could not parse the entry, just skip the content
        } finally {
            tmp.dispose();
View Full Code Here

                thumbnailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, thumbName);
                thumbnailMetadata.set(Metadata.CONTENT_TYPE, tPart.getContentType());
                thumbnailMetadata.set(TikaCoreProperties.TITLE, tPart.getPartName().getName());
               
                if (embeddedExtractor.shouldParseEmbedded(thumbnailMetadata)) {
                    embeddedExtractor.parseEmbedded(TikaInputStream.get(tStream), new EmbeddedContentHandler(handler), thumbnailMetadata, false);
                }
               
                tStream.close();
            }
         } catch (Exception ex) {
View Full Code Here

               // TIKA-704: OLE 2.0 embedded non-Office document?
               stream = TikaInputStream.get(
                     fs.createDocumentInputStream("CONTENTS"));
               if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                  embeddedExtractor.parseEmbedded(
                        stream, new EmbeddedContentHandler(handler),
                        metadata, false);
               }
            } else if (POIFSDocumentType.OLE10_NATIVE == type) {
                // TIKA-704: OLE 1.0 embedded document
                Ole10Native ole =
                        Ole10Native.createFromEmbeddedOleObject(fs);
                if (ole.getLabel() != null) {
                    metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
                }
                byte[] data = ole.getDataBuffer();
                if (data != null) {
                    stream = TikaInputStream.get(data);
                }

                if (stream != null
                        && embeddedExtractor.shouldParseEmbedded(metadata)) {
                    embeddedExtractor.parseEmbedded(
                            stream, new EmbeddedContentHandler(handler),
                            metadata, false);
                }
            } else {
                handleEmbeddedFile(part, handler, rel);
            }
View Full Code Here

        // Call the recursing handler
        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
            embeddedExtractor.parseEmbedded(
                    TikaInputStream.get(part.getInputStream()),
                    new EmbeddedContentHandler(handler),
                    metadata, false);
        }
    }
View Full Code Here

        submd.set(Metadata.CONTENT_TYPE, body.getMimeType());
        submd.set(Metadata.CONTENT_ENCODING, body.getCharset());

        try {
            BodyContentHandler bch = new BodyContentHandler(handler);
            parser.parse(is, new EmbeddedContentHandler(bch), submd, context);
        } catch (SAXException e) {
            throw new MimeException(e);
        } catch (TikaException e) {
            throw new MimeException(e);
        }
View Full Code Here

              // Decrypt the OLE2 stream, and delegate the resulting OOXML
              //  file to the regular OOXML parser for normal handling
              OOXMLParser parser = new OOXMLParser();

              parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
                    new BodyContentHandler(xhtml)),
                    metadata, context);
           } catch (GeneralSecurityException ex) {
              throw new EncryptedDocumentException(ex);
           }
View Full Code Here

              }
              if(data != null) {
                 HtmlParser htmlParser = new HtmlParser();
                 htmlParser.parse(
                       new ByteArrayInputStream(data),
                       new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                       new Metadata(), new ParseContext()
                 );
                 doneBody = true;
              }
           }
           if(rtfChunk != null && !doneBody) {
              ByteChunk chunk = (ByteChunk)rtfChunk;
              MAPIRtfAttribute rtf = new MAPIRtfAttribute(
                    MAPIProperty.RTF_COMPRESSED, Types.BINARY.getId(), chunk.getValue()
              );
              RTFParser rtfParser = new RTFParser();
              rtfParser.parse(
                              new ByteArrayInputStream(rtf.getData()),
                              new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
                              new Metadata(), new ParseContext());
              doneBody = true;
           }
           if(textChunk != null && !doneBody) {
              xhtml.element("p", ((StringChunk)textChunk).getValue());
View Full Code Here

        TaggedContentHandler tagged = new TaggedContentHandler(handler);
        try {
            context.getSAXParser().parse(
                    new CloseShieldInputStream(stream),
                    new OfflineContentHandler(new EmbeddedContentHandler(
                            getContentHandler(tagged, metadata, context))));
        } catch (SAXException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("XML parse error", e);
        } finally {
View Full Code Here

                }
            }
            try {
                embeddedExtractor.parseEmbedded(
                        stream,
                        new EmbeddedContentHandler(handler),
                        metadata, false);
            } finally {
                stream.close();
            }
        }
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.EmbeddedContentHandler

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.