Examples of TemporaryResources


Examples of org.apache.tika.io.TemporaryResources

            handler.characters(chars, 0, chars.length);
            handler.endElement(XHTML, "h1", "h1");
        }

        // Use the delegate parser to parse this entry
        TemporaryResources tmp = new TemporaryResources();
        try {
            final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp);
            if (stream instanceof TikaInputStream) {
                final Object container = ((TikaInputStream) stream).getOpenContainer();

                // TODO: we can't let ZipPackage through,
                // because of POI bug 51949.  This is less
                // efficient because the inner parser will
                // have to re-open the zip archive again.
                // Once we upgrade to POI 3.8 beta 5 we can
                // remove this:
                if ((container != null && !(container.getClass().getSimpleName().equals("ZipPackage")))) {
                    newStream.setOpenContainer(container);
                }
            }
            DELEGATING_PARSER.parse(
                                    newStream,
                                    new EmbeddedContentHandler(new BodyContentHandler(handler)),
                                    metadata, context);
        } catch (TikaException e) {
            // TODO: can we log a warning somehow?
            // Could not parse the entry, just skip the content
        } finally {
            tmp.close();
        }

        if(outputHtml) {
           handler.endElement(XHTML, "div", "div");
        }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
      
        PDDocument pdfDocument = null;
        TemporaryResources tmp = new TemporaryResources();

        try {
            // PDFBox can process entirely in memory, or can use a temp file
            //  for unpacked / processed resources
            // Decide which to do based on if we're reading from a file or not already
            TikaInputStream tstream = TikaInputStream.cast(stream);
            if (tstream != null && tstream.hasFile()) {
               // File based, take that as a cue to use a temporary file
               RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw");
               pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true);
            } else {
               // Go for the normal, stream based in-memory parsing
               pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
            }
          
            if (pdfDocument.isEncrypted()) {
                String password = null;
               
                // Did they supply a new style Password Provider?
                PasswordProvider passwordProvider = context.get(PasswordProvider.class);
                if (passwordProvider != null) {
                   password = passwordProvider.getPassword(metadata);
                }
               
                // Fall back on the old style metadata if set
                if (password == null && metadata.get(PASSWORD) != null) {
                   password = metadata.get(PASSWORD);
                }
               
                // If no password is given, use an empty string as the default
                if (password == null) {
                   password = "";
                }
              
                try {
                    pdfDocument.decrypt(password);
                } catch (Exception e) {
                    // Ignore
                }
            }
            metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
            extractMetadata(pdfDocument, metadata);
            PDF2XHTML.process(pdfDocument, handler, metadata,
                              extractAnnotationText, enableAutoSpace,
                              suppressDuplicateOverlappingText, sortByPosition);

            extractEmbeddedDocuments(context, pdfDocument, handler);
        } finally {
            if (pdfDocument != null) {
               pdfDocument.close();
            }
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        Parser parser = getParser(metadata);
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
            TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
            try {
                parser.parse(taggedStream, taggedHandler, metadata, context);
            } catch (RuntimeException e) {
                throw new TikaException(
                        "Unexpected RuntimeException from " + parser, e);
            } catch (IOException e) {
                taggedStream.throwIfCauseOf(e);
                throw new TikaException(
                        "TIKA-198: Illegal IOException from " + parser, e);
            } catch (SAXException e) {
                taggedHandler.throwIfCauseOf(e);
                throw new TikaException(
                        "TIKA-237: Illegal SAXException from " + parser, e);
            }
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(stream, tmp);
            parse(tis, handler, metadata, context);
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(stream, tmp);

            // Automatically detect the MIME type of the document
            MediaType type = detector.detect(tis, metadata);
            metadata.set(Metadata.CONTENT_TYPE, type.toString());

            // TIKA-216: Zip bomb prevention
            SecureContentHandler sch = new SecureContentHandler(handler, tis);
            try {
                // Parse the document
                super.parse(tis, sch, metadata, context);
            } catch (SAXException e) {
                // Convert zip bomb exceptions to TikaExceptions
                sch.throwIfCauseOf(e);
                throw e;
            }
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml =
            new XHTMLContentHandler(handler, metadata);

        TemporaryResources tmp = new TemporaryResources();
        try {
            parse(TikaInputStream.get(stream, tmp),
                    xhtml, metadata, tmp);
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(stream, tmp);
            new ImageMetadataExtractor(metadata).parseJpeg(tis.getFile());
            new JempboxExtractor(metadata).parse(tis);
        } finally {
            tmp.dispose();
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
      
        PDDocument pdfDocument = null;
        TemporaryResources tmp = new TemporaryResources();

        try {
            // PDFBox can process entirely in memory, or can use a temp file
            //  for unpacked / processed resources
            // Decide which to do based on if we're reading from a file or not already
            TikaInputStream tstream = TikaInputStream.cast(stream);
            if (tstream != null && tstream.hasFile()) {
               // File based, take that as a cue to use a temporary file
               RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw");
               pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true);
            } else {
               // Go for the normal, stream based in-memory parsing
               pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
            }
          
            if (pdfDocument.isEncrypted()) {
                String password = null;
               
                // Did they supply a new style Password Provider?
                PasswordProvider passwordProvider = context.get(PasswordProvider.class);
                if (passwordProvider != null) {
                   password = passwordProvider.getPassword(metadata);
                }
               
                // Fall back on the old style metadata if set
                if (password == null && metadata.get(PASSWORD) != null) {
                   password = metadata.get(PASSWORD);
                }
               
                // If no password is given, use an empty string as the default
                if (password == null) {
                   password = "";
                }
              
                try {
                    pdfDocument.decrypt(password);
                } catch (Exception e) {
                    // Ignore
                }
            }
            metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
            extractMetadata(pdfDocument, metadata);
            PDF2XHTML.process(pdfDocument, handler, metadata,
                              extractAnnotationText, enableAutoSpace,
                              suppressDuplicateOverlappingText, sortByPosition);
        } finally {
            if (pdfDocument != null) {
               pdfDocument.close();
            }
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

        public void parse(
                InputStream stream, ContentHandler ignored,
                Metadata metadata, ParseContext context)
                throws IOException, SAXException, TikaException {
            TemporaryResources tmp = new TemporaryResources();
            try {
                TikaInputStream tis = TikaInputStream.get(stream, tmp);

                // Figure out what we have to process
                String filename = metadata.get(Metadata.RESOURCE_NAME_KEY);
                MediaType type = detector.detect(tis, metadata);

                if (extractor == null) {
                    // Let the handler process the embedded resource
                    handler.handle(filename, type, tis);
                } else {
                    // Use a temporary file to process the stream twice
                    File file = tis.getFile();

                    // Let the handler process the embedded resource
                    InputStream input = TikaInputStream.get(file);
                    try {
                        handler.handle(filename, type, input);
                    } finally {
                        input.close();
                    }

                    // Recurse
                    extractor.extract(tis, extractor, handler);
                }
            } finally {
                tmp.dispose();
            }
        }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            handler.characters(chars, 0, chars.length);
            handler.endElement(XHTML, "h1", "h1");
        }

        // Use the delegate parser to parse this entry
        TemporaryResources tmp = new TemporaryResources();
        try {
            final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp);
            if (stream instanceof TikaInputStream) {
                final Object container = ((TikaInputStream) stream).getOpenContainer();
                if (container != null) {
                    newStream.setOpenContainer(container);
                }
            }
            DELEGATING_PARSER.parse(
                                    newStream,
                                    new EmbeddedContentHandler(new BodyContentHandler(handler)),
                                    metadata, context);
        } catch (TikaException e) {
            // TODO: can we log a warning somehow?
            // Could not parse the entry, just skip the content
        } finally {
            tmp.close();
        }

        if(outputHtml) {
           handler.endElement(XHTML, "div", "div");
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.