Examples of TemporaryResources


Examples of org.apache.tika.io.TemporaryResources

final class EmbeddedExtractor {

  public boolean parseEmbedded(InputStream stream, Record record, String name, Command child) {
    // Use the delegate parser to parse this entry
   
    TemporaryResources tmp = new TemporaryResources();
    try {
      final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp);
      if (stream instanceof TikaInputStream) {
        final Object container = ((TikaInputStream) stream).getOpenContainer();
        if (container != null) {
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

        Record entrydata = new Record(); // TODO: or pass myself?
        //Record entrydata = record.copy();
       
        // For detectors to work, we need a mark/reset supporting
        // InputStream, which ArchiveInputStream isn't, so wrap
        TemporaryResources tmp = new TemporaryResources();
        try {
          TikaInputStream tis = TikaInputStream.get(archive, tmp);
          return extractor.parseEmbedded(tis, entrydata, name, getChild());
        } finally {
          try {
            tmp.dispose();
          } catch (TikaException e) {
            LOG.warn("Cannot dispose of tmp Tika resources", e);
          }
        }
      } else {
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            InputStream stream = binaryValue.getStream();
            if (stream instanceof SelfClosingInputStream) {
                //because of the "all detectors" approach (see below), we need to avoid a self-closing stream here
                stream = ((SelfClosingInputStream) stream).wrappedStream();
            }
            TemporaryResources tmp = new TemporaryResources();
            try {
                TikaInputStream tikaInputStream = TikaInputStream.get(stream, tmp);
                // There is content and possibly a name ...
                autoDetectedMimeType = allDetectors.detect(tikaInputStream, metadata);
            } catch (Exception e) {
                LOGGER.debug(e, "Unable to extract mime-type");
            } finally {
                try {
                    tmp.close();
                } finally {
                    if (stream != null) {
                        stream.close();
                    }
                }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            handler.characters(chars, 0, chars.length);
            handler.endElement(XHTML, "h1", "h1");
        }

        // Use the delegate parser to parse this entry
        TemporaryResources tmp = new TemporaryResources();
        try {
            DELEGATING_PARSER.parse(
                    TikaInputStream.get(new CloseShieldInputStream(stream), tmp),
                    new EmbeddedContentHandler(new BodyContentHandler(handler)),
                    metadata, context);
        } catch (TikaException e) {
            // Could not parse the entry, just skip the content
        } finally {
            tmp.close();
        }

        if(outputHtml) {
           handler.endElement(XHTML, "div", "div");
        }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(stream, tmp);
            new ImageMetadataExtractor(metadata).parseJpeg(tis.getFile());
            new JempboxExtractor(metadata).parse(tis);
        } finally {
            tmp.dispose();
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        Parser parser = getParser(metadata, context);
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
            TaggedContentHandler taggedHandler =
                handler != null ? new TaggedContentHandler(handler) : null;
      metadata.add("X-Parsed-By", parser.getClass().getName());
            try {
                parser.parse(taggedStream, taggedHandler, metadata, context);
            } catch (RuntimeException e) {
                throw new TikaException(
                        "Unexpected RuntimeException from " + parser, e);
            } catch (IOException e) {
                taggedStream.throwIfCauseOf(e);
                throw new TikaException(
                        "TIKA-198: Illegal IOException from " + parser, e);
            } catch (SAXException e) {
                if (taggedHandler != null) taggedHandler.throwIfCauseOf(e);
                throw new TikaException(
                        "TIKA-237: Illegal SAXException from " + parser, e);
            }
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(stream, tmp);

            // Automatically detect the MIME type of the document
            MediaType type = detector.detect(tis, metadata);
            metadata.set(Metadata.CONTENT_TYPE, type.toString());

            // TIKA-216: Zip bomb prevention
            SecureContentHandler sch =
                handler != null ? new SecureContentHandler(handler, tis) : null;
            try {
                // Parse the document
                super.parse(tis, sch, metadata, context);
            } catch (SAXException e) {
                // Convert zip bomb exceptions to TikaExceptions
                sch.throwIfCauseOf(e);
                throw e;
            }
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml =
            new XHTMLContentHandler(handler, metadata);

        TemporaryResources tmp = new TemporaryResources();
        try {
            parse(TikaInputStream.get(stream, tmp),
                    xhtml, metadata, tmp);
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
      
        PDDocument pdfDocument = null;
        TemporaryResources tmp = new TemporaryResources();
        //config from context, or default if not set via context
        PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig);
        try {
            // PDFBox can process entirely in memory, or can use a temp file
            //  for unpacked / processed resources
            // Decide which to do based on if we're reading from a file or not already
            TikaInputStream tstream = TikaInputStream.cast(stream);
            if (tstream != null && tstream.hasFile()) {
                // File based, take that as a cue to use a temporary file
                RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw");
                if (localConfig.getUseNonSequentialParser() == true) {
                    pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile);
                } else {
                    pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true);
                }
            } else {
                // Go for the normal, stream based in-memory parsing
                if (localConfig.getUseNonSequentialParser() == true) {
                    pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), new RandomAccessBuffer());
                } else {
                    pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
                }
            }
           
          
            if (pdfDocument.isEncrypted()) {
                String password = null;
               
                // Did they supply a new style Password Provider?
                PasswordProvider passwordProvider = context.get(PasswordProvider.class);
                if (passwordProvider != null) {
                   password = passwordProvider.getPassword(metadata);
                }
               
                // Fall back on the old style metadata if set
                if (password == null && metadata.get(PASSWORD) != null) {
                   password = metadata.get(PASSWORD);
                }
               
                // If no password is given, use an empty string as the default
                if (password == null) {
                   password = "";
                }
              
                try {
                    pdfDocument.decrypt(password);
                } catch (Exception e) {
                    // Ignore
                }
            }
            metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
            extractMetadata(pdfDocument, metadata);
            if (handler != null) {
                PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
            }
           
        } finally {
            if (pdfDocument != null) {
               pdfDocument.close();
            }
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

                entrydata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, name);
            }
            if (extractor.shouldParseEmbedded(entrydata)) {
                // For detectors to work, we need a mark/reset supporting
                // InputStream, which ArchiveInputStream isn't, so wrap
                TemporaryResources tmp = new TemporaryResources();
                try {
                    TikaInputStream tis = TikaInputStream.get(archive, tmp);
                    extractor.parseEmbedded(tis, xhtml, entrydata, true);
                } finally {
                    tmp.dispose();
                }
            }
        } else if (name != null && name.length() > 0) {
            xhtml.element("p", name);
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.