Examples of TemporaryResources


Examples of org.apache.tika.io.TemporaryResources

                            entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
                        }
                        if (extractor.shouldParseEmbedded(entrydata)) {
                            // For detectors to work, we need a mark/reset supporting
                            //  InputStream, which ArchiveInputStream isn't, so wrap
                            TemporaryResources tmp = new TemporaryResources();
                            try {
                                TikaInputStream stream = TikaInputStream.get(archive, tmp);
                                extractor.parseEmbedded(stream, xhtml, entrydata, true);
                            } finally {
                                tmp.dispose();
                            }
                        }
                    } else if (name != null && name.length() > 0) {
                        xhtml.element("p", name);
                    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

      return;
    }
   
    // first set up and run GDAL
    // process the command
    TemporaryResources tmp = new TemporaryResources();
    TikaInputStream tis = TikaInputStream.get(stream, tmp);

    String runCommand = processCommand(tis);
    String output = execCommand(new String[] { runCommand });
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

        IsoFile isoFile;
       
        // The MP4Parser library accepts either a File, or a byte array
        // As MP4 video files are typically large, always use a file to
        //  avoid OOMs that may occur with in-memory buffering
        TemporaryResources tmp = new TemporaryResources();
        TikaInputStream tstream = TikaInputStream.get(stream, tmp);
        try {
            isoFile = new IsoFile(new DirectFileReadDataSource(tstream.getFile()));
            tmp.addResource(isoFile);

            // Grab the file type box
            FileTypeBox fileType = getOrNull(isoFile, FileTypeBox.class);
            if (fileType != null) {
               // Identify the type
               MediaType type = MediaType.application("mp4");
               for (MediaType t : typesMap.keySet()) {
                  if (typesMap.get(t).contains(fileType.getMajorBrand())) {
                     type = t;
                     break;
                  }
               }
               metadata.set(Metadata.CONTENT_TYPE, type.toString());

               if (type.getType().equals("audio")) {
                  metadata.set(XMPDM.AUDIO_COMPRESSOR, fileType.getMajorBrand().trim());
               }
            } else {
               // Some older QuickTime files lack the FileType
               metadata.set(Metadata.CONTENT_TYPE, "video/quicktime");
            }


            // Get the main MOOV box
            MovieBox moov = getOrNull(isoFile, MovieBox.class);
            if (moov == null) {
               // Bail out
               return;
            }


            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();


            // Pull out some information from the header box
            MovieHeaderBox mHeader = getOrNull(moov, MovieHeaderBox.class);
            if (mHeader != null) {
               // Get the creation and modification dates
               metadata.set(Metadata.CREATION_DATE, mHeader.getCreationTime());
               metadata.set(TikaCoreProperties.MODIFIED, mHeader.getModificationTime());

               // Get the duration
               double durationSeconds = ((double)mHeader.getDuration()) / mHeader.getTimescale();
               // TODO Use this

               // The timescale is normally the sampling rate
               metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)mHeader.getTimescale());
            }


            // Get some more information from the track header
            // TODO Decide how to handle multiple tracks
            List<TrackBox> tb = moov.getBoxes(TrackBox.class);
            if (tb.size() > 0) {
               TrackBox track = tb.get(0);

               TrackHeaderBox header = track.getTrackHeaderBox();
               // Get the creation and modification dates
               metadata.set(TikaCoreProperties.CREATED, header.getCreationTime());
               metadata.set(TikaCoreProperties.MODIFIED, header.getModificationTime());

               // Get the video width and height
               metadata.set(Metadata.IMAGE_WIDTH,  (int)header.getWidth());
               metadata.set(Metadata.IMAGE_LENGTH, (int)header.getHeight());

               // Get the sample information
               SampleTableBox samples = track.getSampleTableBox();
               SampleDescriptionBox sampleDesc = samples.getSampleDescriptionBox();
               if (sampleDesc != null) {
                  // Look for the first Audio Sample, if present
                  AudioSampleEntry sample = getOrNull(sampleDesc, AudioSampleEntry.class);
                  if (sample != null) {
                     XMPDM.ChannelTypePropertyConverter.convertAndSet(metadata, sample.getChannelCount());
                     //metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, sample.getSampleSize());    // TODO Num -> Type mapping
                     metadata.set(XMPDM.AUDIO_SAMPLE_RATE, (int)sample.getSampleRate());
                     //metadata.set(XMPDM.AUDIO_, sample.getSamplesPerPacket());
                     //metadata.set(XMPDM.AUDIO_, sample.getBytesPerSample());
                  }
               }
            }

            // Get metadata from the User Data Box
            UserDataBox userData = getOrNull(moov, UserDataBox.class);
            if (userData != null) {
               MetaBox meta = getOrNull(userData, MetaBox.class);

               // Check for iTunes Metadata
               // See http://atomicparsley.sourceforge.net/mpeg-4files.html and
               //  http://code.google.com/p/mp4v2/wiki/iTunesMetadata for more on these
               AppleItemListBox apple = getOrNull(meta, AppleItemListBox.class);
               if (apple != null) {
                  // Title
                  AppleNameBox title = getOrNull(apple, AppleNameBox.class);
                  addMetadata(TikaCoreProperties.TITLE, metadata, title);

                  // Artist
                  AppleArtistBox artist = getOrNull(apple, AppleArtistBox.class);
                  addMetadata(TikaCoreProperties.CREATOR, metadata, artist);
                  addMetadata(XMPDM.ARTIST, metadata, artist);

                  // Album Artist
                  AppleArtist2Box artist2 = getOrNull(apple, AppleArtist2Box.class);
                  addMetadata(XMPDM.ALBUM_ARTIST, metadata, artist2);

                  // Album
                  AppleAlbumBox album = getOrNull(apple, AppleAlbumBox.class);
                  addMetadata(XMPDM.ALBUM, metadata, album);

                  // Composer
                  AppleTrackAuthorBox composer = getOrNull(apple, AppleTrackAuthorBox.class);
                  addMetadata(XMPDM.COMPOSER, metadata, composer);

                  // Genre
                  AppleGenreBox genre = getOrNull(apple, AppleGenreBox.class);
                  addMetadata(XMPDM.GENRE, metadata, genre);

                  // Year
                  AppleRecordingYear2Box year = getOrNull(apple, AppleRecordingYear2Box.class);
                  if (year != null) {
                      metadata.set(XMPDM.RELEASE_DATE, year.getValue());
                  }

                  // Track number
                  AppleTrackNumberBox trackNum = getOrNull(apple, AppleTrackNumberBox.class);
                  if (trackNum != null) {
                     metadata.set(XMPDM.TRACK_NUMBER, trackNum.getA());
                     //metadata.set(XMPDM.NUMBER_OF_TRACKS, trackNum.getB()); // TODO
                  }

                  // Disc number
                  AppleDiskNumberBox discNum = getOrNull(apple, AppleDiskNumberBox.class);
                  if (discNum != null) {
                     metadata.set(XMPDM.DISC_NUMBER, discNum.getA());
                  }

                  // Compilation
                  AppleCompilationBox compilation = getOrNull(apple, AppleCompilationBox.class);
                  if (compilation != null) {
                      metadata.set(XMPDM.COMPILATION, (int)compilation.getValue());
                  }

                  // Comment
                  AppleCommentBox comment = getOrNull(apple, AppleCommentBox.class);
                  addMetadata(XMPDM.LOG_COMMENT, metadata, comment);

                  // Encoder
                  AppleEncoderBox encoder = getOrNull(apple, AppleEncoderBox.class);
                  if (encoder != null) {
                      metadata.set(XMP.CREATOR_TOOL, encoder.getValue());
                  }


                  // As text
                  for (Box box : apple.getBoxes()) {
                     if (box instanceof Utf8AppleDataBox) {
                        xhtml.element("p", ((Utf8AppleDataBox)box).getValue());
                     }
                  }
               }

               // TODO Check for other kinds too
            }

            // All done
            xhtml.endDocument();

        } finally {
            tmp.dispose();
        }

    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        Parser parser = getParser(metadata, context);
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
            TaggedContentHandler taggedHandler =
                handler != null ? new TaggedContentHandler(handler) : null;
            if (parser instanceof ParserDecorator){
                metadata.add("X-Parsed-By", ((ParserDecorator) parser).getWrappedParser().getClass().getName());
            } else {
                metadata.add("X-Parsed-By", parser.getClass().getName());
            }
            try {
                parser.parse(taggedStream, taggedHandler, metadata, context);
            } catch (RuntimeException e) {
                throw new TikaException(
                        "Unexpected RuntimeException from " + parser, e);
            } catch (IOException e) {
                taggedStream.throwIfCauseOf(e);
                throw new TikaException(
                        "TIKA-198: Illegal IOException from " + parser, e);
            } catch (SAXException e) {
                if (taggedHandler != null) taggedHandler.throwIfCauseOf(e);
                throw new TikaException(
                        "TIKA-237: Illegal SAXException from " + parser, e);
            }
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(stream, tmp);

            // Automatically detect the MIME type of the document
            MediaType type = detector.detect(tis, metadata);
            metadata.set(Metadata.CONTENT_TYPE, type.toString());

            // TIKA-216: Zip bomb prevention
            SecureContentHandler sch =
                handler != null ? new SecureContentHandler(handler, tis) : null;
            try {
                // Parse the document
                super.parse(tis, sch, metadata, context);
            } catch (SAXException e) {
                // Convert zip bomb exceptions to TikaExceptions
                sch.throwIfCauseOf(e);
                throw e;
            }
        } finally {
            tmp.dispose();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
      
        PDDocument pdfDocument = null;
        TemporaryResources tmp = new TemporaryResources();
        //config from context, or default if not set via context
        PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig);
        try {
            // PDFBox can process entirely in memory, or can use a temp file
            //  for unpacked / processed resources
            // Decide which to do based on if we're reading from a file or not already
            TikaInputStream tstream = TikaInputStream.cast(stream);
            if (tstream != null && tstream.hasFile()) {
                // File based, take that as a cue to use a temporary file
                RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw");
                if (localConfig.getUseNonSequentialParser() == true) {
                    pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile);
                } else {
                    pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true);
                }
            } else {
                // Go for the normal, stream based in-memory parsing
                if (localConfig.getUseNonSequentialParser() == true) {
                    pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), new RandomAccessBuffer());
                } else {
                    pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
                }
            }

            metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted()));

            if (pdfDocument.isEncrypted()) {
                String password = null;
               
                // Did they supply a new style Password Provider?
                PasswordProvider passwordProvider = context.get(PasswordProvider.class);
                if (passwordProvider != null) {
                   password = passwordProvider.getPassword(metadata);
                }
               
                // Fall back on the old style metadata if set
                if (password == null && metadata.get(PASSWORD) != null) {
                   password = metadata.get(PASSWORD);
                }
               
                // If no password is given, use an empty string as the default
                if (password == null) {
                   password = "";
                }
              
                try {
                    pdfDocument.decrypt(password);
                } catch (Exception e) {
                    // Ignore
                }
            }
            metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
            extractMetadata(pdfDocument, metadata);
            if (handler != null) {
                PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
            }
           
        } finally {
            if (pdfDocument != null) {
               pdfDocument.close();
            }
            tmp.dispose();
            //TODO: once we migrate to PDFBox 2.0, remove this (PDFBOX-2200)
            PDFont.clearResources();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

        AttributesImpl attributes = new AttributesImpl();
        attributes.addAttribute("", "class", "class", "CDATA", "embedded");
        attributes.addAttribute("", "id", "id", "CDATA", filename);
        xhtml.startElement("div", attributes);
        if (embeddedExtractor.shouldParseEmbedded(attachMeta)) {
          TemporaryResources tmp = new TemporaryResources();
          try {
            TikaInputStream tis = TikaInputStream.get(attach.getFileInputStream(), tmp);
            embeddedExtractor.parseEmbedded(tis, xhtml, attachMeta, true);
          } finally {
            tmp.dispose();
          }
        }
        xhtml.endElement("div");

      } catch (Exception e) {
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

        // Ensure that the stream supports the mark feature
        if (! TikaInputStream.isTikaInputStream(stream))
            stream = new BufferedInputStream(stream);
       
       
        TemporaryResources tmp = new TemporaryResources();
        ArchiveInputStream ais = null;
        try {
            ArchiveStreamFactory factory = context.get(ArchiveStreamFactory.class, new ArchiveStreamFactory());
            // At the end we want to close the archive stream to release
            // any associated resources, but the underlying document stream
            // should not be closed
            ais = factory.createArchiveInputStream(new CloseShieldInputStream(stream));
           
        } catch (StreamingNotSupportedException sne) {
            // Most archive formats work on streams, but a few need files
            if (sne.getFormat().equals(ArchiveStreamFactory.SEVEN_Z)) {
                // Rework as a file, and wrap
                stream.reset();
                TikaInputStream tstream = TikaInputStream.get(stream, tmp);
               
                // Pending a fix for COMPRESS-269, this bit is a little nasty
                ais = new SevenZWrapper(new SevenZFile(tstream.getFile()));
               
            } else {
                tmp.close();
                throw new TikaException("Unknown non-streaming format " + sne.getFormat(), sne);
            }
        } catch (ArchiveException e) {
            tmp.close();
            throw new TikaException("Unable to unpack document stream", e);
        }

        MediaType type = getMediaType(ais);
        if (!type.equals(MediaType.OCTET_STREAM)) {
            metadata.set(CONTENT_TYPE, type.toString());
        }

        // Use the delegate parser to parse the contained document
        EmbeddedDocumentExtractor extractor = context.get(
                EmbeddedDocumentExtractor.class,
                new ParsingEmbeddedDocumentExtractor(context));

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        try {
            ArchiveEntry entry = ais.getNextEntry();
            while (entry != null) {
                if (!entry.isDirectory()) {
                    parseEntry(ais, entry, extractor, xhtml);
                }
                entry = ais.getNextEntry();
            }
        } finally {
            ais.close();
            tmp.close();
        }

        xhtml.endDocument();
    }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

            }
           
            if (extractor.shouldParseEmbedded(entrydata)) {
                // For detectors to work, we need a mark/reset supporting
                // InputStream, which ArchiveInputStream isn't, so wrap
                TemporaryResources tmp = new TemporaryResources();
                try {
                    TikaInputStream tis = TikaInputStream.get(archive, tmp);
                    extractor.parseEmbedded(tis, xhtml, entrydata, true);
                } finally {
                    tmp.dispose();
                }
            }
        } else if (name != null && name.length() > 0) {
            xhtml.element("p", name);
        }
View Full Code Here

Examples of org.apache.tika.io.TemporaryResources

  }

  public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException,
      SAXException, TikaException {

    TemporaryResources tmp = new TemporaryResources();
    FileOutputStream fos = null;
    TikaInputStream tis = null;
    try {
      int w = image.getWidth(null);
      int h = image.getHeight(null);
      BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);
      Graphics2D g2 = bImage.createGraphics();
      g2.drawImage(image, 0, 0, null);
      g2.dispose();
      File file = tmp.createTemporaryFile();
      fos = new FileOutputStream(file);
      ImageIO.write(bImage, "png", fos);
      bImage = null;
      tis = TikaInputStream.get(file);
      parse(tis, handler, metadata, context);

    } finally {
      tmp.dispose();
      if (tis != null)
        tis.close();
      if (fos != null)
        fos.close();
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.