Examples of TikaInputStream


Examples of org.apache.tika.io.TikaInputStream

        assertEquals(
          MediaType.application("vnd.ms-powerpoint"),
          d.detect(input, new Metadata())
        );
       
        TikaInputStream tis = TikaInputStream.get(getTestDoc("testPPT.ppt"));
        assertEquals(
          MediaType.application("vnd.ms-powerpoint"),
          d.detect(tis, new Metadata())
        );
       
        assertNotNull(tis.getOpenContainer());
        assertEquals(POIFSFileSystem.class, tis.getOpenContainer().getClass());
       
        // Try some ones that POI doesn't handle, that are still OLE2 based
        tis = TikaInputStream.get(getTestDoc("testWORKS.wps"));
        assertEquals(
           MediaType.application("vnd.ms-works"),
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

        assertEquals(
          MediaType.application("vnd.oasis.opendocument.formula"),
          d.detect(input, new Metadata())
        );
       
        TikaInputStream tis = TikaInputStream.get(getTestDoc("testOpenOffice2.odf"));
        assertEquals(
          MediaType.application("vnd.oasis.opendocument.formula"),
          d.detect(tis, new Metadata())
        );
        // Doesn't store the zip parser yet
        assertNull(tis.getOpenContainer());
    }
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

              MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation"),
              d.detect(input, new Metadata())
        );

        // Try with a tika input stream
        TikaInputStream tis = TikaInputStream.get(getTestDoc("testPPT.pptx"));
        assertEquals(
              MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation"),
              d.detect(tis, new Metadata())
        );
       
        // There should be an attached OPCPackage as an open container
        assertNotNull(tis.getOpenContainer());
        assertTrue(
              "Open container should be OPCPackage, not " + tis.getOpenContainer().getClass(),
              tis.getOpenContainer() instanceof OPCPackage
        );
       
        // The underlying TikaInputStream should still be open, and file based
        assertTrue(
              "TikaInputStream should still have a file",
              tis.hasFile()
        );
    }
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

              tis.hasFile()
        );
    }
   
    public void testDetectIWork() throws Exception {
       TikaInputStream tis;

       tis = TikaInputStream.get(getTestDoc("testKeynote.key"));
       assertEquals(
             MediaType.application("vnd.apple.keynote"),
             d.detect(tis, new Metadata())
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

             d.detect(tis, new Metadata())
       );
    }
   
    public void testDetectZip() throws Exception {
       TikaInputStream tis;

       tis = TikaInputStream.get(getTestDoc("test-documents.zip"));
       assertEquals(
             MediaType.application("zip"),
             d.detect(tis, new Metadata())
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

    }

    public MediaType detect(InputStream input, Metadata metadata)
             throws IOException {
        if (TikaInputStream.isTikaInputStream(input)) {
            TikaInputStream stream = TikaInputStream.get(input);
            return detect(stream, metadata);
        } else {
           // We can't do proper detection if we don't
           //  have a TikaInputStream
           return DEFAULT;
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

      public void write(OutputStream outputStream) throws IOException, WebApplicationException {
        Writer writer = new OutputStreamWriter(outputStream, "UTF-8");

        BodyContentHandler body = new BodyContentHandler(new RichTextContentHandler(writer));

        TikaInputStream tis = TikaInputStream.get(is);

        try {
          tis.getFile();

          parser.parse(tis, body, metadata);
        } catch (SAXException e) {
          throw new WebApplicationException(e);
        } catch (EncryptedDocumentException e) {
          logger.warn(String.format(
                  "%s: Encrypted document",
                  info.getPath()
          ), e);

          throw new WebApplicationException(e, Response.status(422).build());
        } catch (TikaException e) {
          logger.warn(String.format(
            "%s: Text extraction failed",
            info.getPath()
          ), e);

          if (e.getCause()!=null && e.getCause() instanceof WebApplicationException) {
            throw (WebApplicationException) e.getCause();
          }

          if (e.getCause()!=null && e.getCause() instanceof IllegalStateException) {
            throw new WebApplicationException(Response.status(422).build());
          }

          if (e.getCause()!=null && e.getCause() instanceof OldWordFileFormatException) {
            throw new WebApplicationException(Response.status(422).build());
          }

          throw new WebApplicationException(Response.Status.INTERNAL_SERVER_ERROR);
        } finally {
          tis.close();
        }
      }
    };
  }
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

            throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        final DirectoryNode root;
        TikaInputStream tstream = TikaInputStream.cast(stream);
        if (tstream == null) {
            root = new NPOIFSFileSystem(new CloseShieldInputStream(stream)).getRoot();
        } else {
            final Object container = tstream.getOpenContainer();
            if (container instanceof NPOIFSFileSystem) {
                root = ((NPOIFSFileSystem) container).getRoot();
            } else if (container instanceof DirectoryNode) {
                root = (DirectoryNode) container;
            } else if (tstream.hasFile()) {
                root = new NPOIFSFileSystem(tstream.getFileChannel()).getRoot();
            } else {
                root = new NPOIFSFileSystem(new CloseShieldInputStream(tstream)).getRoot();
            }
        }
        parse(root, context, metadata, xhtml);
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

    private void handleEmbeddedOLE(PackagePart part, ContentHandler handler, String rel)
            throws IOException, SAXException {
        POIFSFileSystem fs = new POIFSFileSystem(part.getInputStream());
        try {
            Metadata metadata = new Metadata();
            TikaInputStream stream = null;
            metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, rel);

            DirectoryNode root = fs.getRoot();
            POIFSDocumentType type = POIFSDocumentType.detectType(root);
           
View Full Code Here

Examples of org.apache.tika.io.TikaInputStream

            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        TrueTypeFont font;
        TTFParser parser = new TTFParser();
        TikaInputStream tis = TikaInputStream.cast(stream);
        if (tis != null && tis.hasFile()) {
            font = parser.parseTTF(tis.getFile());
        } else {
            font = parser.parseTTF(stream);
        }

        metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.