Package org.apache.tika.TikaTest

Examples of org.apache.tika.TikaTest.TrackingHandler


             "testEXCEL.xls", "testWORD.doc", "testPPT.ppt",
             "testVISIO.vsd", "test-outlook.msg"
       };
       for(String file : files) {
          // Process it without recursing
          TrackingHandler handler = process(file, extractor, false);
         
          // Won't have fired
          assertEquals(0, handler.filenames.size());
          assertEquals(0, handler.mediaTypes.size());
         
View Full Code Here


     *  office files in them
     */
    @Test
    public void testEmbeddedImages() throws Exception {
       ContainerExtractor extractor = new ParserContainerExtractor();
       TrackingHandler handler;
      
       // Excel with 1 image
       handler = process("testEXCEL_1img.xls", extractor, false);
       assertEquals(1, handler.filenames.size());
       assertEquals(1, handler.mediaTypes.size());
View Full Code Here

     *           -> image
     */
    @Test
    public void testEmbeddedOfficeFiles() throws Exception {
       ContainerExtractor extractor = new ParserContainerExtractor();
       TrackingHandler handler;
      
      
       // Excel with a word doc and a powerpoint doc, both of which have images in them
       // Without recursion, should see both documents + the images
       handler = process("testEXCEL_embeded.xls", extractor, false);
View Full Code Here

    }

    @Test
    public void testEmbeddedOfficeFilesXML() throws Exception {
        ContainerExtractor extractor = new ParserContainerExtractor();
        TrackingHandler handler;

        handler = process("EmbeddedDocument.docx", extractor, false);
        assertTrue(handler.filenames.contains("Microsoft_Office_Excel_97-2003_Worksheet1.bin"));
        assertEquals(2, handler.filenames.size());
    }
View Full Code Here

    }

    @Test
    public void testPowerpointImages() throws Exception {
        ContainerExtractor extractor = new ParserContainerExtractor();
        TrackingHandler handler;

        handler = process("pictures.ppt", extractor, false);
        assertTrue(handler.mediaTypes.contains(new MediaType("image", "jpeg")));
        assertTrue(handler.mediaTypes.contains(new MediaType("image", "png")));
    }
View Full Code Here

       assertEmbeddedFiles(1 /* thumbnail as default */, "testPPT.pptx" );
    }

    private void assertEmbeddedFiles(int expectedNbFiles, String file ) throws Exception {
    // Process it without recursing
      TrackingHandler handler = process(file, extractor, false);
     
      // Won't have fired
      assertEquals(expectedNbFiles, handler.filenames.size());
      assertEquals(expectedNbFiles, handler.mediaTypes.size());
     
View Full Code Here

     * Office files with embedded images, but no other
     *  office files in them
     */
    @Test
    public void testEmbeddedImages() throws Exception {
       TrackingHandler handler;
      
       // Excel with 1 image
       handler = process("testEXCEL_1img.xlsx", extractor, false);
       assertEquals(1, handler.filenames.size());
       assertEquals(1, handler.mediaTypes.size());
View Full Code Here

     *       -> excel
     *           -> image
     */
    @Test
    public void testEmbeddedOfficeFiles() throws Exception {
       TrackingHandler handler;
      
      
       // Excel with a word doc and a powerpoint doc, both of which have images in them
       // Without recursion, should see both documents + the images
       handler = process("testEXCEL_embeded.xlsx", extractor, false);
View Full Code Here

       assertEquals(TYPE_EMF, handler.mediaTypes.get(8))// Icon of embedded office doc
    }

    @Test
    public void testEmbeddedOutlook() throws Exception {
        TrackingHandler handler =
                process("EmbeddedOutlook.docx", extractor, false);

        assertEquals(2, handler.filenames.size());
        assertEquals(2, handler.mediaTypes.size());
View Full Code Here

        assertEquals(TYPE_MSG, handler.mediaTypes.get(1));
    }

    @Test
    public void testEmbeddedPDF() throws Exception {
        TrackingHandler handler =
                process("EmbeddedPDF.docx", extractor, false);

        assertEquals(2, handler.filenames.size());
        assertEquals(2, handler.mediaTypes.size());
View Full Code Here

TOP

Related Classes of org.apache.tika.TikaTest.TrackingHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.