Examples of TrackingHandler


Examples of org.apache.tika.TikaTest.TrackingHandler

             "testEXCEL.xls", "testWORD.doc", "testPPT.ppt",
             "testVISIO.vsd", "test-outlook.msg"
       };
       for(String file : files) {
          // Process it without recursing
          TrackingHandler handler = process(file, extractor, false);
         
          // Won't have fired
          assertEquals(0, handler.filenames.size());
          assertEquals(0, handler.mediaTypes.size());
         
View Full Code Here

Examples of org.apache.tika.TikaTest.TrackingHandler

     *  office files in them
     */
    @Test
    public void testEmbeddedImages() throws Exception {
       ContainerExtractor extractor = new ParserContainerExtractor();
       TrackingHandler handler;
      
       // Excel with 1 image
       handler = process("testEXCEL_1img.xls", extractor, false);
       assertEquals(1, handler.filenames.size());
       assertEquals(1, handler.mediaTypes.size());
View Full Code Here

Examples of org.apache.tika.TikaTest.TrackingHandler

     *           -> image
     */
    @Test
    public void testEmbeddedOfficeFiles() throws Exception {
       ContainerExtractor extractor = new ParserContainerExtractor();
       TrackingHandler handler;
      
      
       // Excel with a word doc and a powerpoint doc, both of which have images in them
       // Without recursion, should see both documents + the images
       handler = process("testEXCEL_embeded.xls", extractor, false);
View Full Code Here

Examples of org.apache.tika.TikaTest.TrackingHandler

    }

    @Test
    public void testEmbeddedOfficeFilesXML() throws Exception {
        ContainerExtractor extractor = new ParserContainerExtractor();
        TrackingHandler handler;

        handler = process("EmbeddedDocument.docx", extractor, false);
        assertTrue(handler.filenames.contains("Microsoft_Office_Excel_97-2003_Worksheet1.bin"));
        assertEquals(2, handler.filenames.size());
    }
View Full Code Here

Examples of org.apache.tika.TikaTest.TrackingHandler

    }

    @Test
    public void testPowerpointImages() throws Exception {
        ContainerExtractor extractor = new ParserContainerExtractor();
        TrackingHandler handler;

        handler = process("pictures.ppt", extractor, false);
        assertTrue(handler.mediaTypes.contains(new MediaType("image", "jpeg")));
        assertTrue(handler.mediaTypes.contains(new MediaType("image", "png")));
    }
View Full Code Here

Examples of org.apache.tika.TikaTest.TrackingHandler

            TikaInputStream stream = TikaInputStream.get(input);

            assertEquals(true, extractor.isSupported(stream));

            // Process it
            TrackingHandler handler = new TrackingHandler();
            extractor.extract(stream, null, handler);

            assertEquals(2, handler.filenames.size());
        } finally {
            input.close();
View Full Code Here

Examples of org.apache.tika.TikaTest.TrackingHandler

        TikaInputStream stream = getTestFile(filename);
        try {
            assertEquals(true, extractor.isSupported(stream));

            // Process it
            TrackingHandler handler = new TrackingHandler();
            if(recurse) {
                extractor.extract(stream, extractor, handler);
            } else {
                extractor.extract(stream, null, handler);
            }
View Full Code Here

Examples of org.apache.tika.TikaTest.TrackingHandler

    public void testBodyAndAttachments() throws Exception {
       ContainerExtractor extractor = new ParserContainerExtractor();
      
       // Process it with recursing
       // Will have the message body RTF and the attachments
       TrackingHandler handler = process(file, extractor, true);
       assertEquals(6, handler.filenames.size());
       assertEquals(6, handler.mediaTypes.size());
      
       // We know the filenames for all of them
       assertEquals("message.rtf", handler.filenames.get(0));
View Full Code Here

Examples of org.apache.tika.parser.microsoft.AbstractPOIContainerExtractionTest.TrackingHandler

       assertTrue(pdfHaystack > -1);
       assertTrue(needle > -1);
       assertTrue(needle > pdfHaystack && pdfHaystack > outerHaystack);
      
       //plagiarized from POIContainerExtractionTest.  Thank you!
       TrackingHandler tracker = new TrackingHandler();
       TikaInputStream tis;
       ContainerExtractor ex = new ParserContainerExtractor();
       try{
          tis= TikaInputStream.get(getResourceAsStream("/test-documents/testPDFEmbeddingAndEmbedded.docx"));
          ex.extract(tis, ex, tracker);
View Full Code Here

Examples of org.apache.tika.parser.microsoft.AbstractPOIContainerExtractionTest.TrackingHandler

    public void testEmbeddedFilesInChildren() throws Exception {
        String xml = getXML("/testPDF_childAttachments.pdf").xml;
        //"regressiveness" exists only in Unit10.doc not in the container pdf document
        assertTrue(xml.contains("regressiveness"));

        TrackingHandler tracker = new TrackingHandler();
        TikaInputStream tis = null;
        ContainerExtractor ex = new ParserContainerExtractor();
        try{
            tis= TikaInputStream.get(
                getResourceAsStream("/test-documents/testPDF_childAttachments.pdf"));
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.