Package org.apache.tika.sax

Examples of org.apache.tika.sax.BasicContentHandlerFactory


    }

    @Test
    public void testBasicText() throws Exception {
        List<Metadata> list = getMetadata(new Metadata(),
                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
        Metadata container = list.get(0);
        String content = container.get(RecursiveParserWrapper.TIKA_CONTENT);
        assertTrue(content.indexOf("<p ") < 0);
        assertTrue(content.indexOf("embed_0") > -1);
    }
View Full Code Here


    }

    @Test
    public void testIgnoreContent() throws Exception {
        List<Metadata> list = getMetadata(new Metadata(),
                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, -1));
        Metadata container = list.get(0);
        String content = container.get(RecursiveParserWrapper.TIKA_CONTENT);
        assertNull(content);
    }
View Full Code Here

        ParseContext context = new ParseContext();
        Metadata metadata = new Metadata();
     
        Parser wrapped = new AutoDetectParser();
        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, 60));
        InputStream stream = RecursiveParserWrapperTest.class.getResourceAsStream(
                "/test-documents/test_recursive_embedded.docx");
        wrapper.parse(stream, new DefaultHandler(), metadata, context);
        List<Metadata> list = wrapper.getMetadata();
       
View Full Code Here

        Metadata metadata = new Metadata();
        String limitReached = null;
       
        Parser wrapped = new AutoDetectParser();
        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));

        InputStream stream = RecursiveParserWrapperTest.class.getResourceAsStream(
                "/test-documents/test_recursive_embedded.docx");
        wrapper.parse(stream, new DefaultHandler(), metadata, context);
        List<Metadata> list = wrapper.getMetadata();
View Full Code Here

        targets.add("test_recursive_embedded.docx/image1.emf");
       
        Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
        List<Metadata> list = getMetadata(metadata,
                new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
        Metadata container = list.get(0);
        String content = container.get(RecursiveParserWrapper.TIKA_CONTENT);
        assertTrue(content.indexOf("<p class=\"header\" />") > -1);       
       
        Set<String> seen = new HashSet<String>();
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.BasicContentHandlerFactory

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.