Examples of AutoDetectParser


Examples of org.apache.tika.parser.AutoDetectParser

*/
public class Bzip2ParserTest extends AbstractPkgTest {

    @Test
    public void testBzip2Parsing() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = Bzip2ParserTest.class.getResourceAsStream(
                "/test-documents/test-documents.tbz2");
        try {
            parser.parse(stream, handler, metadata, recursingContext);
        } finally {
            stream.close();
        }

        assertEquals("application/x-bzip2", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

     * Tests that the ParseContext parser is correctly
     *  fired for all the embedded entries.
     */
    @Test
    public void testEmbedded() throws Exception {
       Parser parser = new AutoDetectParser(); // Should auto-detect!
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();

       InputStream stream = ZipParserTest.class.getResourceAsStream(
               "/test-documents/test-documents.tbz2");
       try {
           parser.parse(stream, handler, metadata, trackingContext);
       } finally {
           stream.close();
       }
      
       // Should find a single entry, for the (compressed) tar file
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

*/
public class GzipParserTest extends AbstractPkgTest {

    @Test
    public void testGzipParsing() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = GzipParserTest.class.getResourceAsStream(
                "/test-documents/test-documents.tgz");
        try {
            parser.parse(stream, handler, metadata, recursingContext);
        } finally {
            stream.close();
        }

        assertEquals("application/x-gzip", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

     * Tests that the ParseContext parser is correctly
     *  fired for all the embedded entries.
     */
    @Test
    public void testEmbedded() throws Exception {
       Parser parser = new AutoDetectParser(); // Should auto-detect!
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();

       InputStream stream = ZipParserTest.class.getResourceAsStream(
               "/test-documents/test-documents.tgz");
       try {
           parser.parse(stream, handler, metadata, trackingContext);
       } finally {
           stream.close();
       }
      
       // Should find a single entry, for the (compressed) tar file
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

       assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, "ASCII"));
    }
   
    @Test
    public void testSvgzParsing() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = GzipParserTest.class.getResourceAsStream(
                "/test-documents/testSVG.svgz");
        try {
            parser.parse(stream, handler, metadata, recursingContext);
        } finally {
            stream.close();
        }

        assertEquals("application/x-gzip", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

*/
public class ZipParserTest extends AbstractPkgTest {

    @Test
    public void testZipParsing() throws Exception {
        Parser parser = new AutoDetectParser(); // Should auto-detect!
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        InputStream stream = ZipParserTest.class.getResourceAsStream(
                "/test-documents/test-documents.zip");
        try {
            parser.parse(stream, handler, metadata, recursingContext);
        } finally {
            stream.close();
        }

        assertEquals("application/zip", metadata.get(Metadata.CONTENT_TYPE));
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

     * Tests that the ParseContext parser is correctly
     *  fired for all the embedded entries.
     */
    @Test
    public void testEmbedded() throws Exception {
       Parser parser = new AutoDetectParser(); // Should auto-detect!
       ContentHandler handler = new BodyContentHandler();
       Metadata metadata = new Metadata();

       InputStream stream = ZipParserTest.class.getResourceAsStream(
               "/test-documents/test-documents.zip");
       try {
           parser.parse(stream, handler, metadata, trackingContext);
       } finally {
           stream.close();
       }
      
       // Should have found all 9 documents
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

        assertContains("<div class=\"embedded\" id=\"test1.txt\" />", xml);
        assertContains("<div class=\"embedded\" id=\"test2.txt\" />", xml);

        // Also make sure EMBEDDED_RELATIONSHIP_ID was
        // passed when parsing the embedded docs:
        Parser parser = new AutoDetectParser();
        ParseContext context = new ParseContext();
        context.set(Parser.class, parser);
        GatherRelIDsDocumentExtractor relIDs = new GatherRelIDsDocumentExtractor();
        context.set(EmbeddedDocumentExtractor.class, relIDs);
        InputStream input = getResourceAsStream("/test-documents/testEmbedded.zip");
        try {
          parser.parse(input,
                       new BodyContentHandler(),
                       new Metadata(),
                       context);
        } finally {
            input.close();
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

        InputStream input = ODFParserTest.class.getResourceAsStream(
            "/test-documents/testMasterFooter.odp");
        try {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new AutoDetectParser().parse(input, handler, metadata);
 
            String content = handler.toString();
            assertContains("Master footer is here", content);
        } finally {
            input.close();
View Full Code Here

Examples of org.apache.tika.parser.AutoDetectParser

        InputStream input = ODFParserTest.class.getResourceAsStream(
            "/test-documents/testFooter.odt");
        try {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new AutoDetectParser().parse(input, handler, metadata);
 
            String content = handler.toString();
            assertContains("Here is some text...", content);
            assertContains("Here is some text on page 2", content);
            assertContains("Here is footer text", content);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.