Package org.apache.tika.parser.odf

Examples of org.apache.tika.parser.odf.OpenDocumentParser


                parsers.put(MediaType.application("vnd.ms-excel"), parser);
                parsers.put(MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document"), parser);
                parsers.put(MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation"), parser);
                parsers.put(MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"), parser);
            } else if (name.equals("org.apache.jackrabbit.extractor.OpenOfficeTextExtractor")) {
                Parser parser = new OpenDocumentParser();
                parsers.put(MediaType.application("vnd.oasis.opendocument.database"), parser);
                parsers.put(MediaType.application("vnd.oasis.opendocument.formula"), parser);
                parsers.put(MediaType.application("vnd.oasis.opendocument.graphics"), parser);
                parsers.put(MediaType.application("vnd.oasis.opendocument.presentation"), parser);
                parsers.put(MediaType.application("vnd.oasis.opendocument.spreadsheet"), parser);
                parsers.put(MediaType.application("vnd.oasis.opendocument.text"), parser);
                parsers.put(MediaType.application("vnd.sun.xml.calc"), parser);
                parsers.put(MediaType.application("vnd.sun.xml.draw"), parser);
                parsers.put(MediaType.application("vnd.sun.xml.impress"), parser);
                parsers.put(MediaType.application("vnd.sun.xml.writer"), parser);
            } else if (name.equals("org.apache.jackrabbit.extractor.PdfTextExtractor")) {
                parsers.put(MediaType.application("pdf"), new PDFParser());
            } else if (name.equals("org.apache.jackrabbit.extractor.PlainTextExtractor")) {
                parsers.put(MediaType.TEXT_PLAIN, new TXTParser());
            } else if (name.equals("org.apache.jackrabbit.extractor.PngTextExtractor")) {
                Parser parser = new ImageParser();
                parsers.put(MediaType.image("png"), parser);
                parsers.put(MediaType.image("apng"), parser);
                parsers.put(MediaType.image("mng"), parser);
            } else if (name.equals("org.apache.jackrabbit.extractor.RTFTextExtractor")) {
                Parser parser = new RTFParser();
                parsers.put(MediaType.application("rtf"), parser);
                parsers.put(MediaType.text("rtf"), parser);
            } else if (name.equals("org.apache.jackrabbit.extractor.XMLTextExtractor")) {
                Parser parser = new XMLParser();
                parsers.put(MediaType.APPLICATION_XML, parser);
                parsers.put(MediaType.text("xml"), parser);
            } else {
                logger.warn("Ignoring unknown text extractor class: {}", name);
            }
        }

        parser.setParsers(parsers);
    }
View Full Code Here


        InputStream input = ODFParserTest.class.getResourceAsStream(
                "/test-documents/testODFwithOOo3.odt");
        try {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new OpenDocumentParser().parse(input, handler, metadata);

            assertEquals(
                    "application/vnd.oasis.opendocument.text",
                    metadata.get(Metadata.CONTENT_TYPE));
           
View Full Code Here

        // MS Open XML Format
        addConverter( new OOXMLParser().getSupportedTypes( parseContext ),
                MSOfficeXMLConverter.class );

        // Open document format
        addConverter( new OpenDocumentParser().getSupportedTypes( parseContext ),
                OpenDocumentConverter.class );
    }
View Full Code Here

                parsers.put("application/vnd.openxmlformats-officedocument.wordprocessingml.document", parser);
                parsers.put("application/vnd.openxmlformats-officedocument.presentationml.presentation", parser);
                parsers.put("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.OpenOfficeTextExtractor")) {
                Parser parser = new OpenDocumentParser();
                parsers.put("application/vnd.oasis.opendocument.database", parser);
                parsers.put("application/vnd.oasis.opendocument.formula", parser);
                parsers.put("application/vnd.oasis.opendocument.graphics", parser);
                parsers.put("application/vnd.oasis.opendocument.presentation", parser);
                parsers.put("application/vnd.oasis.opendocument.spreadsheet", parser);
                parsers.put("application/vnd.oasis.opendocument.text", parser);
                parsers.put("application/vnd.sun.xml.calc", parser);
                parsers.put("application/vnd.sun.xml.draw", parser);
                parsers.put("application/vnd.sun.xml.impress", parser);
                parsers.put("application/vnd.sun.xml.writer", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PdfTextExtractor")) {
                parsers.put("application/pdf", new PDFParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PlainTextExtractor")) {
                parsers.put("text/plain", new TXTParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PngTextExtractor")) {
                Parser parser = new ImageParser();
                parsers.put("image/png", parser);
                parsers.put("image/apng", parser);
                parsers.put("image/mng", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.RTFTextExtractor")) {
                Parser parser = new RTFParser();
                parsers.put("application/rtf", parser);
                parsers.put("text/rtf", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.XMLTextExtractor")) {
                Parser parser = new XMLParser();
                parsers.put("application/xml", parser);
                parsers.put("text/xml", parser);
            } else {
                logger.warn("Ignoring unknown text extractor class: {}", name);
            }
        }

        parser.setParsers(parsers);
    }
View Full Code Here

                parsers.put("application/vnd.openxmlformats-officedocument.wordprocessingml.document", parser);
                parsers.put("application/vnd.openxmlformats-officedocument.presentationml.presentation", parser);
                parsers.put("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.OpenOfficeTextExtractor")) {
                Parser parser = new OpenDocumentParser();
                parsers.put("application/vnd.oasis.opendocument.database", parser);
                parsers.put("application/vnd.oasis.opendocument.formula", parser);
                parsers.put("application/vnd.oasis.opendocument.graphics", parser);
                parsers.put("application/vnd.oasis.opendocument.presentation", parser);
                parsers.put("application/vnd.oasis.opendocument.spreadsheet", parser);
                parsers.put("application/vnd.oasis.opendocument.text", parser);
                parsers.put("application/vnd.sun.xml.calc", parser);
                parsers.put("application/vnd.sun.xml.draw", parser);
                parsers.put("application/vnd.sun.xml.impress", parser);
                parsers.put("application/vnd.sun.xml.writer", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PdfTextExtractor")) {
                parsers.put("application/pdf", new PDFParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PlainTextExtractor")) {
                parsers.put("text/plain", new TXTParser());
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.PngTextExtractor")) {
                Parser parser = new ImageParser();
                parsers.put("image/png", parser);
                parsers.put("image/apng", parser);
                parsers.put("image/mng", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.RTFTextExtractor")) {
                Parser parser = new RTFParser();
                parsers.put("application/rtf", parser);
                parsers.put("text/rtf", parser);
            } else if (name.equals(
                    "org.apache.jackrabbit.extractor.XMLTextExtractor")) {
                Parser parser = new XMLParser();
                parsers.put("application/xml", parser);
                parsers.put("text/xml", parser);
            } else {
                logger.warn("Ignoring unknown text extractor class: {}", name);
            }
        }

        parser.setParsers(parsers);
    }
View Full Code Here

    InputStream input;
    try {
      input = new FileInputStream(new File(f.fileName()));
      ContentHandler textHandler = new BodyContentHandler(-1);
      Metadata metadata = new Metadata();
      OpenDocumentParser parser = new OpenDocumentParser();
      ParseContext context = new ParseContext();
      parser.parse(input, textHandler, metadata, context);
      String[] result = textHandler.toString().split(regex);
      for (int i=0; i<result.length && keepRunning; i++) {
        if (interrupt) {
          processInterrupt();
        }
View Full Code Here

        // MS Open XML Format
        addConverter( new OOXMLParser().getSupportedTypes( parseContext ),
                MSOfficeXMLConverter.class );

        // Open document format
        addConverter( new OpenDocumentParser().getSupportedTypes( parseContext ),
                OpenDocumentConverter.class );
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.parser.odf.OpenDocumentParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.