Package org.apache.tika.parser.microsoft.ooxml

Examples of org.apache.tika.parser.microsoft.ooxml.OOXMLParser


                 throw new EncryptedDocumentException();
              }

              // Decrypt the OLE2 stream, and delegate the resulting OOXML
              //  file to the regular OOXML parser for normal handling
              OOXMLParser parser = new OOXMLParser();

              parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
                    new BodyContentHandler(xhtml)),
                    metadata, context);
           } catch (GeneralSecurityException ex) {
              throw new EncryptedDocumentException(ex);
           }
View Full Code Here


      
       // OfficeParser won't handle it
       assertEquals(false, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
      
       // OOXMLParser won't handle it
       assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
      
       // AutoDetectParser doesn't break on it
       input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb");

       try {
View Full Code Here

      
       // OfficeParser will claim to handle it
       assertEquals(true, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
      
       // OOXMLParser won't handle it
       assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
      
       // AutoDetectParser doesn't break on it
       input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls");

       try {
View Full Code Here

                    try {
                        if (!d.verifyPassword(Decryptor.DEFAULT_PASSWORD)) {
                            throw new TikaException("Unable to process: document is encrypted");
                        }

                        OOXMLParser parser = new OOXMLParser();

                        parser.parse(d.getDataStream(filesystem), new EmbeddedContentHandler(
                                        new BodyContentHandler(xhtml)),
                                        metadata, context);
                    } catch (GeneralSecurityException ex) {
                        throw new TikaException("Unable to process encrypted document", ex);
                    }
View Full Code Here

                 throw new EncryptedDocumentException();
              }

              // Decrypt the OLE2 stream, and delegate the resulting OOXML
              //  file to the regular OOXML parser for normal handling
              OOXMLParser parser = new OOXMLParser();

              parser.parse(d.getDataStream(root), new EmbeddedContentHandler(
                    new BodyContentHandler(xhtml)),
                    metadata, context);
           } catch (GeneralSecurityException ex) {
              throw new EncryptedDocumentException(ex);
           }
View Full Code Here

        // Rich Text Format
        addConverter( new RTFParser().getSupportedTypes( parseContext ), RTFConverter.class );

        // MS Open XML Format
        addConverter( new OOXMLParser().getSupportedTypes( parseContext ),
                MSOfficeXMLConverter.class );

        // Open document format
        addConverter( new OpenDocumentParser().getSupportedTypes( parseContext ),
                OpenDocumentConverter.class );
View Full Code Here

    InputStream input;
    try {
      input = new FileInputStream(new File(f.fileName()));
      ContentHandler textHandler = new BodyContentHandler(-1);
      Metadata metadata = new Metadata();
      OOXMLParser parser = new OOXMLParser();
      ParseContext context = new ParseContext();
      parser.parse(input, textHandler, metadata, context);
      String[] result = textHandler.toString().split(regex);
      for (int i=0; i<result.length && keepRunning; i++) {
        if (interrupt) {
          processInterrupt();
        }
View Full Code Here

                    try {
                        if (!d.verifyPassword(Decryptor.DEFAULT_PASSWORD)) {
                            throw new EncryptedDocumentException();
                        }

                        OOXMLParser parser = new OOXMLParser();

                        parser.parse(d.getDataStream(filesystem), new EmbeddedContentHandler(
                                        new BodyContentHandler(xhtml)),
                                        metadata, context);
                    } catch (GeneralSecurityException ex) {
                        throw new EncryptedDocumentException(ex);
                    }
View Full Code Here

        // Rich Text Format
        addConverter( new RTFParser().getSupportedTypes( parseContext ), RTFConverter.class );

        // MS Open XML Format
        addConverter( new OOXMLParser().getSupportedTypes( parseContext ),
                MSOfficeXMLConverter.class );

        // Open document format
        addConverter( new OpenDocumentParser().getSupportedTypes( parseContext ),
                OpenDocumentConverter.class );
View Full Code Here

      
       // OfficeParser won't handle it
       assertEquals(false, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
      
       // OOXMLParser won't handle it
       assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
      
       // AutoDetectParser doesn't break on it
       input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb");

       try {
View Full Code Here

TOP

Related Classes of org.apache.tika.parser.microsoft.ooxml.OOXMLParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.