Package org.apache.poi

Examples of org.apache.poi.POITextExtractor


    InputStream stream = null;

    try {
      stream = rawDocument.getContentAsStream();
      POITextExtractor contentExtractor = ExtractorFactory.createExtractor(stream);
      setCleanedContent(contentExtractor.getText());
      POITextExtractor metadataExtractor = contentExtractor.getMetadataTextExtractor();

      Map<String, String> metaDataMap = createMetaDataMap(metadataExtractor.getText());
//      if (mLog.isDebugEnabled()) {
//      mLog.info("Found meta data ::" + metadataExtractor.getText()
//              + ":: in " + rawDocument.getUrl());
//      }
View Full Code Here


      is = new ByteArrayInputStream(str.getBytes());
    } else {
      throw new IllegalArgumentException("Parameter must be instance of byte[]");
    }
    String ret = null;
    POITextExtractor extractor;

    try {
      extractor = ExtractorFactory.createExtractor(is);

      ret = extractor.getText();

    } catch (IOException e) {
      throw new CRException(e);
    } catch (InvalidFormatException e) {
      throw new CRException(e);
View Full Code Here

      msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
   }

   public void testFile() throws Exception {
      // Excel
      POITextExtractor xlsExtractor = ExtractorFactory.createExtractor(xls);
      assertNotNull("Had empty extractor for " + xls, xlsExtractor);
      assertTrue("Expected instanceof ExcelExtractor, but had: " + xlsExtractor.getClass(),
            xlsExtractor
            instanceof ExcelExtractor
      );
      assertTrue(
            xlsExtractor.getText().length() > 200
      );

      assertTrue(
            ExtractorFactory.createExtractor(xlsx)
            instanceof XSSFExcelExtractor
View Full Code Here

      new ExcelExtractor(simpleXLS);
   
    POITextExtractor[] extractors =
      new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
    for (int i = 0; i < extractors.length; i++) {
      POITextExtractor extractor = extractors[i];
     
      String text = extractor.getText().replaceAll("[\r\t]", "");
      //System.out.println(text.length());
      //System.out.println(text);
      assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
      Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
      Matcher m = pattern.matcher(text);
View Full Code Here

      new ExcelExtractor(HSSFTestDataSamples.openSampleWorkbook("SampleSS.xls"));
   
    POITextExtractor[] extractors =
      new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
    for (int i = 0; i < extractors.length; i++) {
      POITextExtractor extractor = extractors[i];
     
      String text = extractor.getText().replaceAll("[\r\t]", "");
      assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
      Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
      Matcher m = pattern.matcher(text);
      assertTrue(m.matches());     
    }
View Full Code Here

      new ExcelExtractor(HSSFTestDataSamples.openSampleWorkbook("SampleSS.xls"));
   
    POITextExtractor[] extractors =
      new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
    for (int i = 0; i < extractors.length; i++) {
      POITextExtractor extractor = extractors[i];
     
      String text = extractor.getText().replaceAll("[\r\t]", "");
      assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
      Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
      Matcher m = pattern.matcher(text);
      assertTrue(m.matches());     
    }
View Full Code Here

      new ExcelExtractor(HSSFTestDataSamples.openSampleWorkbook("SampleSS.xls"));
   
    POITextExtractor[] extractors =
      new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
    for (int i = 0; i < extractors.length; i++) {
      POITextExtractor extractor = extractors[i];
     
      String text = extractor.getText().replaceAll("[\r\t]", "");
      assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
      Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
      Matcher m = pattern.matcher(text);
      assertTrue(m.matches());     
    }
View Full Code Here

         System.out.println(DIVIDER);
        
         File f = new File(args[i]);
         System.out.println(f);
        
         POITextExtractor extractor =
            ExtractorFactory.createExtractor(f);
         POITextExtractor metadataExtractor =
            extractor.getMetadataTextExtractor();
        
         System.out.println("   " + DIVIDER);
         System.out.println(metadataExtractor.getText());
         System.out.println("   " + DIVIDER);
         System.out.println(extractor.getText());
         System.out.println(DIVIDER);
      }
   }
View Full Code Here

      new ExcelExtractor(HSSFTestDataSamples.openSampleWorkbook("SampleSS.xls"));
   
    POITextExtractor[] extractors =
      new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
    for (int i = 0; i < extractors.length; i++) {
      POITextExtractor extractor = extractors[i];
     
      String text = extractor.getText().replaceAll("[\r\t]", "");
      assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
      Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
      Matcher m = pattern.matcher(text);
      assertTrue(m.matches());     
    }
View Full Code Here

      new ExcelExtractor(HSSFTestDataSamples.openSampleWorkbook("SampleSS.xls"));
   
    POITextExtractor[] extractors =
      new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
    for (int i = 0; i < extractors.length; i++) {
      POITextExtractor extractor = extractors[i];
     
      String text = extractor.getText().replaceAll("[\r\t]", "");
      assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
      Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
      Matcher m = pattern.matcher(text);
      assertTrue(m.matches());     
    }
View Full Code Here

TOP

Related Classes of org.apache.poi.POITextExtractor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.