Examples of parseToString()


Examples of com.dotcms.repackage.org.apache.tika.Tika.parseToString()

    // I can use the faster parseToString
    try {

      if(forceMemory){
        // no worry about the limit and less time to process.
        String content = t.parseToString(new FileInputStream(binFile), met);
        metaMap = new HashMap<String, String>();
        for (int i = 0; i < met.names().length; i++) {
          String name = met.names()[i];
          if (UtilMethods.isSet(name) && met.get(name) != null) {
            // we will want to normalize our metadata for searching
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

  public String getText() {
    if (text == null || lastTextUpdate < getFile().lastModified()) {
      try {
        Tika tika = new Tika();
        tika.setMaxStringLength(-1);
        text = tika.parseToString(getFile()).replaceAll("\n\\s*\n+","\n\n");
      } catch (TikaException e) {
        text = "";
      } catch (IOException e) {
        text = "";
      } finally {
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

        // Simple type detection
        assertEquals("text/plain", tika.detect("test.txt"));
        assertEquals("application/pdf", tika.detect("test.pdf"));

        // Simple text extraction
        String xml = tika.parseToString(new File("pom.xml"));
        assertTrue(xml.contains("tika-bundle"));

        // Package extraction
        ContentHandler handler = new BodyContentHandler();
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

        Metadata metadata = new Metadata();
        InputStream stream = TikaInputStream.get(file, metadata);

        // Test w/ default limit:
        Tika localTika = new Tika();
        String content = localTika.parseToString(stream, metadata);
        // parseToString closes for convenience:
        //stream.close();
        assertTrue(content.length() > 500);

        // Test setting max length on the instance:
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

        assertTrue(content.length() > 500);

        // Test setting max length on the instance:
        localTika.setMaxStringLength(200);
        stream = TikaInputStream.get(file, metadata);
        content = localTika.parseToString(stream, metadata);
       
        // parseToString closes for convenience:
        //stream.close();
        assertTrue(content.length() <= 200);
       
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

        //stream.close();
        assertTrue(content.length() <= 200);
       
        // Test setting max length per-call:
        stream = TikaInputStream.get(file, metadata);
        content = localTika.parseToString(stream, metadata, 100);
        // parseToString closes for convenience:
        //stream.close();
        assertTrue(content.length() <= 100);
    }
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

       Tika tika = new Tika();
       String hit = "\u0623\u0639\u0631\u0628";

       for (int i = 1; i <=4; i++){
          String fileName = "/test-documents/testHTMLNoisyMetaEncoding_"+i+".html";
          String content = tika.parseToString(
                HtmlParserTest.class.getResourceAsStream(fileName));
          assertTrue("testing: " +fileName, content.contains(hit));
       }
    }
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

        Metadata metadata = new Metadata();
        InputStream stream = TikaInputStream.get(file, metadata);

        // Test w/ default limit:
        Tika localTika = new Tika();
        String content = localTika.parseToString(stream, metadata);
        // parseToString closes for convenience:
        //stream.close();
        assertTrue(content.length() > 500);

        // Test setting max length on the instance:
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

        assertTrue(content.length() > 500);

        // Test setting max length on the instance:
        localTika.setMaxStringLength(200);
        stream = TikaInputStream.get(file, metadata);
        content = localTika.parseToString(stream, metadata);
       
        // parseToString closes for convenience:
        //stream.close();
        assertTrue(content.length() <= 200);
       
View Full Code Here

Examples of org.apache.tika.Tika.parseToString()

        //stream.close();
        assertTrue(content.length() <= 200);
       
        // Test setting max length per-call:
        stream = TikaInputStream.get(file, metadata);
        content = localTika.parseToString(stream, metadata, 100);
        // parseToString closes for convenience:
        //stream.close();
        assertTrue(content.length() <= 100);
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.