Package org.exoplatform.services.document

Examples of org.exoplatform.services.document.DocumentReader


   public void testPDFDocumentReaderServiceXMPMetadataTikasFile() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/tikaTestPDF.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Document title");
         etalon.put(DCMetaData.CREATOR, "Document author");
         evalProps(etalon, testprops);
      }
View Full Code Here


      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/test.html");
      try
      {
         String mimeType = mimetypeResolver.getMimeType("test.html");

         DocumentReader dr = service.getDocumentReader(mimeType);
         String text = dr.getContentAsText(is);
         assertTrue(text.contains("This is the third maintenance release of the redesigned 2.0"));
      }
      finally
      {
         is.close();
View Full Code Here

   {

      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/ch-core.html");
      String mimeType = mimetypeResolver.getMimeType("ch-core.html");

      DocumentReader dr = service.getDocumentReader(mimeType);
      String text = dr.getContentAsText(is);

      assertTrue((normalizeWhitespaces(text))
         .contains("The eXo Core is a set of common services that are used by eXo products and modules, it also can be used in the business logic. It's Authentication and Security, Organization, Database, Logging, JNDI, LDAP, Document reader and other services."));
   }
View Full Code Here

      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/test.html");
      try
      {
         String mimeType = mimetypeResolver.getMimeType("test.html");

         DocumentReader dr = service.getDocumentReader(mimeType);
         String text = dr.getContentAsText(is);
         assertTrue(text.contains("This is the third maintenance release of the redesigned 2.0"));
      }
      finally
      {
         is.close();
View Full Code Here

   public void testXHTMLGetContentAsString() throws Exception
   {
      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/testXHTML.html");
      try
      {
         DocumentReader dr = service.getDocumentReader("application/xhtml+xml");
         String text = dr.getContentAsText(is);
         assertTrue(text
            .contains("This document tests the ability of Apache Tika to extract content from an XHTML document."));
      }
      finally
      {
View Full Code Here

   public void testPDFDocumentReaderServiceXMPMetadata() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/MyTest.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Test de convertion de fichier tif");
         etalon.put(DCMetaData.CREATOR, "Christian Klaus");
         etalon.put(DCMetaData.DESCRIPTION, "20080901 TEST Christian Etat OK");
         //         Calendar c = ISO8601.parseEx("2008-09-01T08:01:10+00:00");
View Full Code Here

   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/pfs_accapp.pdf");
      try
      {

         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Personal Account Opening Form VN");
         etalon.put(DCMetaData.CREATOR, "mr");
         etalon.put(DCMetaData.PUBLISHER, "Adobe LiveCycle Designer ES 8.2");
         evalProps(etalon, testprops, false);
View Full Code Here

   public void testPDFDocumentReaderServiceXMPUsecase1() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/Trait_union.06.Mai_2009.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "journal interne mai 2009.qxp");
         etalon.put(DCMetaData.CREATOR, "presse");
         evalProps(etalon, testprops, false);
      }
View Full Code Here

                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                  data = propData.getValues();

                  if (data == null)
                     log.warn("null value found at property " + prop.getQPath().getAsString());

                  // check the jcr:encoding property
                  PropertyData encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                        ItemType.PROPERTY);

                  if (encProp != null)
                  {
                     // encoding parameter used
                     String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is, encoding));
                           doc.add(createFulltextField(reader));

                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e)
                           {
                           }
                        }
                     }
                  }
                  else
                  {
                     // no encoding parameter
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is));
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
                           try
View Full Code Here

                     Constants.JCR_DATA, 0)));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                  data = propData.getValues();

                  if (data == null)
                     log.warn("null value found at property " + prop.getQPath().getAsString());

                  // check the jcr:encoding property
                  PropertyData encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));

                  if (encProp != null)
                  {
                     // encoding parameter used
                     String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is, encoding));
                           doc.add(createFulltextField(reader));

                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e)
                           {
                           }
                        }
                     }
                  }
                  else
                  {
                     // no encoding parameter
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is));
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
                           try
View Full Code Here

TOP

Related Classes of org.exoplatform.services.document.DocumentReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.