Package org.exoplatform.services.document

Examples of org.exoplatform.services.document.DocumentReader


                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray(),
                        Constants.DEFAULT_ENCODING));

                  data = propData.getValues();

                  if (data == null)
                  {
                     log.warn("null value found at property " + prop.getQPath().getAsString());
                  }

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null && !node.containAllProperties())
                  {
                     encProp =
                        (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null)
                  {
                     // encoding parameter used
                     encoding = new String(encProp.getValues().get(0).getAsByteArray(), Constants.DEFAULT_ENCODING);
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here


   public void testPDFDocumentReaderServiceXMPMetadataTikasFile() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/tikaTestPDF.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Document title");
         etalon.put(DCMetaData.CREATOR, "Document author");
         evalProps(etalon, testprops);
      }
View Full Code Here

                        LOG.debug("No value found for the property located at " + prop.getQPath().getAsString());
                     }
                     return;
                  }

                  DocumentReader dreader =
                     extractor.getDocumentReader(ValueDataUtil.getString(pmime.getValues().get(0)));

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null && !node.containAllProperties())
                  {
                     encProp =
                        (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null && encProp.getValues() != null && !encProp.getValues().isEmpty())
                  {
                     // encoding parameter used
                     encoding = ValueDataUtil.getString(encProp.getValues().get(0));
                  }
                  else
                  {
                     if (LOG.isDebugEnabled())
                     {
                        LOG.debug("No encoding found for the node located at " + node.getQPath().getAsString());
                     }
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here

                        new QPathEntry(Constants.JCR_DATA, 0)));
              
               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                  data = propData.getValues();
                 
                  if (data == null)
                     log.warn("null value found at property " + prop.getQPath().getAsString());

                  // check the jcr:encoding property
                  PropertyData encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));

                  if (encProp != null)
                  {
                     // encoding parameter used
                     String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is, encoding));
                           doc.add(createFulltextField(reader));

                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e)
                           {
                           }
                        }
                     }
                  }
                  else
                  {
                     // no encoding parameter
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is));
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
                           try
View Full Code Here

            if (pmime != null)
            {
               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                           extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                  // ok, have a reader
                  // if the prop obtainer from cache it will contains a values,
                  // otherwise read prop with values from DM
                  data =
                           prop.getValues().size() > 0 ? prop.getValues() : ((PropertyData) stateProvider.getItemData(
                                    node, new QPathEntry(Constants.JCR_DATA, 0))).getValues();
                  if (data == null)
                     log.warn("null value found at property " + prop.getQPath().getAsString());

                  // check the jcr:encoding property
                  PropertyData encProp =
                           (PropertyData) stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));

                  if (encProp != null)
                  {
                     // encoding parameter used
                     String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           doc.add(createFulltextField(dreader.getContentAsText(is = pvd.getAsStream(), encoding)));
                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e)
                           {
                           }
                        }
                     }
                  }
                  else
                  {
                     // no encoding parameter
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           doc.add(createFulltextField(dreader.getContentAsText(is = pvd.getAsStream())));
                        }
                        finally
                        {
                           try
                           {
View Full Code Here

            {
               List<ValueData> values = prop.getValues();
               ValueData mimeValue = values.get(0);
               String mime = new String(mimeValue.getAsByteArray());

               DocumentReader dreader = extractor.getDocumentReader(mime);

               InputStream is = null;
               try
               {
                  is = internalValue.getAsStream();

                  // check the jcr:encoding property
                  PropertyData encProp =
                           (PropertyData) stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));
                  if (encProp != null)
                  {
                     ValueData encValue = encProp.getValues().get(0);
                     text = dreader.getContentAsText(is, new String(encValue.getAsByteArray()));
                  }
                  else
                  {
                     text = dreader.getContentAsText(is);
                  }
               }
               finally
               {
                  try
View Full Code Here

      fis.close();
      fis = new FileInputStream(url.getFile());
      DocumentReaderService extr =
         (DocumentReaderService)session.getContainer().getComponentInstanceOfType(DocumentReaderService.class);

      DocumentReader dreader = extr.getDocumentReader("application/excel");
      assertNotNull(dreader);

      System.out.println(dreader);

      if (dreader instanceof MSExcelDocumentReader)
View Full Code Here

   public void testPDFDocumentReaderServiceXMPMetadataTikasFile() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/tikaTestPDF.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Document title");
         etalon.put(DCMetaData.CREATOR, "Document author");
         evalProps(etalon, testprops);
      }
View Full Code Here

          if (pmime != null)
          {
             // index if have jcr:mimeType sibling for this binary property only
             try
             {
                DocumentReader dreader =
                   extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                // ok, have a reader
                // if the prop obtainer from cache it will contains a values,
                // otherwise read prop with values from DM
                data =
                   prop.getValues().size() > 0 ? prop.getValues() : ((PropertyData)stateProvider.getItemData(node,
                      new QPathEntry(Constants.JCR_DATA, 0))).getValues();
                if (data == null)
                   log.warn("null value found at property " + prop.getQPath().getAsString());

                // check the jcr:encoding property
                PropertyData encProp =
                   (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));

                if (encProp != null)
                {
                   // encoding parameter used
                   String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                   for (ValueData pvd : data)
                   {
                      InputStream is = null;
                      try
                      {
                        
                         is = pvd.getAsStream();
                         Reader  reader = new StringReader(dreader.getContentAsText(is, encoding));
                         doc.add(createFulltextField(reader));
                        
                      }
                      finally
                      {
                         try
                         {
                            is.close();
                         }
                         catch (Throwable e)
                         {
                         }
                      }
                   }
                }
                else
                {
                   // no encoding parameter
                   for (ValueData pvd : data)
                   {
                      InputStream is = null;
                      try
                      {
                         doc.add(createFulltextField(dreader.getContentAsText(is = pvd.getAsStream())));
                      }
                      finally
                      {
                         try
                         {
View Full Code Here

                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(ValueDataUtil.getString(pmime.getValues().get(0)));

                  data = propData.getValues();

                  if (data == null)
                  {
                     LOG.warn("null value found at property " + prop.getQPath().getAsString());
                     return;
                  }

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null && !node.containAllProperties())
                  {
                     encProp =
                        (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null)
                  {
                     // encoding parameter used
                     ValueDataUtil.getString(encProp.getValues().get(0));
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here

TOP

Related Classes of org.exoplatform.services.document.DocumentReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.