Package org.exoplatform.services.document

Examples of org.exoplatform.services.document.DocumentReader


    */
   public DocumentReader getDocumentReader(String mimeType) throws HandlerNotFoundException
   {
      // first check user defined old-style and previously registered TikaDocumentReaders
      mimeType = mimeType.toLowerCase();
      DocumentReader reader = readers_.get(mimeType);

      if (reader != null)
      {
         return reader;
      }
View Full Code Here


                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray(),
                        Constants.DEFAULT_ENCODING));

                  data = propData.getValues();

                  if (data == null)
                  {
                     log.warn("null value found at property " + prop.getQPath().getAsString());
                  }

                  // check the jcr:encoding property
                  PropertyData encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                        ItemType.PROPERTY);

                  String encoding = null;
                  if (encProp != null)
                  {
                     // encoding parameter used
                     encoding = new String(encProp.getValues().get(0).getAsByteArray(), Constants.DEFAULT_ENCODING);
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here

                     Constants.JCR_DATA, 0)));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                  data = propData.getValues();

                  if (data == null)
                  {
                     log.warn("null value found at property " + prop.getQPath().getAsString());
                  }

                  // check the jcr:encoding property
                  PropertyData encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));

                  if (encProp != null)
                  {
                     // encoding parameter used
                     String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is, encoding));
                           doc.add(createFulltextField(reader));

                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e)
                           {
                           }
                        }
                     }
                  }
                  else
                  {
                     // no encoding parameter
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is));
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
                           try
View Full Code Here

                        LOG.debug("No value found for the property located at " + prop.getQPath().getAsString());
                     }
                     return;
                  }

                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray(),
                        Constants.DEFAULT_ENCODING));

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null && !node.containAllProperties())
                  {
                     encProp =
                        (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null && encProp.getValues() != null && !encProp.getValues().isEmpty())
                  {
                     // encoding parameter used
                     encoding = new String(encProp.getValues().get(0).getAsByteArray(), Constants.DEFAULT_ENCODING);
                  }
                  else
                  {
                     if (LOG.isDebugEnabled())
                     {
                        LOG.debug("No encoding found for the node located at " + node.getQPath().getAsString());
                     }
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here

   public void testPDFDocumentReaderServiceXMPMetadataTikasFile() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/tikaTestPDF.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Document title");
         etalon.put(DCMetaData.CREATOR, "Document author");
         evalProps(etalon, testprops);
      }
View Full Code Here

   @Override
   public DocumentReader getDocumentReader(String mimeType) throws HandlerNotFoundException
   {
      // first check user defined old-style and previously registered TikaDocumentReaders
      mimeType = mimeType.toLowerCase();
      DocumentReader reader = readers_.get(mimeType);

      if (reader != null)
      {
         return reader;
      }
View Full Code Here

                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray(),
                        Constants.DEFAULT_ENCODING));

                  data = propData.getValues();

                  if (data == null)
                  {
                     LOG.warn("null value found at property " + prop.getQPath().getAsString());
                  }

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null && !node.containAllProperties())
                  {
                     encProp =
                        (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null)
                  {
                     // encoding parameter used
                     encoding = new String(encProp.getValues().get(0).getAsByteArray(), Constants.DEFAULT_ENCODING);
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here

   @Override
   public DocumentReader getDocumentReader(String mimeType) throws HandlerNotFoundException
   {
      // first check user defined old-style and previously registered TikaDocumentReaders
      mimeType = mimeType.toLowerCase();
      DocumentReader reader = readers_.get(mimeType);

      if (reader != null)
      {
         return reader;
      }
View Full Code Here

                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                  data = propData.getValues();

                  if (data == null)
                     log.warn("null value found at property " + prop.getQPath().getAsString());

                  // check the jcr:encoding property
                  PropertyData encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                        ItemType.PROPERTY);

                  if (encProp != null)
                  {
                     // encoding parameter used
                     String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is, encoding));
                           doc.add(createFulltextField(reader));

                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e)
                           {
                           }
                        }
                     }
                  }
                  else
                  {
                     // no encoding parameter
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is));
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
                           try
View Full Code Here

                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray(),
                        Constants.DEFAULT_ENCODING));

                  data = propData.getValues();

                  if (data == null)
                  {
                     log.warn("null value found at property " + prop.getQPath().getAsString());
                  }

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null)
                  {
                     encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null)
                  {
                     // encoding parameter used
                     encoding = new String(encProp.getValues().get(0).getAsByteArray(), Constants.DEFAULT_ENCODING);
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here

TOP

Related Classes of org.exoplatform.services.document.DocumentReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.