Package com.dotcms.repackage.org.apache.tika

Examples of com.dotcms.repackage.org.apache.tika.Tika


    Map<String, String> metaMap = new HashMap<String, String>();

    // store content metadata on disk
        File contentM=APILocator.getFileAssetAPI().getContentMetadataFile(inode);

    Tika t = new Tika();
    Metadata met = new Metadata();
    t.setMaxStringLength(-1);
    Reader fulltext = null;
    InputStream is = null;
    // if the limit is not "unlimited"
    // I can use the faster parseToString
    try {

      if(forceMemory){
        // no worry about the limit and less time to process.
        String content = t.parseToString(new FileInputStream(binFile), met);
        metaMap = new HashMap<String, String>();
        for (int i = 0; i < met.names().length; i++) {
          String name = met.names()[i];
          if (UtilMethods.isSet(name) && met.get(name) != null) {
            // we will want to normalize our metadata for searching
            String[] x = translateKey(name);
            for (String y : x)
              metaMap.put(y, met.get(name));
          }
        }
        metaMap.put(FileAssetAPI.CONTENT_FIELD, content);
      }
      else {


        is = TikaInputStream.get(binFile);
        fulltext = t.parse(is, met);
        metaMap = new HashMap<String, String>();
        for (int i = 0; i < met.names().length; i++) {
          String name = met.names()[i];
          if (UtilMethods.isSet(name) && met.get(name) != null) {
            // we will want to normalize our metadata for searching
View Full Code Here


                "metaData"+File.separator+"content");
    }

    @Override
    public String getContentMetadataAsString(File metadataFile) throws Exception {
        String type=new Tika().detect(metadataFile);
       
        InputStream input=new FileInputStream(metadataFile);
       
        if(type.equals("application/x-gzip")) {
            // gzip compression was used
View Full Code Here

TOP

Related Classes of com.dotcms.repackage.org.apache.tika.Tika

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.