Package org.kitesdk.morphline.tika.decompress

Source Code of org.kitesdk.morphline.tika.decompress.EmbeddedExtractor

/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.morphline.tika.decompress;

import java.io.InputStream;

import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;

import org.kitesdk.morphline.shaded.com.google.common.io.Closeables;

/**
* Adapted from org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor
*/
final class EmbeddedExtractor {

  public boolean parseEmbedded(InputStream stream, Record record, String name, Command child) {
    // Use the delegate parser to parse this entry
   
    TemporaryResources tmp = new TemporaryResources();
    try {
      final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp);
      if (stream instanceof TikaInputStream) {
        final Object container = ((TikaInputStream) stream).getOpenContainer();
        if (container != null) {
          newStream.setOpenContainer(container);
        }
      }
      record = record.copy();

      record.replaceValues(Fields.ATTACHMENT_BODY, newStream);
      record.removeAll(Fields.ATTACHMENT_MIME_TYPE);
      record.removeAll(Fields.ATTACHMENT_CHARSET);
     
      record.removeAll(Fields.ATTACHMENT_NAME);
      if (name != null && name.length() > 0) {
        record.put(Fields.ATTACHMENT_NAME, name);
      }
     
      return child.process(record);
//    } catch (RuntimeException e) {
//     
//      // THIS IS THE DIFF WRT ParsingEmbeddedDocumentExtractor
//      throw new MorphlineRuntimeException(e);
//     
//        // TODO: can we log a warning somehow?
//        // Could not parse the entry, just skip the content
    } finally {
      Closeables.closeQuietly(tmp);
    }

  }

}
TOP

Related Classes of org.kitesdk.morphline.tika.decompress.EmbeddedExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.