Examples of EmbeddedDocumentExtractor


Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

abstract class AbstractPOIFSExtractor {

    private final EmbeddedDocumentExtractor extractor;

    protected AbstractPOIFSExtractor(ParseContext context) {
        EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);

        if (ex==null) {
            this.extractor = new ParsingEmbeddedDocumentExtractor(context);
        } else {
            this.extractor = ex;
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
      
       // We work by recursing, so get the appropriate bits
       EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);
       EmbeddedDocumentExtractor embeddedExtractor;
       if (ex==null) {
           embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
       } else {
           embeddedExtractor = ex;
       }
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

                }
                entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
            }

            // Use the delegate parser to parse the compressed document
            EmbeddedDocumentExtractor extractor = context.get(
                    EmbeddedDocumentExtractor.class,
                    new ParsingEmbeddedDocumentExtractor(context));
            if (extractor.shouldParseEmbedded(entrydata)) {
                extractor.parseEmbedded(cis, xhtml, entrydata, true);
            }
        } finally {
            cis.close();
        }
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

    private MimeTypes mimeTypes;
    private Detector detector;
    private static final Log logger = LogFactory.getLog(AbstractPOIFSExtractor.class);

    protected AbstractPOIFSExtractor(ParseContext context) {
        EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);

        if (ex==null) {
            this.extractor = new ParsingEmbeddedDocumentExtractor(context);
        } else {
            this.extractor = ex;
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

    private final EmbeddedDocumentExtractor embeddedExtractor;

    public AbstractOOXMLExtractor(ParseContext context, POIXMLTextExtractor extractor) {
        this.extractor = extractor;

        EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);

        if (ex==null) {
            embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
        } else {
            embeddedExtractor = ex;
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

        if (names != null) {

            PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
            if (embeddedFiles != null) {

                EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class);
                if (embeddedExtractor == null) {
                    embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
                }

                Map<String, COSObjectable> embeddedFileNames = embeddedFiles.getNames();

                if (embeddedFileNames != null) {
                    for (Map.Entry<String,COSObjectable> ent : embeddedFileNames.entrySet()) {
                        PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue();
                        PDEmbeddedFile file = spec.getEmbeddedFile();

                        Metadata metadata = new Metadata();
                        // TODO: other metadata?
                        metadata.set(Metadata.RESOURCE_NAME_KEY, ent.getKey());
                        metadata.set(Metadata.CONTENT_TYPE, file.getSubtype());
                        metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize()));

                        if (embeddedExtractor.shouldParseEmbedded(metadata)) {
                            TikaInputStream stream = TikaInputStream.get(file.createInputStream());
                            try {
                                embeddedExtractor.parseEmbedded(
                                                                stream,
                                                                new EmbeddedContentHandler(handler),
                                                                metadata, false);
                            } finally {
                                stream.close();
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

abstract class AbstractPOIFSExtractor {

    private final EmbeddedDocumentExtractor extractor;

    protected AbstractPOIFSExtractor(ParseContext context) {
        EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);

        if (ex==null) {
            this.extractor = new ParsingEmbeddedDocumentExtractor(context);
        } else {
            this.extractor = ex;
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

    public PackageExtractor(
            ContentHandler handler, Metadata metadata, ParseContext context) {
        this.handler = handler;
        this.metadata = metadata;

        EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);

        if (ex==null) {
            this.extractor = new ParsingEmbeddedDocumentExtractor(context);
        } else {
            this.extractor = ex;
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
      
       // We work by recursing, so get the appropriate bits
       EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);
       EmbeddedDocumentExtractor embeddedExtractor;
       if (ex==null) {
           embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
       } else {
           embeddedExtractor = ex;
       }
View Full Code Here

Examples of org.apache.tika.extractor.EmbeddedDocumentExtractor

        return Collections.singleton(MediaType.application("x-fictionbook+xml"));
    }

    @Override
    protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata, ParseContext context) {
        EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class);

        if (ex == null) {
            ex = new ParsingEmbeddedDocumentExtractor(context);
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.