Examples of TaggedInputStream


Examples of org.apache.tika.io.TaggedInputStream

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        Parser parser = getParser(metadata);
        TaggedInputStream taggedStream = new TaggedInputStream(stream);
        TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
        try {
            parser.parse(taggedStream, taggedHandler, metadata, context);
        } catch (RuntimeException e) {
            throw new TikaException(
                    "Unexpected RuntimeException from " + parser, e);
        } catch (IOException e) {
            taggedStream.throwIfCauseOf(e);
            throw new TikaException(
                    "TIKA-198: Illegal IOException from " + parser, e);
        } catch (SAXException e) {
            taggedHandler.throwIfCauseOf(e);
            throw new TikaException(
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        Parser parser = getParser(metadata);
        TaggedInputStream taggedStream = new TaggedInputStream(stream);
        TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
        try {
            parser.parse(taggedStream, taggedHandler, metadata, context);
        } catch (RuntimeException e) {
            throw new TikaException(
                    "Unexpected RuntimeException from " + parser, e);
        } catch (IOException e) {
            taggedStream.throwIfCauseOf(e);
            throw new TikaException(
                    "TIKA-198: Illegal IOException from " + parser, e);
        } catch (SAXException e) {
            taggedHandler.throwIfCauseOf(e);
            throw new TikaException(
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        Parser parser = getParser(metadata);
        TaggedInputStream taggedStream = new TaggedInputStream(stream);
        TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);
        try {
            parser.parse(taggedStream, taggedHandler, metadata, context);
        } catch (RuntimeException e) {
            throw new TikaException(
                    "Unexpected RuntimeException from " + parser, e);
        } catch (IOException e) {
            taggedStream.throwIfCauseOf(e);
            throw new TikaException(
                    "TIKA-198: Illegal IOException from " + parser, e);
        } catch (SAXException e) {
            taggedHandler.throwIfCauseOf(e);
            throw new TikaException(
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
        TaggedInputStream tagged = new TaggedInputStream(stream);
        try {
            final TextExtractor ert = new TextExtractor(new XHTMLContentHandler(handler, metadata), metadata);
            ert.extract(stream);
            metadata.add(Metadata.CONTENT_TYPE, "application/rtf");
        } catch (IOException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("Error parsing an RTF document", e);
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

        MailContentHandler mch = new MailContentHandler(
                xhtml, metadata, config.isStrictParsing());
        parser.setContentHandler(mch);
        parser.setContentDecoding(true);
        TaggedInputStream tagged = TaggedInputStream.get(stream);
        try {
            parser.parse(tagged);
        } catch (IOException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("Failed to parse an email message", e);
        } catch (MimeException e) {
            // Unwrap the exception in case it was not thrown by mime4j
            Throwable cause = e.getCause();
            if (cause instanceof TikaException) {
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

        MailContentHandler mch = new MailContentHandler(
                xhtml, metadata, config.isStrictParsing());
        parser.setContentHandler(mch);
        parser.setContentDecoding(true);
        TaggedInputStream tagged = TaggedInputStream.get(stream);
        try {
            parser.parse(tagged);
        } catch (IOException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("Failed to parse an email message", e);
        } catch (MimeException e) {
            // Unwrap the exception in case it was not thrown by mime4j
            Throwable cause = e.getCause();
            if (cause instanceof TikaException) {
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
        TaggedInputStream tagged = new TaggedInputStream(stream);
        try {
            final TextExtractor ert = new TextExtractor(new XHTMLContentHandler(handler, metadata), metadata);
            ert.extract(stream);
            metadata.add(Metadata.CONTENT_TYPE, "application/rtf");
        } catch (IOException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("Error parsing an RTF document", e);
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

        MailContentHandler mch = new MailContentHandler(
                xhtml, metadata, context, config.isStrictParsing());
        parser.setContentHandler(mch);
        parser.setContentDecoding(true);
        TaggedInputStream tagged = TaggedInputStream.get(stream);
        try {
            parser.parse(tagged);
        } catch (IOException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("Failed to parse an email message", e);
        } catch (MimeException e) {
            // Unwrap the exception in case it was not thrown by mime4j
            Throwable cause = e.getCause();
            if (cause instanceof TikaException) {
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

        // so we don't modify the current position of the stream
        File file = stream.getFile();

        // Use a tagged stream to distinguish between real I/O problems
        // and parse errors thrown as IOExceptions by POI.
        TaggedInputStream tagged = new TaggedInputStream(
                new BufferedInputStream(new FileInputStream(file)));
        try {
            // POIFSFileSystem might try close the stream
            POIFSFileSystem fs =
                new POIFSFileSystem(new CloseShieldInputStream(tagged));

            // Optimize a possible later parsing process by keeping
            // a reference to the already opened POI file system
            stream.setOpenContainer(fs);

            Set<String> names = new HashSet<String>();
            for (Entry entry : fs.getRoot()) {
                names.add(entry.getName());
            }
            return names;
        } catch (IOException e) {
            // Was this a real I/O problem?
            tagged.throwIfCauseOf(e);
            // Parse error in POI, so we don't know the file type
            return Collections.emptySet();
        } catch (RuntimeException e) {
            // Another problem in POI
            return Collections.emptySet();
        } finally {
            tagged.close();
        }
    }
View Full Code Here

Examples of org.apache.tika.io.TaggedInputStream

    public void parse(
            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
        TaggedInputStream tagged = new TaggedInputStream(stream);
        try {
            RTFEmbObjHandler embObjHandler = new RTFEmbObjHandler(handler,
                    metadata, context);
            final TextExtractor ert =
                    new TextExtractor(new XHTMLContentHandler(handler,
                    metadata), metadata, embObjHandler);
            ert.extract(stream);
            metadata.add(Metadata.CONTENT_TYPE, "application/rtf");
        } catch (IOException e) {
            tagged.throwIfCauseOf(e);
            throw new TikaException("Error parsing an RTF document", e);
        }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.