Package org.apache.tika.exception

Examples of org.apache.tika.exception.TikaException


                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();
            xhtml.element("p", sd.getText(0, sd.getLength()));
            xhtml.endDocument();
        } catch (BadLocationException e) {
            throw new TikaException("Error parsing an RTF document", e);
        } catch (InternalError e) {
            throw new TikaException(
                    "Internal error parsing an RTF document, see TIKA-282", e);
        }
    }
View Full Code Here


            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        DataInputStream datainput = new DataInputStream(stream);
        if (!checkSignature(datainput)) {
            throw new TikaException("FLV signature not detected");
        }

        // header
        int version = datainput.readUnsignedByte();
        if (version != 1) {
            // should be 1, perhaps this is not flv?
            throw new TikaException("Unpexpected FLV version: " + version);
        }

        int typeFlags = datainput.readUnsignedByte();

        long len = readUInt32(datainput);
        if (len != 9) {
            // we only know about format with header of 9 bytes
            throw new TikaException("Unpexpected FLV header length: " + len);
        }

        long sizePrev = readUInt32(datainput);
        if (sizePrev != 0) {
            // should be 0, perhaps this is not flv?
            throw new TikaException(
                    "Unpexpected FLV first previous block size: " + sizePrev);
        }

        metadata.set(Metadata.CONTENT_TYPE, "video/x-flv");
        metadata.set("hasVideo", Boolean.toString((typeFlags & MASK_VIDEO) != 0));
View Full Code Here

                    metadata.set("height", Integer.toString(reader.getHeight(0)));
                    metadata.set("width", Integer.toString(reader.getWidth(0)));
                    reader.dispose();
                }
            } catch (IIOException e) {
                throw new TikaException(type + " parse error", e);
            }
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
View Full Code Here

                parse(new DocumentSummaryInformation(properties), metadata);
            }
        } catch (FileNotFoundException e) {
            // entry does not exist, just skip it
        } catch (NoPropertySetStreamException e) {
            throw new TikaException("Not a HPSF document", e);
        } catch (UnexpectedPropertySetTypeException e) {
            throw new TikaException("Unexpected HPSF document", e);
        } catch (MarkUnsupportedException e) {
            throw new TikaException("Invalid DocumentInputStream", e);
        }
    }
View Full Code Here

            }
        }

        String encoding = metadata.get(Metadata.CONTENT_ENCODING);
        if (encoding == null) {
            throw new TikaException(
                    "Text encoding could not be detected and no encoding"
                    + " hint is available in document metadata");
        }

        // TIKA-341: Only stomp on content-type after we're done trying to
        // use it to guess at the charset.
        metadata.set(Metadata.CONTENT_TYPE, "text/plain");

        try {
            Reader reader =
                new BufferedReader(new InputStreamReader(stream, encoding));

            // TIKA-240: Drop the BOM when extracting plain text
            reader.mark(1);
            int bom = reader.read();
            if (bom != '\ufeff') { // zero-width no-break space
                reader.reset();
            }

            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();

            xhtml.startElement("p");
            char[] buffer = new char[4096];
            int n = reader.read(buffer);
            while (n != -1) {
                xhtml.characters(buffer, 0, n);
                n = reader.read(buffer);
            }
            xhtml.endElement("p");

            xhtml.endDocument();
        } catch (UnsupportedEncodingException e) {
            throw new TikaException(
                    "Unsupported text encoding: " + encoding, e);
        }
    }
View Full Code Here

            SAXParser parser = factory.newSAXParser();
            parser.parse(
                    new CloseShieldInputStream(stream),
                    new OfflineContentHandler(xhtml));
        } catch (ParserConfigurationException e) {
            throw new TikaException("XML parser configuration error", e);
        }
    }
View Full Code Here

                    (POIXMLTextExtractor) ExtractorFactory.createExtractor(stream),
                    locale);
            extractor.getMetadataExtractor().extract(metadata);
            extractor.getXHTML(handler, metadata);
        } catch (InvalidFormatException e) {
            throw new TikaException("Error creating OOXML extractor", e);
        } catch (OpenXML4JException e) {
            throw new TikaException("Error creating OOXML extractor", e);
        } catch (XmlException e) {
            throw new TikaException("Error creating OOXML extractor", e);
        }
    }
View Full Code Here

            parser.parse(
                    new CloseShieldInputStream(stream),
                    new OfflineContentHandler(
                            new NSNormalizerContentHandler(dh)));
        } catch (ParserConfigurationException e) {
            throw new TikaException("XML parser configuration error", e);
        }
    }
View Full Code Here

   
        detector.setText(stream);
   
        CharsetMatch match = detector.detect();
        if (match == null) {
            throw new TikaException("Unable to detect character encoding");
        }
       
        metadata.set(Metadata.CONTENT_ENCODING, match.getName());
        String language = match.getLanguage();
        if (language != null) {
View Full Code Here

    public static List<Parser> getParsersFromZip(File zip, TikaConfig config)
            throws TikaException, FileNotFoundException {
        String zipMimeType = config.getMimeRepository().getMimeType(zip)
        .getName();
        if (!zipMimeType.equalsIgnoreCase("application/zip")) {
            throw new TikaException("The file you are using is note a zip file");
        }
        return getParsersFromZip(new FileInputStream(zip), config);
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.exception.TikaException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.