Package org.apache.tika.sax

Examples of org.apache.tika.sax.XHTMLContentHandler


            InputStream stream, ContentHandler handler,
            Metadata metadata, ParseContext context)
            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // Create handlers for the various kinds of ID3 tags
        ID3TagsAndAudio audioAndTags = getAllTagHandlers(stream, handler);

        if (audioAndTags.tags.length > 0) {
           CompositeTagHandler tag = new CompositeTagHandler(audioAndTags.tags);

           metadata.set(Metadata.TITLE, tag.getTitle());
           metadata.set(Metadata.AUTHOR, tag.getArtist());
           metadata.set(XMPDM.ARTIST, tag.getArtist());
           metadata.set(XMPDM.COMPOSER, tag.getComposer());
           metadata.set(XMPDM.ALBUM, tag.getAlbum());
           metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
           metadata.set(XMPDM.GENRE, tag.getGenre());
           metadata.set(XMPDM.LOG_COMMENT, tag.getComment());

           xhtml.element("h1", tag.getTitle());
           xhtml.element("p", tag.getArtist());

            // ID3v1.1 Track addition
            if (tag.getTrackNumber() != null) {
                xhtml.element("p", tag.getAlbum() + ", track " + tag.getTrackNumber());
                metadata.set(XMPDM.TRACK_NUMBER, tag.getTrackNumber());
            } else {
                xhtml.element("p", tag.getAlbum());
            }
            xhtml.element("p", tag.getYear());
            xhtml.element("p", tag.getComment());
            xhtml.element("p", tag.getGenre());
        }
        if (audioAndTags.audio != null) {
            metadata.set("samplerate", String.valueOf(audioAndTags.audio.getSampleRate()));
            metadata.set("channels", String.valueOf(audioAndTags.audio.getChannels()));
            metadata.set("version", audioAndTags.audio.getVersion());
            metadata.set(
                    XMPDM.AUDIO_SAMPLE_RATE,
                    Integer.toString(audioAndTags.audio.getSampleRate()));
        }
        if (audioAndTags.lyrics != null && audioAndTags.lyrics.hasLyrics()) {
          xhtml.element("p", audioAndTags.lyrics.lyricsText);
        }

        xhtml.endDocument();
    }
View Full Code Here


        }
    }

    /**
     * Creates a handler by wrapping the given SAX handler in a new
     * {@link XHTMLContentHandler} and delegating to the constructor that
     * accepts the mapper, that XHTML handler and the metadata.
     *
     * @param mapper   HTML mapper, passed through unchanged to the delegate constructor
     * @param handler  SAX content handler to wrap in an {@link XHTMLContentHandler}
     * @param metadata metadata object, given both to the XHTML wrapper and
     *                 to the delegate constructor
     */
    public HtmlHandler(
            HtmlMapper mapper, ContentHandler handler, Metadata metadata) {
        this(mapper, new XHTMLContentHandler(handler, metadata), metadata);
    }
View Full Code Here

        // First up, which version of the format are we handling?
        byte[] header = new byte[128];
        IOUtils.readFully(stream, header);
        String version = new String(header, 0, 6, "US-ASCII");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        if (version.equals("AC1018")) {
            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
            if(skipToPropertyInfoSection(stream, header)){
                get2004Props(stream,metadata,xhtml);
            }
        } else if (version.equals("AC1021") || version.equals("AC1024")) {
            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
            skipToPropertyInfoSection(stream, header);
            get2007and2010Props(stream,metadata,xhtml);
        } else {
            throw new TikaException(
                    "Unsupported AutoCAD drawing version: " + version);
        }

        xhtml.endDocument();
    }
View Full Code Here

        metadata.set(Metadata.TITLE, feedTitle);
        metadata.set(Metadata.DESCRIPTION, feedDesc);
        // store the other fields in the metadata

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        List entries = feed.getEntries();
        for (Iterator i = entries.iterator(); i.hasNext();) {
            SyndEntry entry = (SyndEntry) i.next();
            String link = entry.getLink();
            if (link == null)
                continue;
            SyndContent description = entry.getDescription();

            String title = stripTags(entry.getTitleEx());
            xhtml.startElement("a", "href", link);
            xhtml.characters(title);
            xhtml.endElement("a");
            xhtml.startElement("p");
            if (description != null)
                xhtml.characters(description.getValue());
            xhtml.endElement("p");
        }

        xhtml.endDocument();
    }
View Full Code Here

    private Type type;

    private String packageName;

    public XHTMLClassVisitor(ContentHandler handler, Metadata metadata) {
        this.xhtml = new XHTMLContentHandler(handler, metadata);
        this.metadata = metadata;
    }
View Full Code Here

        metadata.set(DublinCore.DATE, font.getHeader().getCreated().getTime());
        metadata.set(
                Property.internalDate(DublinCore.MODIFIED),
                font.getHeader().getModified().getTime());

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }
View Full Code Here

    private final XHTMLContentHandler handler;

    private PDF2XHTML(ContentHandler handler, Metadata metadata)
            throws IOException {
        this.handler = new XHTMLContentHandler(handler, metadata);
        setForceParsing(true);
        setSortByPosition(true);
    }
View Full Code Here

            // There is no way to know whether this exception was
            // caused by the document being corrupted or by the format
            // just being unsupported. So we do nothing.
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }
View Full Code Here

        metadata.set(Metadata.CONTENT_TYPE, "video/x-flv");
        metadata.set("hasVideo", Boolean.toString((typeFlags & MASK_VIDEO) != 0));
        metadata.set("hasAudio", Boolean.toString((typeFlags & MASK_AUDIO) != 0));

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // flv tag stream follows...
        while (true) {
            int type = datainput.read();
            if (type == -1) {
                // EOF
                break;
            }

            int datalen = readUInt24(datainput); //body length
            stream.skip(4); // timestamp
            stream.skip(3); // streamid

            if (type == TYPE_METADATA) {
                // found metadata Tag, read content to buffer
                byte[] metaBytes = new byte[datalen];
                for (int readCount = 0; readCount < datalen;) {
                    int r = stream.read(metaBytes, readCount, datalen - readCount);
                    if(r!=-1) {
                        readCount += r;

                    } else {
                        break;
                    }
                }

                ByteArrayInputStream is = new ByteArrayInputStream(metaBytes);

                DataInputStream dis = new DataInputStream(is);

                Object data = null;

                for (int i = 0; i < 2; i++) {
                    data = readAMFData(dis, -1);
                }

                if (data instanceof Map) {
                    // TODO if there are multiple metadata values with the same key (in
                    // separate AMF blocks), we currently lose the previous values
                    Map<String, Object> extractedMetadata = (Map<String, Object>) data;
                    for (Entry<String, Object> entry : extractedMetadata.entrySet()) {
                        metadata.set(entry.getKey(), entry.getValue().toString());
                    }
                }

            } else {
                // Tag was not metadata, skip over data we cannot handle
                for (int skiplen = 0; skiplen < datalen;) {
                    long currentSkipLen = datainput.skip(datalen - skiplen);
                    skiplen += currentSkipLen;
                }
            }

            sizePrev = readUInt32(datainput); // previous block size
            if (sizePrev != datalen + 11) {
                // file was corrupt or we could not parse it...
                break;
            }
        }

        xhtml.endDocument();
    }
View Full Code Here

            throws IOException, SAXException, TikaException {
        try {
            Document sd = new CustomStyledDocument();
            new RTFEditorKit().read(stream, sd, 0);

            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();
            xhtml.element("p", sd.getText(0, sd.getLength()));
            xhtml.endDocument();
        } catch (BadLocationException e) {
            throw new TikaException("Error parsing an RTF document", e);
        }
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.sax.XHTMLContentHandler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.