Package org.apache.tika.parser.mp3

Source Code of org.apache.tika.parser.mp3.Mp3Parser

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.mp3;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
* The <code>Mp3Parser</code> is used to parse ID3 Version 1 Tag information
* from an MP3 file, if available.
*
* @see http://www.id3.org/ID3v1
*/
public class Mp3Parser implements Parser {

    /**
     * List of predefined genres.
     *
     * @see http://www.id3.org/id3v2-00
     */
    private static final String[] GENRES = new String[] {
        /*  0 */ "Blues",
        /*  1 */ "Classic Rock",
        /*  2 */ "Country",
        /*  3 */ "Dance",
        /*  4 */ "Disco",
        /*  5 */ "Funk",
        /*  6 */ "Grunge",
        /*  7 */ "Hip-Hop",
        /*  8 */ "Jazz",
        /*  9 */ "Metal",
        /* 10 */ "New Age",
        /* 11 */ "Oldies",
        /* 12 */ "Other",
        /* 13 */ "Pop",
        /* 14 */ "R&B",
        /* 15 */ "Rap",
        /* 16 */ "Reggae",
        /* 17 */ "Rock",
        /* 18 */ "Techno",
        /* 19 */ "Industrial",
        /* 20 */ "Alternative",
        /* 21 */ "Ska",
        /* 22 */ "Death Metal",
        /* 23 */ "Pranks",
        /* 24 */ "Soundtrack",
        /* 25 */ "Euro-Techno",
        /* 26 */ "Ambient",
        /* 27 */ "Trip-Hop",
        /* 28 */ "Vocal",
        /* 29 */ "Jazz+Funk",
        /* 30 */ "Fusion",
        /* 31 */ "Trance",
        /* 32 */ "Classical",
        /* 33 */ "Instrumental",
        /* 34 */ "Acid",
        /* 35 */ "House",
        /* 36 */ "Game",
        /* 37 */ "Sound Clip",
        /* 38 */ "Gospel",
        /* 39 */ "Noise",
        /* 40 */ "AlternRock",
        /* 41 */ "Bass",
        /* 42 */ "Soul",
        /* 43 */ "Punk",
        /* 44 */ "Space",
        /* 45 */ "Meditative",
        /* 46 */ "Instrumental Pop",
        /* 47 */ "Instrumental Rock",
        /* 48 */ "Ethnic",
        /* 49 */ "Gothic",
        /* 50 */ "Darkwave",
        /* 51 */ "Techno-Industrial",
        /* 52 */ "Electronic",
        /* 53 */ "Pop-Folk",
        /* 54 */ "Eurodance",
        /* 55 */ "Dream",
        /* 56 */ "Southern Rock",
        /* 57 */ "Comedy",
        /* 58 */ "Cult",
        /* 59 */ "Gangsta",
        /* 60 */ "Top 40",
        /* 61 */ "Christian Rap",
        /* 62 */ "Pop/Funk",
        /* 63 */ "Jungle",
        /* 64 */ "Native American",
        /* 65 */ "Cabaret",
        /* 66 */ "New Wave",
        /* 67 */ "Psychadelic",
        /* 68 */ "Rave",
        /* 69 */ "Showtunes",
        /* 70 */ "Trailer",
        /* 71 */ "Lo-Fi",
        /* 72 */ "Tribal",
        /* 73 */ "Acid Punk",
        /* 74 */ "Acid Jazz",
        /* 75 */ "Polka",
        /* 76 */ "Retro",
        /* 77 */ "Musical",
        /* 78 */ "Rock & Roll",
        /* 79 */ "Hard Rock",
        /* 80 */ "Folk",
        /* 81 */ "Folk-Rock",
        /* 82 */ "National Folk",
        /* 83 */ "Swing",
        /* 84 */ "Fast Fusion",
        /* 85 */ "Bebob",
        /* 86 */ "Latin",
        /* 87 */ "Revival",
        /* 88 */ "Celtic",
        /* 89 */ "Bluegrass",
        /* 90 */ "Avantgarde",
        /* 91 */ "Gothic Rock",
        /* 92 */ "Progressive Rock",
        /* 93 */ "Psychedelic Rock",
        /* 94 */ "Symphonic Rock",
        /* 95 */ "Slow Rock",
        /* 96 */ "Big Band",
        /* 97 */ "Chorus",
        /* 98 */ "Easy Listening",
        /* 99 */ "Acoustic",
        /* 100 */ "Humour",
        /* 101 */ "Speech",
        /* 102 */ "Chanson",
        /* 103 */ "Opera",
        /* 104 */ "Chamber Music",
        /* 105 */ "Sonata",
        /* 106 */ "Symphony",
        /* 107 */ "Booty Bass",
        /* 108 */ "Primus",
        /* 109 */ "Porn Groove",
        /* 110 */ "Satire",
        /* 111 */ "Slow Jam",
        /* 112 */ "Club",
        /* 113 */ "Tango",
        /* 114 */ "Samba",
        /* 115 */ "Folklore",
        /* 116 */ "Ballad",
        /* 117 */ "Power Ballad",
        /* 118 */ "Rhythmic Soul",
        /* 119 */ "Freestyle",
        /* 120 */ "Duet",
        /* 121 */ "Punk Rock",
        /* 122 */ "Drum Solo",
        /* 123 */ "A capella",
        /* 124 */ "Euro-House",
        /* 125 */ "Dance Hall",
        /* sentinel */ ""
    };

    public void parse(
            InputStream stream, ContentHandler handler, Metadata metadata)
            throws IOException, SAXException, TikaException {
        metadata.set(Metadata.CONTENT_TYPE, "audio/mpeg");

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        byte[] tag = getSuffix(stream, 128);
        if (tag.length == 128
                && tag[0] == 'T' && tag[1] == 'A' && tag[2] == 'G') {
            String title = getString(tag, 3, 33);
            String artist = getString(tag, 33, 63);
            String album = getString(tag, 63, 93);
            String year = getString(tag, 93, 97);
            String comment = getString(tag, 97, 127);
            int genre = (int) tag[127] & 0xff; // unsigned byte

            metadata.set(Metadata.TITLE, title);
            metadata.set(Metadata.AUTHOR, artist);

            xhtml.element("h1", title);
            xhtml.characters("\n");

            xhtml.element("p", artist);
            xhtml.characters("\n");

            // ID3v1.1 Track addition
            // If the last two bytes of the comment field are zero and
            // non-zero, then the last byte is the track number
            if (tag[125] == 0 && tag[126] != 0) {
                int track = (int) tag[126] & 0xff;
                xhtml.element("p", album + ", track " + track);
            } else {
                xhtml.element("p", album);
            }
            xhtml.characters("\n");

            xhtml.element("p", year);
            xhtml.characters("\n");

            xhtml.element("p", comment);
            xhtml.characters("\n");

            xhtml.element("p", GENRES[Math.min(genre, GENRES.length - 1)]);
            xhtml.characters("\n");
        }

        xhtml.endDocument();
    }

    /**
     * Returns the identified ISO-8859-1 substring from the given byte buffer.
     * The return value is the zero-terminated substring retrieved from
     * between the given start and end positions in the given byte buffer.
     * Extra whitespace (and control characters) from the beginning and the
     * end of the substring is removed.
     *
     * @param buffer byte buffer
     * @param start start index of the substring
     * @param end end index of the substring
     * @return the identified substring
     * @throws TikaException if the ISO-8859-1 encoding is not available
     */
    private static String getString(byte[] buffer, int start, int end)
            throws TikaException {
        // Find the zero byte that marks the end of the string
        int zero = start;
        while (zero < end && buffer[zero] != 0) {
            zero++;
        }

        // Skip trailing whitespace
        end = zero;
        while (start < end && buffer[end - 1] <= ' ') {
            end--;
        }

        // Skip leading whitespace
        while (start < end && buffer[start] <= ' ') {
            start++;
        }

        // Return the remaining substring
        try {
            return new String(buffer, start, end - start, "ISO-8859-1");
        } catch (UnsupportedEncodingException e) {
            throw new TikaException("ISO-8859-1 encoding is not available", e);
        }
    }

    /**
     * Reads and returns the last <code>length</code> bytes from the
     * given stream.
     * @param stream input stream
     * @param length number of bytes from the end to read and return
     * @return stream the <code>InputStream</code> to read from.
     * @throws IOException if the stream could not be read from.
     */
   private static byte[] getSuffix(InputStream stream, int length)
           throws IOException {
       byte[] buffer = new byte[2 * length];
       int bytesInBuffer = 0;

       int n = stream.read(buffer);
       while (n != -1) {
           bytesInBuffer += n;
           if (bytesInBuffer == buffer.length) {
               System.arraycopy(buffer, bytesInBuffer - length, buffer, 0, length);
               bytesInBuffer = length;
           }
           n = stream.read(buffer, bytesInBuffer, buffer.length - bytesInBuffer);
       }

       if (bytesInBuffer < length) {
           length = bytesInBuffer;
       }

       byte[] result = new byte[length];
       System.arraycopy(buffer, bytesInBuffer - length, result, 0, length);
       return result;
   }

}
TOP

Related Classes of org.apache.tika.parser.mp3.Mp3Parser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.