Source Code of com.barrybecker4.sound.speech.PhoneToken

/*  Java Speech Synthesizer
 *  (C) LOTONtech Limited 2001
 */
package com.barrybecker4.sound.speech;


import com.barrybecker4.common.concurrency.ThreadUtil;
import com.barrybecker4.common.util.FileUtil;


import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.SourceDataLine;
import java.net.URL;


/**
 * Represents a single phonetic token.
 * See http://www.javaworld.com/javaworld/jw-08-2001/jw-0817-javatalk.html?page=1


 * Allophones, are labeled with a one-, two-, or three-letter identifier.
 * Some identifiers are obvious and some not so obvious, as you can see from the phonetic
 * representation of the word "hello."


    h -- sounds as you would expect
    e -- sounds as you would expect
    l -- sounds as you would expect, but notice that I've reduced a double "l" to a single one
    oo -- is the sound for "hello," not for "bot," and not for "too"
 */
class PhoneToken {


    private static final String RESOURCE_PATH = "com/barrybecker4/sound/allophones/";
    private static final int COMMA_PAUSE = 200;
    private static final int PERIOD_PAUSE = 680;
    private static final int MAX_MERGE_COUNT = 1000;


    /** delay in millis between words. */
    private static final int DELAY_BETWEEN_WORDS = 200;




    private String phoneToken;
    private SourceDataLine line = null;


    /**
     * Constructor.
     * This method speaks the given phonetic word.
     * Construct a file name for the allophone and load/say it
     */
    PhoneToken(String token) {
        phoneToken = token;
    }




    /** Speak the next allophone unless it is punctuation, in which case we just pause. */
    byte[] process(byte[] previousSound) {


        if (!pauseForPunctuation()) {


            String phoneFile = RESOURCE_PATH + phoneToken + ".au";


            // -- get the data from the file --
            byte[] thisSound = getSound( phoneFile );
            assert (thisSound.length > 0) : "Invalid sound file: " + phoneFile;


            return sayPhoneToken(previousSound, thisSound);
        }
        return previousSound;
    }


    /** Pause appropriately if we have encountered punctuation. */
    private boolean pauseForPunctuation() {
        if (phoneToken.equals(",") || phoneToken.equals(".")) {
            if (phoneToken.equals(",")) {
                ThreadUtil.sleep(COMMA_PAUSE);
            }
            else {
                ThreadUtil.sleep(PERIOD_PAUSE);
            }
            return true;
        }
        return false;
    }


    /*
     * This method drains the sound channel.
     */
    void drain() {
        if ( line != null )  {
            // this used to be just drain, but I added flush to make it work post java 1.5
            line.drain();
            line.flush();
            ThreadUtil.sleep(100);


        }
        ThreadUtil.sleep(DELAY_BETWEEN_WORDS);
    }


    /**
     * Speak a single phonetic token
     * @return the token that was said.
     */
    private byte[] sayPhoneToken(byte[] previousSound, byte[] thisSound) {
        if ( previousSound != null ) {
            // -- merge the previous allophone with this one if we can --
            int mergeCount = 0;
            if ( previousSound.length >= MAX_MERGE_COUNT && thisSound.length >= MAX_MERGE_COUNT )  {
                mergeCount = MAX_MERGE_COUNT;
            }
            for ( int i = 0; i < mergeCount; i++ ) {
                previousSound[previousSound.length - mergeCount + i]
                        = (byte) ((previousSound[previousSound.length - mergeCount + i] + thisSound[i]) / 2);
            }


            // -- play the previous allophone --
            playSound( previousSound );


            // -- set the truncated current allophone as previous --
            byte[] newSound = new byte[thisSound.length - mergeCount];
            System.arraycopy(thisSound, mergeCount, newSound, 0, newSound.length);
            return newSound;
        }
        else {
            return thisSound;
        }
    }


    /**
     * This method plays a sound sample.
     */
    public void playSound( byte[] data ) {
        if (data == null) return;
        if (line == null) {
            throw new IllegalArgumentException("Problem processing " + phoneToken);
        }
        if ( data.length > 0 ) {
            line.write( data, 0, data.length );
            ThreadUtil.sleep(100);
        }
    }


    /**
     * This method reads the file for a single allophone and constructs a byte vector.
     */
    private byte[] getSound( String sPath ) {
        try {
            //URL url = SpeechSynthesizer.class.getResource( fileName );
            URL url = FileUtil.getURL(sPath);
            AudioInputStream stream = AudioSystem.getAudioInputStream( url );
            AudioFormat format = stream.getFormat();


            // -- convert an ALAW/ULAW sound to PCM for playback --
            if ( (format.getEncoding() == AudioFormat.Encoding.ULAW) ||
                    (format.getEncoding() == AudioFormat.Encoding.ALAW) ) {
                AudioFormat tmpFormat = createAudioFormat(format);
                stream = AudioSystem.getAudioInputStream( tmpFormat, stream );
                format = tmpFormat;
            }


            if ( line == null ) {
                line = createDataLine(format);
            }


            // -- some size calculations --
            int frameSizeInBytes = format.getFrameSize();
            int bufferLengthInFrames = line.getBufferSize() >> 3;
            int bufferLengthInBytes = bufferLengthInFrames * frameSizeInBytes;


            byte[] data = new byte[bufferLengthInBytes];


            // -- read the data bytes and count them --
            int numBytesRead = 0;
            numBytesRead = stream.read(data);


            byte maxByte = 0;


            // -- truncate the byte array to the correct size --
            byte[] newData = new byte[numBytesRead];
            for ( int i = 0; i < numBytesRead; i++ ) {
                newData[i] = data[i];
                if ( newData[i] > maxByte ) maxByte = newData[i];
            }


            return newData;
        } catch (Exception e) {
            System.out.println( "Something wrong with " + sPath );
            e.printStackTrace();
            return new byte[0];
        }
    }


    private AudioFormat createAudioFormat(AudioFormat format) {
        return new AudioFormat(
            AudioFormat.Encoding.PCM_SIGNED,
            format.getSampleRate(),
            format.getSampleSizeInBits() << 1,
            format.getChannels(),
            format.getFrameSize() << 1,
            format.getFrameRate(),
            true );
    }


    private SourceDataLine createDataLine(AudioFormat format) throws Exception {
        SourceDataLine dataLine;
        // -- output line not instantiated yet --
        // -- can we find a suitable kind of line? --
        DataLine.Info outInfo = new DataLine.Info( SourceDataLine.class, format );
        if ( !AudioSystem.isLineSupported( outInfo ) ) {
            throw new Exception( "Line matching " + outInfo + " not supported." );
        }


        // -- open the source data line (the output line) --
        dataLine = (SourceDataLine) AudioSystem.getLine( outInfo );
        dataLine.open( format, 50000 );
        dataLine.start();
        return dataLine;
    }
}
Source Code of com.barrybecker4.sound.speech.PhoneToken

Related Classes of com.barrybecker4.sound.speech.PhoneToken