Package com.sun.speech.freetts.en.us

Source Code of com.sun.speech.freetts.en.us.CMULexicon

/**
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved.  Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/
package com.sun.speech.freetts.en.us;

import java.io.IOException;
import java.net.URL;
import java.util.List;

import com.sun.speech.freetts.VoiceManager;
import com.sun.speech.freetts.lexicon.LexiconImpl;
import com.sun.speech.freetts.util.BulkTimer;

/**
* Provides a CMU lexicon-specific implementation of a Lexicon whose
* data is loaded from either binary or text resources.
*/
public class CMULexicon extends LexiconImpl {
   
    /**
     * Vowels
     */
    static final private String VOWELS = "aeiou";

    /**
     * Glides/Liquids
     */
    static final private String GLIDES_LIQUIDS = "wylr";

    /**
     * Nasals
     */
    static final private String NASALS = "nm";

    /**
     * Voiced Obstruents
     */
    static final private String VOICED_OBSTRUENTS = "bdgjlmnnnrvwyz";

    /**
     * Creates a CMULexicon based upon the given compiled and addenda
     * DBs and the given letter to sound rules
     *
     * @param compiledURL the compiled database is loaded from here
     * @param addendaURL the database addenda is loaded from here
     * @param letterToSoundURL the letter to sound rules are loaded
     *     from here
     * @param binary if <code>true</code> the input data are loaded as
     *     binary ; otherwise if <code>false</code> the input
     *     data are loaded as text.
     *
     */
    public CMULexicon(URL compiledURL,
                       URL addendaURL,
                       URL  letterToSoundURL,
           boolean binary) {
        setLexiconParameters(compiledURL, addendaURL, letterToSoundURL, binary);
    }

    /**
     * Creates the default CMU Lexicon which is a binary lexicon
     */
    public CMULexicon() {
  this("cmulex");
    }

    /**
     * Creates the CMU Lexicon which is a binary lexicon.  Delegates to
     * {@link #CMULexicon(String, boolean)} with binary IO enabled.
     *
     * @param basename the basename for the lexicon; it is the prefix
     *     of the names of the lexicon resources that are looked up
     */
    public CMULexicon(String basename) {
        this(basename, true);
    }

    public CMULexicon(String basename, boolean useBinaryIO) {
        java.net.URLClassLoader classLoader =
                VoiceManager.getVoiceClassLoader();
        String type = (useBinaryIO ? "bin" : "txt");

        URL letterToSoundURL = classLoader.getResource(
                "com/sun/speech/freetts/en/us/" + basename + "_lts." + type);
        URL compiledURL = classLoader.getResource(
                "com/sun/speech/freetts/en/us/" + basename
                + "_compiled." + type);
        URL addendaURL = classLoader.getResource(
                "com/sun/speech/freetts/en/us/" + basename
                + "_addenda." + type);

        /* Just another try with possibly a different class loader
         * if the above didn't work.
         */
        if (letterToSoundURL == null) {
            Class cls = CMULexicon.class;
            letterToSoundURL = cls.getResource(basename + "_lts." + type);
            compiledURL = cls.getResource(basename + "_compiled." + type);
            addendaURL = cls.getResource(basename + "_addenda." + type);
            if (letterToSoundURL == null) {
                System.err.println(
                    "CMULexicon: Oh no!  Couldn't find lexicon data!");
            }
        }
       
  setLexiconParameters(compiledURL, addendaURL,
                letterToSoundURL, useBinaryIO);
    }
   
    /**
     * Get the CMULexicon.
     *
     * @param useBinaryIO if true use binary IO to load DB
     *
     * @throws IOException if problems occurred while reading the data
     */
    static public CMULexicon getInstance( boolean useBinaryIO)
            throws IOException {
  return getInstance("cmulex", useBinaryIO);
    }

    /**
     * Get the CMULexicon.
     *
     * @param useBinaryIO if true use binary IO to load DB
     *
     * @throws IOException if problems occurred while reading the data
     */
    static public CMULexicon getInstance( String basename, boolean useBinaryIO)
            throws IOException {
  CMULexicon lexicon = new CMULexicon(basename, useBinaryIO);
  lexicon.load();
        return lexicon;
    }

       
    /**
     * Determines if the currentPhone represents a new syllable
     * boundary.
     *
     * @param syllablePhones the phones in the current syllable so far
     * @param wordPhones the phones for the whole word
     * @param currentWordPhone the word phone in question
     *
     * @return <code>true</code> if the word phone in question is on a
     *     syllable boundary; otherwise <code>false</code>.
     */
    public boolean isSyllableBoundary(List syllablePhones,
                                      String[] wordPhones,
                                      int currentWordPhone) {
        if (currentWordPhone >= wordPhones.length) {
            return true;
        } else if (isSilence(wordPhones[currentWordPhone])) {
            return true;
        } else if (!hasVowel(wordPhones, currentWordPhone)) { // rest of word
            return false;
        } else if (!hasVowel(syllablePhones)) { // current syllable
            return false;
        } else if (isVowel(wordPhones[currentWordPhone])) {
            return true;
        } else if (currentWordPhone == (wordPhones.length - 1)) {
            return false;
        } else {
            int p, n, nn;
            p = getSonority(
                (String) syllablePhones.get(syllablePhones.size() - 1));
            n = getSonority(wordPhones[currentWordPhone]);
            nn = getSonority(wordPhones[currentWordPhone + 1]);
            if ((p <= n) && (n <= nn)) {
                return true;
            } else {
                return false;
            }
        }
    }
   
    /**
     * Determines if the given phone represents a silent phone.
     *
     * @param phone the phone to test
     *
     * @return <code>true</code> if the phone represents a silent
     *     phone; otherwise <code>false</code>.
     */
    static protected boolean isSilence(String phone) {
        return phone.equals("pau");
    }

    /**
     * Determines if there is a vowel in the remainder of the array,
     * starting at the given index.
     *
     * @param phones the set of phones to check
     * @param index start checking at this index
     *
     * @return <code>true</code> if a vowel is found;
     *    otherwise <code>false</code>.
     */
    static protected boolean hasVowel(String[] phones, int index) {
        for (int i = index; i < phones.length; i++) {
            if (isVowel(phones[i])) {
                return true;
            }
        }
        return false;
    }
   
    /**
     * Determines if there is a vowel in given list of phones.
     *
     * @param phones the list of phones
     *
     * @return <code>true</code> if a vowel is found;
     *    otherwise <code>false</code>.
     */
    static protected boolean hasVowel(List phones) {
        for (int i = 0; i < phones.size(); i++) {
            if (isVowel((String) phones.get(i))) {
                return true;
            }
        }
        return false;
    }
   
    /**
     * Determines if the given phone is a vowel
     *
     * @param phone the phone to test
     *
     * @return <code>true</code> if phone is a vowel
     *    otherwise <code>false</code>.
     */
    static protected boolean isVowel(String phone) {
        return VOWELS.indexOf(phone.substring(0,1)) != -1;
    }

    /**
     * Determines the sonority for the given phone.
     *
     * @param phone the phone of interest
     *
     * @return an integer that classifies phone transitions
     */
    static protected int getSonority(String phone) {
        if (isVowel(phone) || isSilence(phone)) {
            return 5;
        } else if (GLIDES_LIQUIDS.indexOf(phone.substring(0,1)) != -1) {
            return 4;
        } else if (NASALS.indexOf(phone.substring(0,1)) != -1) {
            return 3;
        } else if (VOICED_OBSTRUENTS.indexOf(phone.substring(0,1)) != -1) {
            return 2;
        } else {
            return 1;
        }
    }   

    /**
     * Provides test code for the CMULexicon.
     * <br><b>Usage:</b><br>
     * <pre>
     *  com.sun.speech.freetts.en.us.CMULexicon [options]
     *
     * Where options is any combination of:
     *
     * -src path
     * -dest path
     * -generate_binary [base_name]
     * -compare
     * -showtimes
     *
     * </pre>
     */
    public static void main(String[] args) {
  LexiconImpl lex, lex2;
  boolean showTimes = false;
        String srcPath = ".";
        String destPath = ".";
  String baseName = "cmulex";

  try {
      if (args.length > 0) {
    BulkTimer.LOAD.start();
    for (int i = 0 ; i < args.length; i++) {
                    if (args[i].equals("-src")) {
                        srcPath = args[++i];
                    } else if (args[i].equals("-dest")) {
                        destPath = args[++i];
        } else if (args[i].equals("-name")
                               && i < args.length - 1) {
      baseName = args[++i];
        } else if (args[i].equals("-generate_binary")) {

       System.out.println("Loading " + baseName);
                         String path = "file:" + srcPath + "/" + baseName;
                         lex = new CMULexicon(
                             new URL(path + "_compiled.txt"),
                             new URL(path + "_addenda.txt"),
                             new URL(path + "_lts.txt"),
                             false);
       BulkTimer.LOAD.start("load_text");
                         lex.load();
       BulkTimer.LOAD.stop("load_text");

       System.out.println("Dumping " + baseName);
       BulkTimer.LOAD.start("dump_text");
       lex.dumpBinary(destPath + "/" + baseName);
       BulkTimer.LOAD.stop("dump_text");

        } else if (args[i].equals("-compare")) {

      BulkTimer.LOAD.start("load_text");
      lex = CMULexicon.getInstance(baseName, false);
      BulkTimer.LOAD.stop("load_text");

      BulkTimer.LOAD.start("load_binary");
      lex2 = CMULexicon.getInstance(baseName, true);
      BulkTimer.LOAD.stop("load_binary");

      BulkTimer.LOAD.start("compare");
      lex.compare(lex2);
      BulkTimer.LOAD.stop("compare");
        } else if (args[i].equals("-showtimes")) {
      showTimes = true;
        } else {
      System.out.println("Unknown option " + args[i]);
        }
    }
    BulkTimer.LOAD.stop();
    if (showTimes) {
        BulkTimer.LOAD.show("CMULexicon loading and dumping");
    }
      } else {
    System.out.println("Options: ");
    System.out.println("    -src path");
    System.out.println("    -dest path");
    System.out.println("    -compare");
    System.out.println("    -generate_binary");
    System.out.println("    -showtimes");
      }
  } catch (IOException ioe) {
      System.err.println(ioe);
  }
    }
}
TOP

Related Classes of com.sun.speech.freetts.en.us.CMULexicon

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.