// Package edu.cmu.sphinx.tools.aligner
//
// Source code of edu.cmu.sphinx.tools.aligner.Aligner

/*
* Copyright 1999-2013 Carnegie Mellon University.
* Portions Copyright 2004 Sun Microsystems, Inc.
* Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved.  Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*/

package edu.cmu.sphinx.tools.aligner;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;

import edu.cmu.sphinx.api.SpeechAligner;
import edu.cmu.sphinx.util.TimeFrame;
import edu.cmu.sphinx.result.WordResult;

/**
* This is a simple tool to align audio to text and dump a database
* for the training/evaluation.
*
* You need to provide a model, dictionary, audio and the text to align.
*/
public class Aligner {

    private static int MIN_FILLER_LENGTH = 200;

    /**
     * @param args acoustic model, dictionary, audio file, text
     */
    public static void main(String args[]) throws Exception {
        File file = new File(args[2]);
        SpeechAligner aligner = new SpeechAligner(args[0], args[1], null);
        splitStream(file, aligner.align(file.toURI().toURL(), args[3]));
    }

    private static void splitStream(File inFile, List<WordResult> results)
        throws UnsupportedAudioFileException, IOException
    {
        System.err.println(results.size());

        List<List<WordResult>> utts = new ArrayList<List<WordResult>>();
        List<WordResult> currentUtt = null;
        int fillerLength = 0;

        for (WordResult result : results) {
            if (result.isFiller()) {
                fillerLength += result.getTimeFrame().length();
                if (fillerLength > MIN_FILLER_LENGTH) {
                    if (currentUtt != null)
                        utts.add(currentUtt);

                    currentUtt = null;
                }
            } else {
                fillerLength = 0;
                if (currentUtt == null)
                    currentUtt = new ArrayList<WordResult>();

                currentUtt.add(result);
            }
        }

        if (null != currentUtt)
            utts.add(currentUtt);

        int count = 0;
        for (List<WordResult> utt : utts) {
            long startFrame = Long.MAX_VALUE;
            long endFrame = Long.MIN_VALUE;

            for (WordResult result : utt) {
                TimeFrame frame = result.getTimeFrame();
                startFrame = Math.min(startFrame, frame.getStart());
                endFrame = Math.max(endFrame, frame.getEnd());
                System.out.print(result.getPronunciation().getWord());
                System.out.print(' ');
            }

            String[] basename = inFile.getName().split("\\.wav$");
            String uttId = String.format("%03d0", count);
            String outPath = String.format("%s-%s.wav", basename[0], uttId);
            System.out.println("(" + uttId + ")");
            count++;

            dumpStreamChunk(inFile, outPath, startFrame - MIN_FILLER_LENGTH,
                            endFrame - startFrame + MIN_FILLER_LENGTH);
        }
    }

    private static void dumpStreamChunk(File file, String dstPath,
                                        long offset, long length)
        throws UnsupportedAudioFileException, IOException
    {
        AudioFileFormat fileFormat = AudioSystem.getAudioFileFormat(file);
        AudioInputStream inputStream = AudioSystem.getAudioInputStream(file);
        AudioFormat audioFormat = fileFormat.getFormat();
        int bitrate = Math.round(audioFormat.getFrameSize() *
                audioFormat.getFrameRate() / 1000);

        inputStream.skip(offset * bitrate);
        AudioInputStream chunkStream =
            new AudioInputStream(inputStream, audioFormat, length * bitrate);
        AudioSystem.write(chunkStream, fileFormat.getType(), new File(dstPath));
        inputStream.close();
        chunkStream.close();
    }
}
// TOP
//
// Related Classes of edu.cmu.sphinx.tools.aligner.Aligner
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.