Package edu.cmu.sphinx.tools.live

Source Code of edu.cmu.sphinx.tools.live.LiveModeRecognizer

/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved.  Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.tools.live;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;

import edu.cmu.sphinx.frontend.util.ConcatFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.result.WordResult;
import edu.cmu.sphinx.util.*;
import edu.cmu.sphinx.util.props.*;

/**
* Decodes a batch file containing a list of files to decode. The files can be either audio files or cepstral files, but
* defaults to audio files.
*/
public class LiveModeRecognizer implements Configurable {

    /** The property that specifies the recognizer to use */
    @S4Component(type = Recognizer.class)
    public final static String PROP_RECOGNIZER = "recognizer";

    /** The property that specifies the source of the transcript */
    @S4Component(type = ConcatFileDataSource.class)
    public final static String PROP_INPUT_SOURCE = "inputSource";

    /** The property specifying whether to print out the gap insertion errors. */
    @S4Boolean(defaultValue = false)
    public static final String PROP_SHOW_GAP_INSERTIONS = "showGapInsertions";

    /** The property specifying the transcript file. */
    @S4String(defaultValue = "hypothesis.txt")
    public final static String PROP_HYPOTHESIS_TRANSCRIPT = "hypothesisTranscript";

    /** The property specifying the number of files to decode before alignment is performed. */
    @S4Integer(defaultValue = -1)
    public final static String PROP_ALIGN_INTERVAL = "alignInterval";

    // TODO - the instrumentation in here that is looking for gap insertions
    // and performing the alignment and reporting of the live summary data
    // should probably be moved to a separate instrumentation package, much
    // like the BestPathAccuracyTracker.

    // -------------------------------
    // Configuration data
    // --------------------------------
    private Recognizer recognizer;
    private ConcatFileDataSource dataSource;
    private String hypothesisFile;
    private boolean showGapInsertions;

    // -------------------------------
    // Working data
    // --------------------------------
    private int alignInterval;
    private int numUtterances;

    private FileWriter hypothesisTranscript;
    private ReferenceSource referenceSource;
    private final NISTAlign aligner = new NISTAlign(true, true);

    public LiveModeRecognizer( Recognizer recognizer, ConcatFileDataSource dataSource, int skip, boolean showGapInsertions, String hypothesisFile, int alignInterval ) {
        this.recognizer = recognizer;
        this.dataSource = dataSource;
        this.showGapInsertions = showGapInsertions;
        this.hypothesisFile = hypothesisFile;
        this.alignInterval = alignInterval;
        this.referenceSource = dataSource;
    }

    public LiveModeRecognizer() {

    }

    /*
    * (non-Javadoc)
    *
    * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
    */
    public void newProperties(PropertySheet ps) throws PropertyException {
        recognizer = (Recognizer) ps.getComponent(PROP_RECOGNIZER);
        dataSource = (ConcatFileDataSource) ps.getComponent(PROP_INPUT_SOURCE);
        showGapInsertions = ps.getBoolean(PROP_SHOW_GAP_INSERTIONS);

        hypothesisFile = ps.getString(PROP_HYPOTHESIS_TRANSCRIPT);

        alignInterval = ps.getInt(PROP_ALIGN_INTERVAL);

        referenceSource = dataSource;
    }


    /** Decodes the batch of audio files */
    public void decode() throws IOException {
        List<String> resultList = new LinkedList<String>();
        Result result;
        int startReference = 0;
        hypothesisTranscript = new FileWriter(hypothesisFile);
        recognizer.allocate();
        while ((result = recognizer.recognize()) != null) {
            numUtterances++;
            String resultText = result.getBestResultNoFiller();

            System.out.println("\nHYP: " + resultText);
            System.out.println("   Sentences: " + numUtterances);
            resultList.add(resultText);

            for (WordResult wr : result.getTimedBestResult(false)) {
                hypothesisTranscript.write(wr.toString());
                hypothesisTranscript.write(' ');
            }
            hypothesisTranscript.write('\n');
            hypothesisTranscript.flush();

            if (alignInterval > 0 && (numUtterances % alignInterval == 0)) {
                // perform alignment if the property 'alignInterval' is set
                List<String> references = referenceSource.getReferences();
                List<String> section = references.subList(startReference, references
                        .size());
                alignResults(resultList, section);
                resultList = new LinkedList<String>();
                startReference = references.size();
            }
        }

        hypothesisTranscript.close();

        // perform alignment on remaining results
        List<String> references = referenceSource.getReferences();
        List<String> section = references.subList(startReference, references.size());
        if (!resultList.isEmpty() || !section.isEmpty()) {
            alignResults(resultList, section);
        }
        System.out.println("# ------------- Summary Statistics -------------");
        aligner.printTotalSummary();

        recognizer.deallocate();

        showLiveSummary();
        System.out.println();
    }


    /** Shows the test statistics that relates to live mode decoding. */
    private void showLiveSummary() throws IOException {
        int actualUtterances = referenceSource.getReferences().size();
        int gapInsertions = detectGapInsertionErrors();

        System.out.println
                ("   Utterances:  Actual: " + actualUtterances +
                        "  Found: " + numUtterances);
        System.out.println
                ("   Gap Insertions: " + gapInsertions);
    }


    /** Detect gap insertion errors. */
    private int detectGapInsertionErrors() throws IOException {
        Timer gapTimer = TimerPool.getTimer(this, "GapInsertionDetector");
        gapTimer.start();
        GapInsertionDetector gid = new GapInsertionDetector(dataSource
                .getTranscriptFile(), hypothesisFile, showGapInsertions);
        int gapInsertions = gid.detect();
        gapTimer.stop();
        return gapInsertions;
    }


    /**
     * Align the list of results with reference text. This method figures out how many words and sentences match, and
     * the different types of errors.
     *
     * @param hypothesisList the list of hypotheses
     * @param referenceList  the list of references
     */
    private void alignResults(List<String> hypothesisList, List<String> referenceList) {
        System.out.println();
        System.out.println("Aligning results...");
        System.out.println("   Utterances: Found: " + hypothesisList.size()
                + "   Actual: " + referenceList.size());

        String hypothesis = listToString(hypothesisList);
        String reference = listToString(referenceList);
        saveAlignedText(hypothesis, reference);

        getAlignTimer().start();
        aligner.align(reference, hypothesis);
        getAlignTimer().stop();

        System.out.println(" ...done aligning");
        System.out.println();
    }


    /**
     * Saves the aligned hypothesis and reference text to the aligned text file.
     *
     * @param hypothesis the aligned hypothesis text
     * @param reference  the aligned reference text
     */
    private void saveAlignedText(String hypothesis, String reference) {
        try {
            FileWriter writer = new FileWriter("align.txt");
            writer.write(hypothesis);
            writer.write("\n");
            writer.write(reference);
            writer.close();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }


    /**
     * Converts the given list of strings into one string, putting a space character in between the strings.
     *
     * @param resultList the list of strings
     * @return a string which is a concatenation of the strings in the list, separated by a space character
     */
    private String listToString(List<String> resultList) {
        StringBuilder sb = new StringBuilder();
        for (String result : resultList) {
            sb.append(result).append(' ');
        }
        return sb.toString();
    }


    /** Return the timer for alignment. */
    private Timer getAlignTimer() {
        return TimerPool.getTimer(this, "Align");
    }


    /** Do clean up */
    public void close() throws IOException {
        hypothesisTranscript.close();
    }


    /**
     * Main method of this BatchDecoder.
     *
     * @param argv argv[0] : config file argv[1] : a file listing all the audio files to decode
     */
    public static void main(String[] argv) {
        if (argv.length != 1) {
            System.out.println("Usage: LiveModeRecognizer config-file.xml ");
            System.exit(1);
        }
        String cmFile = argv[0];
        ConfigurationManager cm;
        LiveModeRecognizer lmr;

        try {
            URL url = new File(cmFile).toURI().toURL();
            cm = new ConfigurationManager(url);
            lmr = (LiveModeRecognizer) cm.lookup("live");
        } catch (IOException ioe) {
            System.err.println("I/O error during initialization: \n   " + ioe);
            return;
        } catch (PropertyException e) {
            System.err.println("Error during initialization: \n  " + e);
            e.printStackTrace();
            return;
        }

        if (lmr == null) {
            System.err.println("Can't find liveModeRecognizer in " + cmFile);
            return;
        }

        try {
            lmr.decode();
        } catch (IOException ioe) {
            System.err
                    .println("I/O error during decoding: " + ioe.getMessage());
        }
    }

//
//    @Test
//    public void testUseConcatDataSource() {
//        Map<String, Object> props = new HashMap<String, Object>();
//        props.put("inputSource", new ConcatFileDataSource());
//        props.put("recognizer", new Recognizer());
//        LiveModeRecognizer liveRecognizer = (LiveModeRecognizer) ConfigurationManager.getInstance(LiveModeRecognizer.class, props);
//
//    }
}
TOP

Related Classes of edu.cmu.sphinx.tools.live.LiveModeRecognizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.