Package com.meapsoft.featextractors

Source Code of com.meapsoft.featextractors.AvgMelSpec

/*
*  Copyright 2006-2007 Columbia University.
*
*  This file is part of MEAPsoft.
*
*  MEAPsoft is free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License version 2 as
*  published by the Free Software Foundation.
*
*  MEAPsoft is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*  General Public License for more details.
*
*  You should have received a copy of the GNU General Public License
*  along with MEAPsoft; if not, write to the Free Software
*  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
*  02110-1301 USA
*
*  See the file "COPYING" for the text of the license.
*/

package com.meapsoft.featextractors;

import java.util.Arrays;

import com.meapsoft.FeatExtractor;
import com.meapsoft.STFT;
import com.meapsoft.RingMatrix;

/**
* Averages all spectral frames together into a single feature vector
* and then converts the vector to the mel frequency scale.
*
* @author Mike Mandel (mim@ee.columbia.edu)
*/
public class AvgMelSpec extends FeatureExtractor {

    // for each mel bin...
    protected double[] melCenter;   // actual targe mel value at center of this bin
    protected double[] melWidth;    // mel width divisor for this bin (constant, except broadens in low bins)
    // for each fft bin
    protected double[] melOfLin;
    protected double[] linSpec;

    protected int N;
    protected int outDim;

  public double lin2mel(double fq) {
    return 1127.0 * Math.log(1.0 + fq / 700.0);
  }

  public double mel2lin(double mel) {
    return 700.0 * (Math.exp(mel / 1127.0) - 1.0);
  }

  // Default constructor - Use 40 mel spaced bins
  public AvgMelSpec() {
      this(FeatExtractor.nfft/2+1, FeatExtractor.feSamplingRate, 40);
  }

  public AvgMelSpec(int N, float sampleRate, int outDim) {
    this.N = N;
    this.outDim = outDim;
    linSpec = new double[N];

    // Calculate the locations of the bin centers on the mel scale and
    // as indices into the input vector
    melCenter = new double[outDim+2];
    melWidth = new double[outDim+2];

    double melMin = lin2mel(0);
    //double melMax = lin2mel(sampleRate/2);
    double melMax = lin2mel((8000.0 < sampleRate/2)? 8000.0 : sampleRate/2); // dpwe 2006-12-11 - hard maximum
    double hzPerBin = sampleRate/2/N;
    for(int i=0; i<outDim+2; i++) {
      melCenter[i] = melMin + i * (melMax - melMin) / (outDim + 1);
      // System.out.println("centersMel["+i+"]="+centersMel[i]+" centersInd[]="+centersInd[i]);
    }
    for(int i=0; i<outDim+1; i++) {
  melWidth[i] = melCenter[i+1]-melCenter[i];
  double linbinwidth = (mel2lin(melCenter[i+1])-mel2lin(melCenter[i]))/hzPerBin;
  if (linbinwidth < 1) {
      melWidth[i] = lin2mel(mel2lin(melCenter[i])+hzPerBin) - melCenter[i];
  }
  //System.out.println("melBin="+i+" melCenter="+melCenter[i]+" melWidth="+melWidth[i]+"("+mel2lin(melCenter[i]-melWidth[i])/hzPerBin+".."+mel2lin(melCenter[i])/hzPerBin+".."+mel2lin(melCenter[i]+melWidth[i])/hzPerBin);
    }
    // precalculate mel translations of fft bin frequencies
    melOfLin = new double[N];
    for(int i=0; i<N; i++) {
      melOfLin[i] = lin2mel(i * sampleRate / (2*N));
      //      System.out.println("linbin2Mel["+i+"]="+linbin2mel[i]);
    }
  }


  public double[] features(STFT stft, long startFrame, int length) {
    double[] melSpec = new double[outDim];
    double[] curFrame;
    double sum = 0;

    // we're expecting a certain frequency resolution...
    boolean recalculateSTFT = stft.getRows() != N;
    RingMatrix newstft = null;
    if(recalculateSTFT) {
        // keep the same number of frames as in stft
        newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame+length), (N-1)*2, stft.nhop);
        length = newstft.getColumns();
    }   

    // intialize average to 0
    Arrays.fill(linSpec, 0);

    // collect average linear spectrum
    for(int frame=0; frame<length; frame++) {
        if(!recalculateSTFT)
            curFrame = stft.getFrame(startFrame+frame);
        else
            curFrame = newstft.getColumn(frame);

        for(int band=0; band<linSpec.length; band++)
            linSpec[band] += curFrame[band] / length;
    }
   
    // convert log magnitude to linear magnitude for binning
    for(int band=0; band<linSpec.length; band++)
        //linSpec[band] = Math.exp(linSpec[band]);
        linSpec[band] = Math.pow(10,linSpec[band]/10);
   
    // convert to mel scale
    for(int bin=0; bin<outDim; bin++) {
        // initialize
        melSpec[bin] = 0;
       
  for(int i = 0; i < linSpec.length; ++i) {
      double weight = 1.0 - (Math.abs(melOfLin[i] - melCenter[bin])/melWidth[bin]);
      if (weight > 0) {
    melSpec[bin] += weight * linSpec[i];
      }
  }

        // Take log
        melSpec[bin] = 10*Math.log(melSpec[bin]) / Math.log(10);
       
        sum += melSpec[bin];
    }
   
    // Audio scrubber takes care of normalization, level is a good cue
    //     for(int bin=0; bin<outDim; bin++)
    //       melSpec[bin] = melSpec[bin] / sum;
   
    return melSpec;
  }

  public String description()
  {
    return "Computes the mean spectrum of a chunk and converts it to the perceptually weighted Mel frequency scale.";
  }
}
TOP

Related Classes of com.meapsoft.featextractors.AvgMelSpec

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.