Package com.meapsoft.featextractors

Source Code of com.meapsoft.featextractors.AvgPitch

/*
*  Copyright 2006-2007 Columbia University.
*
*  This file is part of MEAPsoft.
*
*  MEAPsoft is free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License version 2 as
*  published by the Free Software Foundation.
*
*  MEAPsoft is distributed in the hope that it will be useful, but
*  WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*  General Public License for more details.
*
*  You should have received a copy of the GNU General Public License
*  along with MEAPsoft; if not, write to the Free Software
*  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
*  02110-1301 USA
*
*  See the file "COPYING" for the text of the license.
*/

package com.meapsoft.featextractors;

import java.util.Arrays;

import com.meapsoft.FeatExtractor;
import com.meapsoft.RingMatrix;
import com.meapsoft.STFT;

import com.meapsoft.DSP;

/**
* Averages all spectral frames together into a single feature vector and then
* applies a log-frequency harmonic template to enhance harmonic sets,
* then picks the biggest peak.
*
* Based on AvgChroma
*
* @author Dan Ellis (dpwe@ee.columbia.edu)
*/

public class AvgPitch extends FeatureExtractor
{

  protected double[][] mapMx;

  protected double[] linSpec;

  protected int N, BPO, nOcts, nOpBins;

  public static final double LOG2 = 0.69314718055995;
  public static final double LOG_ONE_SEMITONE = 0.057762265046662;
  public static final double MIDI0C_HZ = 8.1757989156;
  public static final double MIDI_BPO = 12.0;
  public static final double C0_HZ = 16.3516;

  //    protected static int firstTime = 0;  // no first time at all

  protected double lowestFrq = 65.4;

  public double hz2octs(double fq)
  {
    if (fq <= 0)
      return 0;
    // notes above BASEPITCH will be positive, below will be negative.
    double BASEPITCH = C0_HZ;
    // with C0_HZ, integer part of return is octave number,
    // fractional part is chroma.
    // So C0 (16.35 Hz) -> 0.0, A0 (27.5 Hz) -> 0.25,
    // C4 (262 Hz) -> 4.0, A4 (440 Hz) -> 4.25

    return Math.log(fq / BASEPITCH) / LOG2;
  }

  // Default constructor
  public AvgPitch()
  {
    this(FeatExtractor.nfft / 2 + 1, 48, 5, FeatExtractor.feSamplingRate);
  }

  public AvgPitch(int N, int BPO, int nOcts, double sampleRate)
  {
    this.N = N;
    this.BPO = BPO;
    this.nOcts = nOcts;
    linSpec = new double[N];
    int nOpBins = nOcts * BPO;
    this.nOpBins = nOpBins;

    mapMx = new double[nOpBins][N];

    //System.out.println("N="+N);
    //System.out.println("sampleRate="+sampleRate);


    // Create the mapping matrix
    // mapMx = pitchTplt * logFmap

    // first, the logfsgram array (after logfsgram.m)
    // frequency spacing of FFT bins
    double hzPerBin = 2*sampleRate/(2*(N-1));
    double fratio = Math.pow(2.0, 1.0/(double)BPO);
    int nbins = (int)Math.floor(Math.log(sampleRate/2.0/lowestFrq)/Math.log(fratio));
    // the FFT-to-logfsgram matrix
    double LFM[][] = new double[nbins][N];

    // Freqs corresponding to each bin in FFT
    double fftfrqs[] = new double[N];
    for (int i = 0; i < N; ++i) {
      fftfrqs[i] = i*hzPerBin;
    }
    // Freqs corresponding to each bin in log F output
    double logffrqs[] = new double[nbins];
    for (int i = 0; i < nbins; ++i) {
      logffrqs[i] = lowestFrq*Math.exp(LOG2*((double)i)/(double)BPO);
    }
    // Bandwidths of each bin in log F
    // .. but bandwidth cannot be less than FFT binwidth, hzPerBin
    double logfbws[] = new double[nbins];
    for (int i = 0; i < nbins; ++i) {
      logfbws[i] = logffrqs[i] * (fratio - 1.0);
      if (logfbws[i] < hzPerBinlogfbws[i] = hzPerBin;
    }
    double ovfctr = 0.5475;   // Adjusted by hand to make sum(mx'*mx) close to 1.0
    // Weighting matrix mapping energy in FFT bins to logF bins
    // is a set of Gaussian profiles depending on the difference in
    // frequencies, scaled by the bandwidth of that bin
    for (int i = 0; i < nbins; ++i) {
      double rowE = 0;
      for (int j = 0; j < N; ++j) {
        double freqdiff = (logffrqs[i] - fftfrqs[j])/(ovfctr * logfbws[i]);
        LFM[i][j] = Math.exp(-0.5*freqdiff*freqdiff);
        rowE = rowE + LFM[i][j]*LFM[i][j];
      }
      // Normalize rows by sqrt(E), so multiplying by mx' gets approx orig spec back
      for (int j = 0; j < N; ++j) {
        LFM[i][j] = LFM[i][j] / Math.sqrt(2*rowE);
      }
    }

    // Weight frequencies around 400 Hz by scaling each row
    double fctr = 400;
    double fsdoct = 1.5;
    for (int i = 0; i < nbins; ++i) {
      double wt = Math.exp(-0.5*Math.pow(Math.log(logffrqs[i]/fctr)/LOG2/fsdoct,2));
      for (int j = 0; j < N; ++j) {
        LFM[i][j] = LFM[i][j]*wt;
      }
    }

    // Construct the pitch harmonics template
    int nharmonics = 15;
    int tpltlen = 4 * BPO;
    double tpltsd = 0.5;
    double tplt[] = new double[tpltlen];
    Arrays.fill(tplt, 0);

    for (int i = 1; i <= nharmonics; ++i) {
      double hb = (double)BPO * Math.log((double)i)/LOG2;
      for (int j = 0; j < tpltlen; ++j) {
        double bd = ((double)j - hb)/tpltsd;
        tplt[j] = tplt[j] + 1.0/(double)i * Math.exp(-0.5*bd*bd);
      }
    }
    double sumtplt = 0;
    // normalize sum to 1
    for (int j = 0; j < tpltlen; ++j) {
      sumtplt = sumtplt + tplt[j];
    }
    for (int j = 0; j < tpltlen; ++j) {
      tplt[j] = tplt[j]/sumtplt;
    }

    // Build a matrix of templates for every pitch
    double TM[][] = new double[nOpBins][nbins];
    for (int i = 0; i < nOpBins; ++i) {
      Arrays.fill(TM[i], 0);
      int tpl = tpltlen;
      if ( (tpl + i) > nbins ) {
        tpl = nbins - i;
      }
      for (int j = 0; j < tpl; ++j) {
        TM[i][i+j] = tplt[j];
      }
    }


    // Combine the matrices together
    for (int i = 0; i < nOpBins; ++i) {
      for (int j = 0; j < N; ++j) {
        double x = 0;
        for (int k = 0; k < nbins; ++k) {
          x = x + TM[i][k]*LFM[k][j];
        }
        mapMx[i][j] = x;
      }
    }

    // Save it out for debug
    //if (firstTime == 1) {
    //    for (int i = 0; i < nOpBins; ++i) {
    //  for (int j = 0; j < N; ++j) {
    //      System.out.print(mapMx[i][j] + " ");
    //  }
    //  System.out.println("");
    //   }
    //    firstTime = 0;
    //}


  }

  public double[] features(STFT stft, long startFrame, int length, boolean preEmphasis)
  {
    int boff = 0;
    //double[] logSpec = new double[nOpBins];
    double[] curFrame;
    double sum = 0;
    double sum2 = 0;
    double[] pitch = new double[1];

    boolean recalculateSTFT = stft.getRows() != N;
    RingMatrix newstft = null;
    if (recalculateSTFT)
    {
      // keep the same number of frames as in stft
      newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame
          + length), (N - 1) * 2, stft.nhop);
      length = newstft.getColumns();
    }


    // Apply mapping to each frame, keep histogram of results
    int[] pitchcounts = new int[nOpBins];
    Arrays.fill(pitchcounts, 0);
    for (int frame = 0; frame < length; frame++) {
      if (!recalculateSTFT)
        curFrame = stft.getFrame(startFrame + frame);
      else
        curFrame = newstft.getColumn(frame);

      if (curFrame != null) {
        for (int band = 0; band < linSpec.length; band++) {
          linSpec[band] = Math.pow(10, curFrame[band] / 20);
          //System.out.print(linSpec[band] + " ");
        }
        //System.out.println("");

        // matrix multiply to find bins
        int maxbinix = 0;
        double maxbinval = 0;

        for (int bin = 0; bin < nOpBins; bin++) {
          double val = 0;
          for (int band = 0; band < linSpec.length; band++) {
            val += linSpec[band] * mapMx[bin][band];
          }
          // logSpec[bin] = val;
          if (val > maxbinval) {
            maxbinix = bin;
            maxbinval = val;
          }
        }
        ++pitchcounts[maxbinix];
      }
    }
    // find the most popular pitch
    int maxbinix = 0;
    int maxbincount = 0;
    for (int bin = 0; bin < nOpBins; bin++) {
      if ( pitchcounts[bin] > maxbincount ) {
        maxbinix = bin;
        maxbincount = pitchcounts[bin];
      }
    }

    // return the bin index mapped to MIDI numbers
    double midiVal = MIDI_BPO*(Math.log(lowestFrq/MIDI0C_HZ)/LOG2
        + (((double)maxbinix)/BPO));

    pitch[0] = (double)midiVal;

    return pitch;
  }

  public String description()
  {
    return "MIDI value corresponding to dominant pitch in segment, as found by correlating log-f-spectrum against a template";
  }
}
TOP

Related Classes of com.meapsoft.featextractors.AvgPitch

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.