%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# Global matplotlib defaults used by every figure in this file:
# wide 12x4 figures, and saved images at 100 dpi with a tight bounding box.
mpl.rcParams.update({
    "figure.figsize": (12, 4),
    "savefig.dpi": 100,
    "savefig.bbox": 'tight',
})
# When SAVE_FIG is True, the figures are also written to disk along the way.
SAVE_FIG = False
import librosa
from mel import *

# Compare the three mel-scale variants offered by mel2hz (SLANEY, HTK, DM)
# by converting the same grid of mel values to Hz with each of them.
mels = np.arange(0., 30., .5)
freqs_sl = mel2hz(mels, scale='SLANEY')
freqs_htk = mel2hz(mels, scale='HTK')
freqs_dm = mel2hz(mels, scale='DM')

fig1, ax1 = plt.subplots()

# One dashed curve per variant; the label of each curve feeds the legend.
for curve_freqs, curve_color, curve_label in (
    (freqs_dm, 'g', 'DM'),
    (freqs_htk, 'b', 'HTK'),
    (freqs_sl, 'r', 'SLANEY'),
):
    ax1.plot(curve_freqs, mels, '--', color=curve_color, label=curve_label)
ax1.legend()

ax1.grid()
ax1.set(
    xlabel='Frequency (Hz)',
    ylabel="MEL",
)
ax1.tick_params(axis='x')
ax1.set_title(
    "MEL Scale Approximations (scaled for hz2mel(1000)=10)\n"
    " by Davis-Mermelstein, HTK and Slaney \n"
)
ax1.set_ylim([0., 35.])
ax1.set_xlim([0, 8000])

# Mark the reference point named in the title: 1000 Hz <-> 10 mel.
ax1.scatter(1000, 10, marker='o', s=75, color='k')
if SAVE_FIG:
    fig1.savefig("figures/mel_scale")

# Plot the bandwidth in Hz that corresponds to a constant bandwidth of
# `bw_mel` mel, as a function of the center frequency.
bw_mel = 1.
mels = np.arange(0., 30., .2)
freqs = mel2hz(mels)
# Band edges lie half a bandwidth (in mel) below and above each center.
lows = mel2hz(mels - bw_mel / 2)
highs = mel2hz(mels + bw_mel / 2)
bw_hz = (highs - lows)
fig, ax = plt.subplots()
ax.plot(freqs, bw_hz)
ax.set_title("Masking bandwidth")
ax.set_xlabel("Frequency (Hz)")
ax.set_ylabel("Bandwidth (Hz)")
# Use a real boolean: the string 'on' is only accepted because any non-empty
# string is truthy (so 'off' would enable the grid as well).
ax.grid(True)

# Mel-vs-frequency curve with two y-axes: mel on the left, Hz on the right.
# mel2hz is called with its default scale here.
mels = np.arange(0., 30., .5)
freqs = mel2hz(mels)

fig, ax1 = plt.subplots()
ax1.plot(freqs, mels, '--', color='b')
ax1.grid()
ax1.set(
    xlabel='Frequency (Hz)',
    ylabel="MEL",
    title='MEL Scale (Slaney) \n mel is used for left y-axis; Hz is used for the right y-axis',
)

# Both y-axes share this mel range so that their tick positions line up.
y_lim = np.asarray([0., 30.])
ax1.set_ylim(y_lim)
ax1.set_xlim([0, 8000])

ax2 = ax1.twinx()
# Place the right-hand ticks at round Hz values, expressed in mel.
# Alternative: reuse the left-hand tick positions with
#   y2_ticks = ax1.get_yticks()
y2_ticks = hz2mel([0, 1000, 2000, 3000, 4000, 5000, 6000])
# Each tick is labelled with its Hz value, right-justified to 8 characters.
y2_ticklabels = [f'{mel2hz(tick):.0f}'.rjust(8) for tick in y2_ticks]

# Matplotlib note: keep the next three calls in exactly this order to avoid
# warnings and/or plotting mistakes:
#   1. set the tick positions, 2. set the tick labels,
#   3. set the y-limits identical to those of the first axis.
ax2.set_yticks(y2_ticks)
ax2.set_yticklabels(y2_ticklabels)
ax2.set_ylim(y_lim)
ax2.set_ylabel('Hz');

##  DESIGN PARAMETERS - MINIMAL NUMBER OF CHANNELS  ########
# sr     : sampling rate, typically 16 or 8 kHz
# n_mels : number of mel filterbank channels. It should not be less than 24
#          at 16 kHz sampling (20 at 8 kHz); such a design has filter widths
#          of roughly 1 mel, and fewer channels would mean wider bands that
#          smear information too much. It should be no more than 64 for
#          sr=8000 and can go up to 100 for sr=16000.
# fmin   : lower cutoff. It can be set to 0 Hz, but 50 Hz is more common in
#          practice as there is no useful acoustic energy below 50 Hz.
# fmax   : upper cutoff. 6500 Hz is an arbitrary value, chosen to make the
#          first 20 bands fit nicely into the 4 kHz range.
sr = 16000
n_mels = 24
fmin = 50.
fmax = 6500.
##########

freqs, fbank = mel_filterbank(n_mels=n_mels, sr=sr, fmin=fmin, fmax=fmax)

# Draw the four diagnostic figures in turn; when SAVE_FIG is set, each one is
# saved under the common prefix `name` plus its own suffix before being shown.
name = f"figures/mel_filterbank{n_mels}_"
for draw_figure, plot_data, suffix in (
    (plot_filterbank_cf_bw, freqs, "cf"),
    (plot_filterbank_mapping, freqs, "map"),
    (plot_filterbank, freqs, "filt"),
    (plot_filterbank_weights, fbank, "coef"),
):
    draw_figure(plot_data, sr=sr)
    if SAVE_FIG:
        plt.savefig(name + suffix)
    plt.show()

# High-resolution design: same sampling rate and cutoffs, but 80 channels.
# Guidelines for n_mels: not less than 24 at 16 kHz sampling (20 at 8 kHz),
# which gives filter widths of roughly 1 mel; fewer channels would mean wider
# bands that smear information too much. No more than 64 for sr=8000, and up
# to 100 for sr=16000.
sr = 16000         # sampling rate, typically 16 or 8 kHz
n_mels = 80        # number of mel filterbank channels
fmin = 50.         # lower cutoff; no significant speech frequencies are present below 50 Hz
fmax = 6500.       # upper cutoff, set to 6.5 kHz here
##########
freqs, fbank = mel_filterbank(n_mels=n_mels, sr=sr, fmin=fmin, fmax=fmax)

# Each entry is (plotting function, its first argument, file-name suffix).
name = f"figures/mel_filterbank{n_mels}_"
figure_specs = (
    (plot_filterbank_cf_bw, freqs, "cf"),
    (plot_filterbank_mapping, freqs, "map"),
    (plot_filterbank, freqs, "filt"),
    (plot_filterbank_weights, fbank, "coef"),
)
for draw_figure, plot_data, suffix in figure_specs:
    draw_figure(plot_data, sr=sr)
    # Save before showing, and only when SAVE_FIG is enabled.
    if SAVE_FIG:
        plt.savefig(name + suffix)
    plt.show()

The MEL Scale

Frequency Sensitivity in Auditory Perception

Auditory Filters in the cochlea

1. Mel Scale Approximations

2. Mel scale and Equivalent Bandwidth

Labeling a mel axis with 'mel' or 'Hz' ??

2. MEL FILTERBANK

A critically spaced mel filterbank

TASK:

A high resolution mel filterbank