# uncomment the pip install command to install pyspch -- it is required!
#
#!pip install git+https://github.com/compi1234/pyspch.git
#
try:
    import pyspch
except ModuleNotFoundError:
    # pyspch is not installed: show the install hint instead of a bare traceback.
    # Execution continues after the hint; the later `import pyspch.core` will
    # still fail until the package has actually been installed.
    print(
        """
        To enable this notebook on platforms as Google Colab, 
        install the pyspch package and dependencies by running following code:

        !pip install git+https://github.com/compi1234/pyspch.git
        """
    )

####################################################################################
### Limit the number of OpenMP threads to avoid memory leaks in sklearn KMeans
### on Windows + MKL machines.
### This must be executed before numpy is imported (here or in other packages).
### NOTE(review): `import pyspch` above runs earlier and may itself import numpy;
### confirm that the setting still takes effect in that case.
import os
os.environ["OMP_NUM_THREADS"] = '3'   # if you still get warnings, try '1' (or rely on the global warning filter below)
import warnings
# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")
####################################################################################

# Import the core Python libraries for data handling and plotting, plus the baseline machine learning stack
#
%matplotlib inline
import sys,os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns
import pyspch.core as Spch
from pyspch.core.hillenbrand import fetch_hillenbrand, select_hillenbrand

# Notebook-wide display defaults: compact numpy printing and uniform figure styling
np.set_printoptions(precision=2)
mpl.rcParams.update({
    'figure.figsize': [8.0, 8.0],
    'font.size': 12,
    'legend.fontsize': 'large',
    'figure.titlesize': 'large',
})

# Fetch the gender and f0 columns for all genders and drop rows with missing values
hildata = fetch_hillenbrand(genders='all',columns=['gender','f0']).dropna()
hildata

def plot_pitch_histogram(data,selection):
    '''
    Overlay the f0 histograms of two gender classes in a single figure.

    data      : table with a 'gender' column and an 'f0' column
    selection : sequence whose first two entries are the gender codes to compare;
                each code must be one of 'm', 'w', 'b', 'g'
    '''
    class_names = dict(m='men', w='women', b='boys', g='girls')
    first, second = selection[0], selection[1]

    # f0 values belonging to each of the two requested classes
    f0_first = data.loc[data['gender']==first,'f0']
    f0_second = data.loc[data['gender']==second,'f0']

    # both histograms share the same binning; only the colours differ
    hist_opts = dict(range=[50.,350.], bins=20)
    styled_series = (
        (f0_first, 'b', (.8,.8,1,.2)),
        (f0_second, 'r', (1,.8,.8,.2)),
    )
    plt.figure(figsize=(8,4))
    for values, edge, face in styled_series:
        plt.hist(values, ec=edge, fc=face, **hist_opts)
    plt.xlabel('Frequency (Hz)')
    plt.legend([first, second])
    plt.title('Pitch Histograms for speakers in %s and %s classes' % (class_names[first], class_names[second]))

# Compare two gender classes at a time; valid codes are m(en), w(omen), b(oys), g(irls)
for class_pair in (['m','w'], ['b','g']):
    plot_pitch_histogram( hildata, class_pair )

from sklearn.mixture import GaussianMixture


def model_data(data,genders):
    '''
    Plot overlaid f0 histograms for the two gender classes given in genders.

    NOTE: despite its name, this function does not fit a model; it only draws
    the histograms of the selected data.

    data    : table with a 'gender' column and an 'f0' column
    genders : sequence whose first two entries are the gender codes to compare;
              each code must be one of 'm', 'w', 'b', 'g'
    '''
    # readable class names for the figure title
    long_names = {'m':'men','w':'women','b':'boys','g':'girls'}
    # gather data for the selected classes
    X1 = data.loc[data['gender']==genders[0],'f0']
    X2 = data.loc[data['gender']==genders[1],'f0']
    #
    # plot histograms of the data
    plt.figure(figsize=(8,4))
    plt.hist(X1,range=[50.,350.],bins=20,ec='b',fc=(.8,.8,1,.2))
    plt.hist(X2,range=[50.,350.],bins=20,ec='r',fc=(1,.8,.8,.2))
    plt.xlabel('Frequency (Hz)')
    # the original body used the undefined names `selection` and `long_names`
    # here, which raised a NameError on every call
    plt.legend([genders[0],genders[1]])
    plt.title('Pitch Histograms for speakers in %s and %s classes' % (long_names[genders[0]], long_names[genders[1]]) )
    
def plot_model(data,genders):
    '''
    Fit a single-component Gaussian to the f0 values of each requested gender
    class and plot it on top of the normalized histogram of that class.

    data    : table with a 'gender' column and an 'f0' column
    genders : sequence of gender codes; one histogram + density curve is drawn per code

    Colours repeat cyclically when more classes are requested than colours are defined.
    '''
    # presumably restricts the table to the requested classes -- see pyspch's select_hillenbrand
    data = select_hillenbrand(data,genders=genders)
    labels = data['gender'].values
    # sklearn expects a 2D (n_samples, n_features) array, hence the column reshape
    f0 = data['f0'].values.reshape(-1,1)

    f0_range = [50.,350.]
    colors = ['b','r','g','k']
    fcs = [(.8,.8,1,.2),(1,.8,.8,.2),(.8,1,.8,.2),(.8,.8,.8,.2)]
    # evaluation grid for the fitted densities
    xp  = np.linspace(f0_range[0],f0_range[1],100).reshape(-1,1)
    kws_hist = dict(histtype='stepfilled', range=f0_range, alpha=.5, bins=30, density=True)

    fig,ax = plt.subplots(figsize=(8,4))
    for j, gender in enumerate(genders):
        samples = f0[labels==gender,:]
        gmm = GaussianMixture(max_iter=3,random_state=1,n_components=1,init_params='kmeans')
        gmm.fit(samples)
        # score_samples yields log-densities; they are exponentiated for plotting below
        log_density = gmm.score_samples(xp)
        # means_ is 2D (n_components, n_features): take the scalar explicitly, because
        # formatting a length-1 array with %f is deprecated in recent NumPy releases
        mean_f0 = gmm.means_[0,0]
        color = colors[j % len(colors)]
        ax.hist(samples,ec=color,fc=fcs[j % len(fcs)],**kws_hist)
        ax.plot(xp,np.exp(log_density),label='%s (mean=%.0f Hz) ' % (gender,mean_f0),color=color)
    ax.legend(loc='upper right',fontsize='x-small')
    ax.set_xlabel('Pitch Frequency (Hz)')
    ax.grid()

# All four classes together in one figure
plot_model(hildata,['m','w','g','b'])

# Then one figure per pair of classes
gender_pairs = [['m','w'], ['b','g'], ['m','b'], ['w','g']]
for genders in gender_pairs:
    plot_model(hildata,genders)

	gender	f0
fid
m01ae	m	174.0
m02ae	m	102.0
m03ae	m	99.0
m04ae	m	124.0
m06ae	m	115.0
...	...	...
g17uw	g	236.0
g18uw	g	214.0
g19uw	g	243.0
g20uw	g	248.0
g21uw	g	225.0

Pitch Distribution

Pitch and Gender in the Hillenbrand database

1. The Hillenbrand Database

2. Histograms of Pitch data

Explore

Questions

3. Modeling the Pitch Distributions