
Formant Distribution¶

Distributions of steady state Formants¶


The main purpose of this demo notebook is to explore formant statistics based on the Hillenbrand database (1995). Hillenbrand set out to redo the seminal Peterson and Barney experiments from the early 1950s, of which the original speech data was not preserved. Hillenbrand found significant differences from the P-B data, which might be due to a number of factors:
- formant measurements were now based on LPC analysis and not purely human reading from wideband spectrograms
- population choice
- dialect evolution
- ...

Since the P-B experiments we have known that formants (especially F1 and F2) carry great discriminative power when it comes to recognizing vowels. In the very early days of automatic speech recognition, formant extraction followed by formant-based recognition was considered a promising approach. This is long obsolete by now: formants are too ill-defined, and as a feature set they are too minimal.
At the same time, formants are robust against all kinds of signal manipulations and illustrate the tremendous redundancy present in speech (from a recognition point of view).

In this notebook we focus on the 'exploratory phase', in which we explore the potential of formants for speech recognition.

The goal is to observe both WITHIN- and BETWEEN-class differences. Also observe that significant side factors enter into this recognition game: gender, age, ...

In [1]:
# uncomment the pip install command to install pyspch -- it is required!
#
#!pip install git+https://github.com/compi1234/pyspch.git
#
try:
    import pyspch
except ModuleNotFoundError:
    print(
    """
    To enable this notebook on platforms such as Google Colab,
    install the pyspch package and its dependencies by running:

    !pip install git+https://github.com/compi1234/pyspch.git
    """
    )
    raise
In [13]:
# Importing core Python libraries for data handling and plotting
#
%matplotlib inline
import sys,os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns

import pyspch.core as Spch
import pyspch.display as Spd
from pyspch.core.hillenbrand import fetch_hillenbrand, select_hillenbrand
#
np.set_printoptions(precision=2)
mpl.rcParams['figure.figsize'] = [8.,7.]
mpl.rcParams['font.size'] = 11
In [9]:
# A small utility function to set axis to semilog (x=linear, y=log) and adjust range and labels
def set_ax(ax,xlim=[200,1200],ylim=[700,3500],semilog=True,
                   yticks=[1000.,1500.,2000.,3000.],
                   yticklabels=['1000','1500','2000','3000']):
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    if semilog:
        ax.set_yscale('log')
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels);   

1. The Hillenbrand Database¶

All demonstrations in this notebook are using (parts of) the Hillenbrand '95 database. For detailed information and references: http://homes.esat.kuleuven.be/~spchlab/data/hillenbrand/README.txt

The function fetch_hillenbrand() fetches the Hillenbrand database or a part of it. By default it returns the most used columns. Possible subselection criteria for rows are genders="..." and vowels="...". Columns are selected with the columns argument.

To get more details on this routine, type help(fetch_hillenbrand)

fetch_hillenbrand() returns a pandas DataFrame. This is a basic 2-dimensional data structure. The rows are data records; they may be labeled by an index; in the example below the index is the file id. The columns are labeled data fields. As called, the routine returns gender, vowel, f0, F1, F2 and F3. The first 2 are label (class) properties; the latter 4 are numeric values. We drop all records with missing values in the requested fields.
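As a minimal sketch of this structure (the fids and numeric values below are invented for illustration, not actual Hillenbrand measurements), the returned DataFrame looks roughly like this and is cleaned with dropna():

```python
import numpy as np
import pandas as pd

# Toy stand-in for the DataFrame returned by fetch_hillenbrand();
# the fids and formant values here are invented for illustration.
toy = pd.DataFrame(
    {"gender": ["m", "w", "m"],
     "vowel":  ["ae", "iy", "uw"],
     "f0":  [174.0, 220.0, 110.0],
     "F1":  [663.0, 430.0, np.nan],   # one missing measurement
     "F2":  [2012.0, 2750.0, 980.0],
     "F3":  [2659.0, 3300.0, 2300.0]},
    index=pd.Index(["m01ae", "w05iy", "m03uw"], name="fid"),
)
clean = toy.dropna()   # drop all records with missing values
print(clean.shape)     # the NaN record is gone: (2, 6)
```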

Hillenbrand vs. Peterson-Barney¶

The Hillenbrand dataset is mainly a recreation of the famous Peterson-Barney dataset, of which the data was not preserved. There are also a few notable differences:

  • Hillenbrand contains data on 12 instead of 10 vowels. However, the extra vowels 'ey' ('ei') and 'ow' ('oa') are diphthongs rather than steady-state vowels and not truly suitable for steady-state analysis. We omit these 2 diphthongs in the analysis here.
  • The 'mean' values in the Hillenbrand database deviate significantly from what was reported by Peterson and Barney, indicating a linguistic shift, regional speaker differences, or other unexplained causes.

In this notebook we convert the Hillenbrand notations to standard ARPABET phone symbols.

In [10]:
hildata = fetch_hillenbrand(symbols='arpa').dropna()
all_vowels = np.unique(hildata['vowel'])
all_genders = np.unique(hildata['gender'])
all_features = ["f0","F1","F2","F3"]
print(all_vowels)
print(all_genders)
print(all_features)
#
hildata
['aa' 'ae' 'ah' 'ao' 'eh' 'er' 'ey' 'ih' 'iy' 'ow' 'uh' 'uw']
['b' 'g' 'm' 'w']
['f0', 'F1', 'F2', 'F3']
Out[10]:
gender vowel f0 F1 F2 F3
fid
m01ae m ae 174.0 663.0 2012.0 2659.0
m02ae m ae 102.0 628.0 1871.0 2477.0
m03ae m ae 99.0 605.0 1812.0 2570.0
m04ae m ae 124.0 627.0 1910.0 2488.0
m06ae m ae 115.0 647.0 1864.0 2561.0
... ... ... ... ... ... ...
g17uw g uw 236.0 490.0 2179.0 3131.0
g18uw g uw 214.0 435.0 1829.0 3316.0
g19uw g uw 243.0 497.0 1334.0 3067.0
g20uw g uw 248.0 498.0 1740.0 3291.0
g21uw g uw 225.0 446.0 1533.0 3269.0

1617 rows × 6 columns

In [11]:
# get default color and marker orders for pyspch.display
markers = Spd.markers
colors = Spd.colors
cmap = sns.color_palette(colors)
sns.set_palette(cmap)
#
table_order = ['iy','ih','eh','ae','aa','ao','uh','uw','ah','er','ey','ow']
vowel_order = ['iy','aa','uw','ih','eh','er','ah','ae','ao','uh','ey','ow']
vowel2color = dict(zip(vowel_order,colors))
color2vowel = dict(zip(colors,vowel_order))
vow12 = vowel_order
vow10 = vowel_order[0:10]
vow3 = vowel_order[0:3]
vow6 = vowel_order[0:6]
# selected adult data
data3 = select_hillenbrand(hildata,genders='adults',vowels=vow3)
data6 = select_hillenbrand(hildata,genders='adults',vowels=vow6)
data10 = select_hillenbrand(hildata,genders='adults',vowels=vow10)

2. F1-F2 Scatter Plots¶

Scatter plots give you an intuitive feeling of how classes differ from one another in "feature space", i.e. for the features that you have selected. We can only visualize this well in 2 dimensions, and to some extent in 3D.
The scatter plots below are shown for 3, 6 and 10 classes (the vowel classes used by Peterson & Barney, 1952). While the 3 classes are perfectly separable in the F1-F2 space, this is only partially true for the 6 classes and no longer true at all for the 10-class data.

In [14]:
#genders = 'adults'
# select the vowels you want to plot
for (vow,data) in zip([vow3,vow6,vow10],[data3,data6,data10]):
    f,ax = plt.subplots()
    sns.scatterplot(ax=ax,x='F1',y='F2',data=data,hue="vowel",marker='D',s=20,hue_order=vow)
    ax.set_title('F1-F2 scatterplot');
    #uncomment next line for F2 data on log scale
    #set_ax(ax)

F1-F2 Scatter plot with labels printed at each data point¶

The scatter plots above can also be shown in a variant where the data points are marked with their own label.

In [15]:
ftr = ['F1','F2']
genders = 'adults'
for data,vowels in zip([data6,data10],[vow6,vow10]):
    f,ax = plt.subplots()
    sns.scatterplot(x=ftr[0],y=ftr[1],data=data,hue="vowel",s=10,hue_order=vowels);
    for i,entry in data.iterrows():
        vow = entry['vowel']
        plt.text(entry[ftr[0]],entry[ftr[1]],vow,ha='center',va='center',fontsize=10,color=vowel2color[vow] )
    ax.set_title('F1-F2 scatterplot');
    #uncomment next 2 lines for F2 data on log scale
    #plt.yscale('log')
    #ax.set_yticks([800.,1000.,1500,2000.,3000.],["800","1000","1500","2000","3000"])

3. Overlaying Scatter plots with Confidence Ellipses¶

Scatter plots are a non-parametric data model.
For pattern recognition purposes we tend to build parametric models that can generalize from the data. Such models may answer better how intrinsically separable the classes are. Here we use simple Gaussian fits, and in the plots below we draw confidence ellipses.

Confidence Ellipses using Diagonal Covariance Matrix¶

In this case we just measure the standard deviation of each feature.
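The actual drawing is done by pyspch's plot_confidence_ellipse; as a minimal numpy sketch of what the diagonal-covariance fit amounts to (the helper below is a hypothetical stand-in, not pyspch's own code), the ellipse is axis-aligned, centered on the feature means, with half-axes n_std times the per-feature standard deviations:

```python
import numpy as np

def diag_ellipse_params(x, y, n_std=2.0):
    """Axis-aligned confidence ellipse: center = feature means,
    half-axes = n_std * per-feature standard deviations."""
    cx, cy = np.mean(x), np.mean(y)
    rx, ry = n_std * np.std(x), n_std * np.std(y)
    return (cx, cy), (rx, ry)

# toy uncorrelated 2-D samples with F1/F2-like ranges
rng = np.random.default_rng(0)
x = rng.normal(600.0, 50.0, 500)
y = rng.normal(1800.0, 200.0, 500)
(cx, cy), (rx, ry) = diag_ellipse_params(x, y, n_std=2.0)
```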

In [21]:
marker_txt = ('a','b')
genders = ['m','w']
(xfeat,yfeat) = ['F1','F2']
vowels = vow10
data = data10
n_std = 2
Diagonal = True

# setup the scatterplot and legend
fig,ax=plt.subplots()
sns.scatterplot(x=xfeat,y=yfeat,data=data,hue='vowel',hue_order=vowels,s=0)
#uncomment next line for F2 data on log scale
#plt.yscale('log')
#ax.set_yticks([800.,1000.,1500,2000.,3000.],["800","1000","1500","2000","3000"])

# plot the datapoints as text
for i,entry in data.iterrows():
    vow = entry['vowel']
    ax.text(entry[xfeat],entry[yfeat],vow,ha='center',va='center',color=vowel2color[vow] )
    
for vow in vowels:
    vowdata = select_hillenbrand(hildata,genders=genders,vowels=[vow])
    Spch.plot_confidence_ellipse(vowdata[xfeat], vowdata[yfeat], ax, n_std=n_std, Diagonal=Diagonal, edgecolor=vowel2color[vow],linewidth=2 ,linestyle='--')
    
#sns.scatterplot(ax=f.axes[0],data=formants,x='F1',y='F2',hue='vowel',style='gender',s=2000)
#ax.legend(loc='upper left', bbox_to_anchor=(.9,1));
if Diagonal:
    ax.set_title(r"F1-F2 Confidence Ellipses (diag cov., %.1f $\sigma$)" % n_std);
else:
    ax.set_title(r"F1-F2 Confidence Ellipses (full cov., %.1f $\sigma$)" % n_std);

Confidence Ellipses using Full Covariance Matrix¶

In order to plot these confidence ellipses we measure both the standard deviations and the correlation between the features.
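For the full-covariance case, the ellipse axes follow the eigenvectors of the 2x2 covariance matrix. The sketch below is again a hypothetical stand-in for pyspch's internal computation: half-axis lengths are n_std times the square roots of the eigenvalues, and the ellipse is rotated along the principal eigenvector.

```python
import numpy as np

def full_ellipse_params(x, y, n_std=2.0):
    """Confidence ellipse from the full 2x2 covariance matrix:
    half-axes = n_std * sqrt(eigenvalues); rotation angle follows
    the principal eigenvector."""
    cov = np.cov(x, y)
    evals, evecs = np.linalg.eigh(cov)   # eigenvalues in ascending order
    half_axes = n_std * np.sqrt(evals)
    angle = np.degrees(np.arctan2(evecs[1, -1], evecs[0, -1]))
    return (np.mean(x), np.mean(y)), half_axes, angle

# toy strongly correlated data: y roughly follows 2*x
rng = np.random.default_rng(1)
x = rng.normal(600.0, 50.0, 2000)
y = 2.0 * x + rng.normal(0.0, 30.0, 2000)
center, half_axes, angle = full_ellipse_params(x, y, n_std=2.0)
```

The correlation shows up as an elongated, tilted ellipse: the major half-axis is much longer than the minor one, and the tilt tracks the slope of the data cloud.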

In [23]:
marker_txt = ('a','b')
genders = ['m','w']
(xfeat,yfeat) = ['F1','F2']
vowels = vow10
data = data10
n_std = 2

# setup the scatterplot and legend
fig,ax=plt.subplots()
sns.scatterplot(x=xfeat,y=yfeat,data=data,hue='vowel',hue_order=vowels,s=0)
#uncomment next line for F2 data on log scale
#plt.yscale('log')
#ax.set_yticks([800.,1000.,1500,2000.,3000.],["800","1000","1500","2000","3000"])

# plot the datapoints as text
for i,entry in data.iterrows():
    vow = entry['vowel']
    ax.text(entry[xfeat],entry[yfeat],vow,ha='center',va='center',color=vowel2color[vow] )
    
for vow in vowels:
    vowdata = select_hillenbrand(hildata,genders=genders,vowels=[vow])
    Spch.plot_confidence_ellipse(vowdata[xfeat], vowdata[yfeat], ax, n_std=n_std, edgecolor=vowel2color[vow] ,linewidth=2,linestyle='--')
    
#sns.scatterplot(ax=f.axes[0],data=formants,x='F1',y='F2',hue='vowel',style='gender',s=2000)
#ax.legend(loc='upper left', bbox_to_anchor=(0.8,1));
ax.set_title("Scatter Plots of Formants with Ellipses at %.1f standard deviations" % n_std);

4. Formant Tables¶

First we compute the mean values of the different formants.
We do this both globally and per gender.
The table below shows formant values per gender as well as gender-independent averages.

Observations and Questions¶

  • determine (very roughly) the average differences between male and female formants
    • what would you say: less than 10%, 10-15%, 15-20%, more than 20%
  • what is the physical explanation of this gender dependency of the formants: which statement is true?
    • it is related to the average weight difference between males and females
    • it is the exact percentage by which men are taller than women
    • it is directly related to the difference in vocal tract length
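As a rough sketch for the first question, the per-gender means printed below can be used to estimate the average female/male formant ratio (the values here are copied from that table; a difference in the 10-20% range is consistent with the shorter female vocal tract):

```python
import numpy as np

# Mean F1/F2 per vowel (iy ih eh ae aa ao uh uw ah er),
# copied from the gender-dependent formant table in this section
F1_m = [340, 429, 588, 591, 756, 656, 469, 380, 621, 476]
F1_w = [435, 484, 727, 678, 916, 801, 519, 460, 760, 527]
F2_m = [2312, 2034, 1803, 1930, 1309, 1023, 1123, 992, 1181, 1370]
F2_w = [2756, 2369, 2063, 2332, 1526, 1188, 1229, 1106, 1416, 1589]

# average female/male ratio per formant
r1 = np.mean(np.array(F1_w) / np.array(F1_m))
r2 = np.mean(np.array(F2_w) / np.array(F2_m))
print(f"F1 ratio {r1:.2f}, F2 ratio {r2:.2f}")  # both land in the 1.1-1.2 range
```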
In [26]:
pd.options.display.float_format = '     {:.0f}  '.format
fdata = select_hillenbrand(hildata,genders='adults',vowels=vow10)[['gender','vowel','F1','F2','F3']]
# average formants per vowel and per gender
formants = fdata.groupby(by=["vowel","gender"]).mean()
formant_table = formants.unstack()
# average formants for all speakers (gender independent)
formants_all = fdata[['vowel','F1','F2','F3']].groupby(by=["vowel"]).mean()
#
print("Formant Table (gender dependent)\n")
display(formant_table.transpose()[table_order[0:10]])
print("\nFormant Table (gender independent)\n")
display(formants_all.transpose()[table_order[0:10]]);
Formant Table (gender dependent)

vowel iy ih eh ae aa ao uh uw ah er
gender
F1 m 340 429 588 591 756 656 469 380 621 476
w 435 484 727 678 916 801 519 460 760 527
F2 m 2312 2034 1803 1930 1309 1023 1123 992 1181 1370
w 2756 2369 2063 2332 1526 1188 1229 1106 1416 1589
F3 m 3001 2687 2604 2595 2535 2521 2435 2355 2548 1711
w 3373 3057 2953 2973 2823 2819 2829 2735 2901 1930
Formant Table (gender independent)

vowel iy ih eh ae aa ao uh uw ah er
F1 389 458 660 636 838 730 495 421 693 501
F2 2539 2207 1937 2136 1420 1108 1177 1051 1302 1480
F3 3191 2878 2784 2788 2682 2673 2638 2551 2730 1820
In [27]:
vowels = vow10
fig,ax=plt.subplots()
sns.scatterplot(ax=ax,x='F1',y='F2',data=data10,hue="vowel",s=1,hue_order=vowels,legend=False)
sns.scatterplot(ax=ax,data=formants_all,x='F1',y='F2',hue='vowel',hue_order=vowels,s=100)
for vow in vowels:
    F1 = formants_all['F1'][vow]
    F2 = formants_all['F2'][vow]
    ax.text(F1+10,F2+30,vow,ha='left',va='bottom',fontsize=12,color=vowel2color[vow] )    

    vowdata = select_hillenbrand(hildata,genders=genders,vowels=[vow])
    Spch.plot_confidence_ellipse(vowdata['F1'], vowdata['F2'], ax, n_std=n_std, edgecolor=vowel2color[vow] )

5. Formant Triangle¶

The "Formant Triangle" refers to the fact that the average positions of all vowels in the F1-F2 plane lie roughly on a triangle with corner points iy, aa and uw.
Remark also that there is one vowel ('er') that does not fit this picture at all, as it sits in the center of the triangle.
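That 'er' indeed falls inside the triangle can be checked numerically with the mean values from the gender-independent table above; a minimal sketch using a sign test on the three edge cross products (a standard point-in-triangle check, not part of pyspch):

```python
def inside_triangle(p, a, b, c):
    """True if point p lies inside triangle (a, b, c), tested via
    the signs of the three edge cross products."""
    def cross(o, u, v):
        return (u[0]-o[0])*(v[1]-o[1]) - (u[1]-o[1])*(v[0]-o[0])
    s1, s2, s3 = cross(a, b, p), cross(b, c, p), cross(c, a, p)
    return (s1 > 0) == (s2 > 0) == (s3 > 0)   # all same sign -> inside

# (F1, F2) corner points and 'er', from the gender-independent table above
iy, aa, uw = (389, 2539), (838, 1420), (421, 1051)
er = (501, 1480)
print(inside_triangle(er, iy, aa, uw))  # True: 'er' sits inside the triangle
```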

In [28]:
f1_vals = [ formants_all['F1'][v] for v in ['iy','aa','uw'] ]
f2_vals = [ formants_all['F2'][v] for v in ['iy','aa','uw'] ]
ax.plot([f1_vals[k] for k in [0,1,2,0]],[f2_vals[k] for k in [0,1,2,0]],color='k',linestyle='dashed')
ax.set_title('Formant Triangle')
fig

Gender Dependency of formants¶

The formant confidence ellipses look quite different when we separate the data per gender. Confusion is obviously much smaller if gender is known a priori or can be inferred from other speech characteristics.

In [33]:
vowels = vow10
#data = formants
fig,ax =plt.subplots()
sns.scatterplot(ax=ax,x='F1',y='F2',data=data10,hue="vowel",s=5,hue_order=vowels,legend=False)
sns.scatterplot(ax=ax,data=formants,x='F1',y='F2',hue='vowel',hue_order=vowels,style='gender',s=200)
#ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
for vow in vowels:
    vowdata = select_hillenbrand(hildata,genders=['m','w'],vowels=[vow])
    Spch.plot_confidence_ellipse(vowdata['F1'], vowdata['F2'], fig.axes[0], n_std=2, edgecolor=vowel2color[vow])
ax.set_title("Gender Dependent Formant Values with Global Confidence Ellipses");
In [48]:
# only for male or female data
vowels = vow10
genders = ['m','w']
gmarkers = ['o','X']
fig,ax=plt.subplots()
for i in [0,1]:
    sns.scatterplot(ax=ax,x='F1',y='F2',data=data10.loc[data10['gender']==genders[i]],marker = gmarkers[i],hue="vowel",s=25,hue_order=vowels,legend=False)
sns.scatterplot(ax=ax,data=formants,x='F1',y='F2',hue='vowel',hue_order=vowels,style='gender',s=200)
#ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
for gender in genders:
    for vow in vowels:
        vowdata = select_hillenbrand(hildata,vowels=[vow],genders=[gender])
        Spch.plot_confidence_ellipse(vowdata['F1'], vowdata['F2'], ax, n_std=2, edgecolor=vowel2color[vow])
ax.set_title("Gender Dependent Formant Values and Gender Dependent Confidence Ellipses");

6. Grid plots for higher dimensional data¶

In most of the scatter plots we limited ourselves to the F1-F2 data. However, while F1 and F2 may be among the most relevant features of speech, there is much more: F3, f0, ...

The 2D scatter plot has its limits if we want to see how all these features work together. A simple extension is the grid plot, which combines a multitude of 2D scatter plots. It doesn't show the true high-dimensional distribution, but its 2D sub-views can already tell quite a bit more.

It is quite obvious that extra information may be obtained from F3 and f0.

  • f0 has a rather clean bimodal distribution, which correlates well to gender
  • F3 is in a few cases very complementary, e.g. for /er/, which is not well distinguished with F1,F2 alone
In [32]:
genders = ['m','w']
nvow = 6
features = ['f0','F1','F2','F3']
# select data and set classes / side_kick
data =select_hillenbrand(hildata,genders=genders,vowels=vow6)
target = 'vowel'
classes = vow6
side_classes = genders


##########################
# 1. make a grid plot using all features and the target as hue 
g = sns.PairGrid(data.loc[:,[target]+features],hue=target,hue_order=classes,height=2.5)
g.map_diag(plt.hist, linewidth=1)
g.map_offdiag(plt.scatter,s=3)
g.add_legend()
g.fig.suptitle("Multi-feature Scatter Plots (features: " + " ".join(features) +")" );
#
In [16]:
# showing with kernel density plots
data = select_hillenbrand(hildata,vowels=vow10)
sns.displot(data, x="F1", y="F2", hue="vowel", kind="kde",hue_order=vow10,levels=5,bw_adjust=1)
#plt.yscale('log')
#plt.ylim([700.,3200.])
#plt.xlim([250.,1200.])