Open in Google Colab

Spectrogram - Basics¶

This notebook contains code for a number of spectrogram examples.

Usage:

  • No code needs to be changed
  • You can change the following variables:
    • 'name' in the first code cell of section 1, to choose a different file (another suggestion is commented out)
    • parameters (i1, frames) in the last cells as indicated in the description
    • flag SAVE_FIG: when set to True, the example plots are saved to file
In [4]:
# uncomment the pip install command to install pyspch -- it is required!
#
#!pip install git+https://github.com/compi1234/pyspch.git
#
try:
    import pyspch
except ModuleNotFoundError:
    # pyspch is not installed: show the user how to install it.
    # Nothing is raised here; the `import pyspch.*` statements further down
    # will still fail until the package has been installed.
    print(
        """
        To enable this notebook on platforms as Google Colab, 
        install the pyspch package and dependencies by running following code:

        !pip install git+https://github.com/compi1234/pyspch.git
        """
    )

# Do the imports #
##################
#
%matplotlib inline
import os,sys 
import numpy as np
import pandas as pd
from IPython.display import display, Audio, HTML, clear_output
import matplotlib.pyplot as plt
#   
import pyspch.sp as Sps
import pyspch.core as Spch
import pyspch.display as Spd
import librosa

# Style sheet that makes the notebook cells stretch over the full screen
_FULL_WIDTH_CSS = """
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""
display(HTML(data=_FULL_WIDTH_CSS))

# When True, the example plots are also saved to file
SAVE_FIG = False

1. Waveform + Spectrogram with segmentations¶

First we load a sample waveform with its available segmentations.

Then we create a standard spectrogram view of the loaded speech file. In the top pane we see the signal waveform and below it a spectrogram. The waveform shows the signal amplitude at every sample. The spectrogram is a heatmap representing energy in the time-frequency domain. In practice we compute a short-time Fourier spectrum every 10msec and stack these together as the columns in the spectrogram.

In [5]:
# Select the demo utterance (swap the commented line to try the other one)
name = "friendly"   # narrowband, male
#name = "expansionist"  # wideband, female
file = f"demo/{name}"

# Waveform with its sample rate, and the spectrogram computed from it
wavdata, sr = Spch.load_data(f"{file}.wav")
spg = Sps.spectrogram(wavdata, sample_rate=sr, n_mels=None)

# Segmentation files that accompany the waveform (.wrd, .phn, .gra), loaded in that order
segwrd, segphn, seggra = [Spch.load_data(file + ext) for ext in (".wrd", ".phn", ".gra")]

# No .gra segmentation available: use the .phn segmentation in its place
if seggra is None:
    seggra = segphn
In [6]:
# Waveform (axis 0) and spectrogram (axis 1) with the segmentations drawn on top
fig = Spd.PlotSpg(spgdata=spg, wavdata=wavdata, sample_rate=sr, ylabel='Frequency', figsize=(16,9))
fig.add_seg_plot(segwrd, iax=0, xrange=fig.axes[0].get_xlim(), ypos=0.9,
                 txtargs={'color':'brown','fontsize':14}, lineargs={'color':'brown','linewidth':2})
fig.add_seg_plot(seggra, iax=1, ypos=0.1, txtargs={'color':'black','fontsize':20},
                 lineargs={'color':'black','linestyles':'dashed'})
fig.suptitle("Waveform+Spectrogram", fontsize=16)
fig.axes[0].grid(False, axis='y')
display(fig)
display(Audio(data=wavdata, rate=sr))
if SAVE_FIG:
    # `file` carries the "demo/" prefix, which would place the image in a
    # non-existent "figures/spg_demo/" directory; use the bare `name` instead,
    # as the other save paths in this notebook do.
    os.makedirs("figures", exist_ok=True)
    fig.savefig("figures/spg_"+name+".png")
No description has been provided for this image
Your browser does not support the audio element.

2. Sliding Window¶

We analyze speech by cutting it into successive frames, with a typical frame shift of 10 msec. For a number of signal processing reasons we use overlapping frames, i.e. a frame length that is larger than the frame shift (e.g. 25 msec). This allows us to use a window that tapers toward the edges, such as the Hamming window (used in the example below), to improve the quality of our spectral computation.

The figure below gives an illustration of the sliding window approach.

In the first illustration, you see how successive frames are cut out of a continuous signal. You can adjust 'i1' to give a different starting frame number and 'n' to give the number of successive frames in the plot.

In the second illustration you see how the sliding window approach is used for spectrogram generation. You can adjust the 'frames' variable to show a different part of the spectrogram.

In [11]:
# --- user-adjustable parameters -------------------------------------------
i1=45                         # index of the first frame that is drawn
if name == 'friendly': i1=15
n=5                           # number of successive frames that are drawn
shift=0.01                    # frame shift in seconds
length=0.025                  # frame length in seconds
scale=.75                     # vertical scale applied to the window and the windowed frames
n_shift = int(shift*sr)       # frame shift in samples
n_length = int(length*sr)     # frame length in samples
# rearrange the long 1-D data as a sequence of windowed frames
wavshow = wavdata[(i1-1)*n_shift:]
nx = (n_length-n_shift)//2    # overhang of a frame on either side of its shift interval
wav_as_frames = Sps.make_frames(wavdata,pad=nx,n_shift=n_shift,n_length=n_length,preemp=0.0,window='hamming')
window = librosa.filters.get_window('hamming',n_length)

colors = ['r','g','b','c','y','m']
#
fig,ax = plt.subplots(figsize=(12,6))
ax.plot(wavshow,linewidth=1,color='k')
ax.set_xlim([0,n_shift*(n+2)])
ax.axis('off')
ix1 = np.arange(1,n+1)*n_shift    # first sample of each shift interval (relative to wavshow)
ix2 = ix1+n_shift                 # one past the last sample of each shift interval
for i in range(n):
    color = colors[i % len(colors)]
    xx1 = np.arange(ix1[i],ix2[i])
    # x positions of the full frame: always exactly n_length points, so that they
    # match the window even when (n_length - n_shift) is odd
    xx2 = (ix1[i]-nx) + np.arange(n_length)
    # window outline on top of the original signal
    ax.plot(xx2,scale*window,linestyle='--',linewidth=2,color=color)
    # central part of the frame (one frame shift wide), drawn over the signal
    ax.plot(xx1,wav_as_frames[nx:nx+n_shift,i1+i],color=color)
    # the complete windowed frame, shifted down so that successive frames do not overlap
    ax.plot(xx2,scale*(wav_as_frames[:,i1+i]-(i+1.5)),color=color)
if SAVE_FIG:
    os.makedirs('figures', exist_ok=True)
    fig.savefig('figures/SlidingWindow'+name+'.png')
No description has been provided for this image
In [12]:
# Frame shift and frame length: first in seconds, then converted to samples
shift, length = 0.01, 0.025
n_shift, n_length = int(shift*sr), int(length*sr)
def highlight_frame(fig,iframe=0):    # the animation part
    """Highlight one analysis frame in `fig` and return the artists that were added.

    On axis 0 the (half-height) Hamming window and the samples of frame `iframe`
    are drawn; on every further axis the frame-shift interval belonging to that
    frame is marked with a translucent span.

    The function reads the module-level variables `wavdata`, `shift`, `length`,
    `n_shift` and `n_length`.

    Parameters
    ----------
    fig : figure whose `axes[0]` is the waveform axis (time in seconds on x)
    iframe : int
        Index of the frame to highlight. For frames so close to the signal
        boundaries that the slice is shorter than `n_length` samples, plotting
        will fail; the caller is expected to skip those frames.

    Returns
    -------
    list
        The created artists, so that the caller can `remove()` them again.
    """
    nx = (n_length-n_shift)//2
    window = .5*librosa.filters.get_window('hamming',n_length)
    # frame position in seconds (x axis) and in samples (index into wavdata)
    pos = iframe*shift
    x0 = pos - (length-shift)/2.
    ix0 = iframe*n_shift - nx
    wav_frame = wavdata[ix0:ix0+n_length]
    xx0 = np.linspace(x0,x0+length,num=n_length,endpoint=False)
    l1, = fig.axes[0].plot(xx0,window,linestyle=':',color='r')
    l2, = fig.axes[0].plot(xx0,wav_frame,linestyle='-',color='r',lw=3.)
    highlights = [l1,l2]
    # mark the frame on each of the remaining axes (not only on axis 1)
    for ax in fig.axes[1:]:
        patch = ax.axvspan(pos,pos+shift, color='w',alpha=.5,ec='r',lw=5.)   # color='#AAA',alpha=.5,ec='#A00',lw=5.)
        highlights.append(patch)
    return highlights

frames=[0,31]                 # [first, last] frame of the spectrogram excerpt that is shown
# Prefix of the per-frame image files; the 3-digit frame number is appended when saving.
# It must be defined here at module level because the save call in the loop below uses it.
filename = "animations/slwin_"+name+"_"
if SAVE_FIG:
    os.makedirs(os.path.dirname(filename), exist_ok=True)

spg = Sps.spectrogram(wavdata,sample_rate=sr,n_mels=None)
fig=Spd.PlotSpg(spgdata=spg,wavdata=wavdata,sample_rate=sr,ylabel='Frequency',frames=frames,figsize=(16,6))
fig.add_seg_plot(seggra,iax=0,ypos=.82,color='#888',size=12,Lines=False)
fig.suptitle("Waveform+Spectrogram",fontsize=16)
fig.axes[0].grid(False,axis='y')
display(fig)
for iframe in range(frames[0]+1,frames[1]-1): # omit boundary frames for safe plotting
    clear_output(wait=True)
    highlights = highlight_frame(fig,iframe=iframe)
    display(fig)
    if SAVE_FIG:
        fig.savefig(filename+f'{iframe:03d}')
    # take the highlight off again so that the next frame starts from a clean figure
    for artist in highlights:
        artist.remove()
No description has been provided for this image