import polars as pl
import matplotlib.pyplot as plt
from scipy.fft import dct, idct
import numpy as np
from fasttrackpy import process_corpus
from pathlib import Path
Working with the smoothing parameters
FastTrackPy smooths formant trajectories using a form of Discrete Cosine Transform. Here’s how you can convert the DCT parameters back into formant track smooths after, e.g. averaging them.
= Path("..","assets", "corpus")
corpus_path = process_corpus(corpus_path) measurements
100%|██████████| 65/65 [00:01<00:00, 62.80it/s]
100%|██████████| 274/274 [00:01<00:00, 224.56it/s]
Let’s just look at /ay/ from these speakers.
= [track for track in measurements if track.label == "AY1"]
ays 0].winner.spectrogram() ays[
The Discrete Cosine Transform
The smoothed formants are a 5th order discrete cosine transform. The DCT parameters are estimated via regression, using the following basis.
= ays[0].winner.time_domain
time = time.size
N = 5
order
= dct(
full_basis
np.eye(N), =True,
orthogonalize= "backward"
norm
)
= full_basis[:,:order]
basis
plt.plot(time, basis)range(5))
plt.legend( plt.show()
DCT coefficients define how to weight and sum this bank of cosine functions to match the input signal.
# the dct coefs for f1
= ays[0].winner.parameters[0,:]
f1_params
# the formant track
= ays[0].winner.formants[0,:]
f1
* f1_params)
plt.plot(time, basis "weighted coefficients")
plt.title( plt.show()
@ f1_params)
plt.plot(time, basis
plt.plot(time, f1)"weighted and summed")
plt.title("DCT", "F1"])
plt.legend([ plt.show()
It’s possible to estimate DCT coefficients directly with scipy.fft.dct
, but because there is potential for missing data in some formant tracks, FastTrackPy instead estimates the DCT coefficients using regression.
def reg_fit(x, basis):
= np.linalg.inv(basis.T @ basis) @ (basis.T @ x)
params return params
= reg_fit(f1, basis)
params print(params)
[480.35423508 42.95237019 -25.847216 43.60350087 -21.40266949]
Inverting the DCT
It’s possible to invert the DCT parameters with scipy.fft.idct()
, with orthognalize = True
and norm = "forward"
. You also need to define how large you want the resulting smooth to be with n
= idct(
f1_smooth
params, = time.size,
n =True,
orthogonalize= "forward"
norm
)
plt.plot(time, f1_smooth)
plt.plot(time, f1)"smooth", "F1"])
plt.legend([ plt.show()
Benefits of using the DCT parameters
“Down sampling”
It’s somewhat commmon to reduce the number of measurement points from a formant track to a common number (e.g. 20 evenly spaced points). We can achieve that by inverting the DCT parameters and using differently sized outputs.
= idct(params, n = 100, orthogonalize=True, norm = "forward")
smooth_100 = idct(params, n = 20, orthogonalize=True, norm = "forward")
smooth_20
plt.scatter(100)/99,
np.arange(
smooth_100
)
plt.scatter(20)/19,
np.arange(
smooth_20
)"100", "20"])
plt.legend([ plt.show()
Averaging
Instead of point-wise averaging over formant tracks, you can instead average over the DCT parameters, then invert the average. We can get a polars data frame of all of the parameters of /ays/ from these speakers with the .to_df()
method,
= pl.concat([
all_ay_param = "param")
ay.to_df(output for ay in ays
]) all_ay_param.head()
param | F1 | F2 | F3 | F4 | error | file_name | id | group | label |
---|---|---|---|---|---|---|---|---|---|
u32 | f64 | f64 | f64 | f64 | f64 | str | str | str | str |
0 | 480.354235 | 979.982402 | 1844.645657 | 2375.309331 | 0.007663 | "KY25A_1" | "0-0-8-1" | "KY25A" | "AY1" |
1 | 42.95237 | -130.966974 | -15.909405 | 2.074594 | 0.007663 | "KY25A_1" | "0-0-8-1" | "KY25A" | "AY1" |
2 | -25.847216 | 13.467448 | -67.211033 | -35.364144 | 0.007663 | "KY25A_1" | "0-0-8-1" | "KY25A" | "AY1" |
3 | 43.603501 | 4.180242 | -84.055828 | 18.796364 | 0.007663 | "KY25A_1" | "0-0-8-1" | "KY25A" | "AY1" |
4 | -21.402669 | -2.113492 | -42.2589 | -20.556695 | 0.007663 | "KY25A_1" | "0-0-8-1" | "KY25A" | "AY1" |
To get the the average of each parameter for each speaker, we need to group the dataframe by the param
, file_name
and group
columns, then aggregate them.
= (all_ay_param
ay_param_means
.group_by("param", "file_name", "group", ],
[= True)
maintain_order
.agg("F1").mean()
pl.col(
)
) ay_param_means.head()
param | file_name | group | F1 |
---|---|---|---|
u32 | str | str | f64 |
0 | "KY25A_1" | "KY25A" | 509.370491 |
1 | "KY25A_1" | "KY25A" | 44.866749 |
2 | "KY25A_1" | "KY25A" | -8.803074 |
3 | "KY25A_1" | "KY25A" | 27.038315 |
4 | "KY25A_1" | "KY25A" | -11.637004 |
Here’s the fitted F1 for the first speaker.
= ay_param_means["F1"][0:5]
speaker_params = idct(
speaker_fit
speaker_params,= 100,
n =True,
orthogonalize="forward"
norm
)
plt.plot(speaker_fit) plt.show()
The code below for getting fitted F1 tracks for each speaker is a bit detailed with respect to how polars dataframes work, but the process could be replicated in whatever way a user feels comfortable.
= (
ay_fits
ay_param_means"file_name", "group"])
.group_by(["F1"))
.agg(pl.col(
.with_columns(=pl.col("F1")
f1_fit
.map_elements(lambda x: idct(x, n = 100, orthogonalize=True, norm = "forward").tolist(),
=pl.List(pl.Float64)
return_dtype
),= pl.lit(np.linspace(0,1,100).tolist())
time
)"f1_fit", "time"])
.explode([ )
ay_fits.plot("time",
"f1_fit",
= "group"
by )
Getting derivatives
If we wanted to get the first derivative (or the rate of change) of the formant track smooths, this can also be calculated (code modified from here.)
from scipy.fft import idst
def first_deriv(coefs, size = 100):
= coefs.copy()
hatu for i in range(hatu.size):
=-(i)*hatu[i]
hatu[i]-1]=hatu[1:]
hatu[:-1]=0
hatu[=idst(hatu, n = size, type=2)
dotureturn dotu
= (
speaker_fit
ay_fitsfilter(pl.col("group").str.contains("group"))
.
)
= (
speaker_params
ay_param_meansfilter(pl.col("group").str.contains("group"))
."F1")
.select(
)
= first_deriv(speaker_params["F1"].to_numpy(), size = 100) speaker_rate
= plt.subplots(nrows= 1, ncols=2)
fig, axes 0].plot(speaker_fit["f1_fit"])
axes[0].set_title("f1")
axes[1].plot(speaker_rate)
axes[1].set_title("first derivative")
axes[ plt.show()