Sanity check after the NASH hackathon.
Yes — it learns distortion nearly perfectly. This is essentially the same as the time-alignment notebook, only with distortion instead.
# Environment setup (IPython magics / shell commands; run in a notebook, then restart the kernel).
%pip install -Uqq pip
# Next line only executes on Colab. Colab users: Please enable GPU in Edit > Notebook settings
! [ -e /content ] && pip install -Uqq fastai git+https://github.com/drscotthawley/fastproaudio.git
# Additional installs for this tutorial
%pip install -q fastai_minima torchsummary pyzenodo3 wandb
# Install micro-tcn and auraloss packages (from source, will take a little while)
%pip install -q wheel --ignore-requires-python git+https://github.com/csteinmetz1/micro-tcn.git git+https://github.com/csteinmetz1/auraloss
# After this cell finishes, restart the kernel and continue below
from fastai.vision.all import *
from fastai.text.all import *
from fastai.callback.fp16 import *
import wandb
from fastai.callback.wandb import *
import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T
from IPython.display import Audio
import matplotlib.pyplot as plt
import torchsummary
from fastproaudio.core import *
from pathlib import Path
import glob
import json
import re
import warnings
# mel-spectrogram plot keeps throwing matplotlib deprecation warnings
warnings.filterwarnings( "ignore", module = "librosa\..*" )
# Paired clean/distorted audio dataset. Jacob made this, SHH fixed it up a bit.
data_dir = '/home/shawley/datasets/pb_dist'
path = Path(data_dir)

# Collect input/target file pairs; sorting both lists the same way keeps them aligned,
# assuming each subdirectory holds one input* and one *targ* file (TODO confirm).
fnames_in = sorted(glob.glob(str(path)+'/*/input*'))
fnames_targ = sorted(glob.glob(str(path)+'/*/*targ*'))
# Fail loudly on a missing/mismatched dataset rather than silently mispairing files later.
if not fnames_in or len(fnames_in) != len(fnames_targ):
    raise FileNotFoundError(
        f"Expected matching input/target audio files under {path}: "
        f"found {len(fnames_in)} inputs and {len(fnames_targ)} targets")

ind = np.random.randint(len(fnames_in))  # pick one random spot in the list of files
fnames_in[ind], fnames_targ[ind]         # bare expression: notebook displays the chosen pair
Input audio
# Load the randomly chosen input (pre-distortion) audio file and display it.
# NOTE(review): `input` shadows the Python builtin of the same name; left
# unchanged because later notebook cells may reference this variable.
input, sample_rate = torchaudio.load(fnames_in[ind])
print("sample_rate = ",sample_rate)
show_audio(input, sample_rate)