notebooks/dataset_analysis/CheckPitch.ipynb
%load_ext autoreload
%autoreload 2
import numpy as np
import glob
from TTS.utils.audio import AudioProcessor
from TTS.config.shared_configs import BaseAudioConfig
from TTS.tts.utils.visual import plot_pitch
# Directory that holds the cached pitch arrays (.npy files).
pitch_path = "/home/ubuntu/TTS/recipes/ljspeech/fast_pitch/f0_cache"
# Directory that holds the dataset wav files.
wav_path = "/home/ubuntu/TTS/recipes/ljspeech/LJSpeech-1.1/wavs"
# Derive the pattern from wav_path so the directory is defined in one place,
# and sort the result: glob returns matches in arbitrary order, which would
# make any index-based selection differ between runs or machines.
wav_files = sorted(glob.glob(f"{wav_path}/*.wav"))
# Quick sanity check on how many wav files were found.
print(len(wav_files))
# Audio settings handed to the AudioProcessor below.
# NOTE(review): presumably these should mirror the settings used when the
# pitch cache was computed -- confirm against the training recipe.
audio_config = BaseAudioConfig(
    sample_rate=22050,
    do_trim_silence=True,
    trim_db=60.0,
    signal_norm=False,
    mel_fmin=0.0,
    mel_fmax=8000,
    spec_gain=1.0,
    log_func="np.log",
    ref_level_db=20,
    preemphasis=0.0,
)
# The config object is unpacked into keyword arguments for the processor.
ap = AudioProcessor(**audio_config)
# Map every wav path to its pitch file: the ".wav" suffix becomes
# "_pitch.npy" and the wav directory prefix is swapped for the pitch directory.
pitch_files = [wf.replace(".wav", "_pitch.npy").replace(wav_path, pitch_path) for wf in wav_files]

# Index-based selection, kept as an alternative to the fixed utterance below.
idx = 100
# wav_file = wav_files[idx]
# pitch_file = pitch_files[idx]

# Inspect one fixed utterance. Both paths are derived from a single id and the
# shared directory variables, so the wav and its pitch file cannot get out of
# sync and there is only one place to edit when checking another utterance.
file_id = "LJ011-0097"
wav_file = f"{wav_path}/{file_id}.wav"
pitch_file = f"{pitch_path}/{file_id}_pitch.npy"

# Load the stored pitch array and compute the mel spectrogram of the same wav.
pitch = np.load(pitch_file)
wav = ap.load_wav(wav_file)
spec = ap.melspectrogram(wav)
# The spectrogram is passed transposed to the plotting helper.
plot_pitch(pitch, spec.T)