sovits4_for_colab.ipynb
sovits4data in your google drive the first time you use this notebook.
sovits_data_dir variable.
#@title Connect to colab runtime and check GPU
#@markdown # Connect to colab runtime and check GPU
#@markdown
# Run nvidia-smi so the cell output shows which GPU (if any) the runtime has.
!nvidia-smi
#@title Clone repository and install requirements
#@markdown # Clone repository and install requirements
#@markdown
#@markdown ### After the execution is completed, the runtime will **automatically restart**
#@markdown
# Clone the 4.1-Stable branch. The next line changes into
# /content/so-vits-svc, so this assumes the cell starts in /content.
!git clone https://github.com/svc-develop-team/so-vits-svc -b 4.1-Stable
%cd /content/so-vits-svc
# Upgrade the packaging tools first, then install the project requirements;
# the extra index URL points at the cu118 PyTorch wheel index.
%pip install --upgrade pip setuptools
%pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu118
# Ends the kernel process; this is what causes the automatic restart announced
# above (presumably so the freshly installed packages are picked up).
exit()
#@title Mount google drive and select which directories to sync with google drive
#@markdown # Mount google drive and select which directories to sync with google drive
#@markdown
from google.colab import drive
drive.mount("/content/drive")
#@markdown Directory to store **necessary files**, dont miss the slash at the endπ.
sovits_data_dir = "/content/drive/MyDrive/sovits4data/" #@param {type:"string"}
#@markdown By default it will create a `sovits4data/` folder in your google drive.
RAW_DIR = sovits_data_dir + "raw/"
RESULTS_DIR = sovits_data_dir + "results/"
FILELISTS_DIR = sovits_data_dir + "filelists/"
CONFIGS_DIR = sovits_data_dir + "configs/"
LOGS_DIR = sovits_data_dir + "logs/44k/"
#@markdown
#@markdown ### These folders will be synced with your google drvie
#@markdownγ### **Strongly recommend to check all.**
#@markdown Sync **input audios** and **output audios**
sync_raw_and_results = True #@param {type:"boolean"}
if sync_raw_and_results:
!mkdir -p {RAW_DIR}
!mkdir -p {RESULTS_DIR}
!rm -rf /content/so-vits-svc/raw
!rm -rf /content/so-vits-svc/results
!ln -s {RAW_DIR} /content/so-vits-svc/raw
!ln -s {RESULTS_DIR} /content/so-vits-svc/results
#@markdown Sync **config** and **models**
sync_configs_and_logs = True #@param {type:"boolean"}
if sync_configs_and_logs:
!mkdir -p {FILELISTS_DIR}
!mkdir -p {CONFIGS_DIR}
!mkdir -p {LOGS_DIR}
!rm -rf /content/so-vits-svc/filelists
!rm -rf /content/so-vits-svc/configs
!rm -rf /content/so-vits-svc/logs/44k
!ln -s {FILELISTS_DIR} /content/so-vits-svc/filelists
!ln -s {CONFIGS_DIR} /content/so-vits-svc/configs
!ln -s {LOGS_DIR} /content/so-vits-svc/logs/44k
#@title Get pretrained model(Optional but strongly recommend).
#@markdown # Get pretrained model(Optional but strongly recommend).
#@markdown
#@markdown - Pre-trained model files: `G_0.pth` `D_0.pth`
#@markdown - Place them under /sovits4data/logs/44k/ in your google drive manualy
#@markdown Get them from svc-develop-team(TBD) or anywhere else.
#@markdown Although the pretrained model generally does not cause any copyright problems, please pay attention to it. For example, ask the author in advance, or the author has indicated the feasible use in the description clearly.
download_pretrained_model = True #@param {type:"boolean"}
D_0_URL = "https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_D_320000.pth" #@param ["https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_D_320000.pth", "https://huggingface.co/1asbgdh/sovits4.0-volemb-vec768/resolve/main/clean_D_320000.pth", "https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/vol_emb/clean_D_320000.pth"] {allow-input: true}
G_0_URL = "https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_G_320000.pth" #@param ["https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_G_320000.pth", "https://huggingface.co/1asbgdh/sovits4.0-volemb-vec768/resolve/main/clean_G_320000.pth", "https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/vol_emb/clean_G_320000.pth"] {allow-input: true}
download_pretrained_diffusion_model = True #@param {type:"boolean"}
diff_model_URL = "https://huggingface.co/datasets/ms903/Diff-SVC-refactor-pre-trained-model/resolve/main/fix_pitch_add_vctk_600k/model_0.pt" #@param {type:"string"}
%cd /content/so-vits-svc
if download_pretrained_model:
!curl -L {D_0_URL} -o logs/44k/D_0.pth
!md5sum logs/44k/D_0.pth
!curl -L {G_0_URL} -o logs/44k/G_0.pth
!md5sum logs/44k/G_0.pth
if download_pretrained_diffusion_model:
!mkdir -p logs/44k/diffusion
!curl -L {diff_model_URL} -o logs/44k/diffusion/model_0.pt
!md5sum logs/44k/diffusion/model_0.pt
Pack your raw dataset (dataset_raw/) into a zip file and upload it to your google drive.
Make sure the file structure in your zip file looks like this:
YourZIPforSingleSpeakers.zip
└───speaker
    ├───xxx1-xxx1.wav
    ├───...
    └───Lxx-0xx8.wav
YourZIPforMultipleSpeakers.zip
├───speaker0
│   ├───xxx1-xxx1.wav
│   ├───...
│   └───Lxx-0xx8.wav
└───speaker1
    ├───xx2-0xxx2.wav
    ├───...
    └───xxx7-xxx007.wav
Even if there is only one speaker, a folder named {speaker_name} is needed.
#@title Get raw dataset from google drive
#@markdown # Get raw dataset from google drive
#@markdown
#@markdown Directory where **your zip file** located in, dont miss the slash at the endπ.
sovits_data_dir = "/content/drive/MyDrive/sovits4data/" #@param {type:"string"}
#@markdown Filename of **your zip file**, do NOT be "dataset.zip"
zip_filename = "YourZIPFilenameofRawDataset.zip" #@param {type:"string"}
ZIP_PATH = sovits_data_dir + zip_filename
!unzip -od /content/so-vits-svc/dataset_raw {ZIP_PATH}
#@title Resample to 44100Hz and mono
#@markdown # Resample to 44100Hz and mono
#@markdown
# Run the repository's resample.py from the repository root.
%cd /content/so-vits-svc
!python resample.py
#@title Divide filelists and generate config.json
#@markdown # Divide filelists and generate config.json
#@markdown
%cd /content/so-vits-svc
speech_encoder = "vec768l12" #@param ["vec768l12", "vec256l9", "hubertsoft", "whisper-ppg", "whisper-ppg-large"]
use_vol_aug = False #@param {type:"boolean"}
# Pass --vol_aug to the preprocessing script only when the box is ticked.
vol_aug = "--vol_aug" if use_vol_aug else ""
from pretrain.meta import download_dict
# NOTE: this rebinds the imported function's name to the value it returns.
download_dict = download_dict()
# Look up the download URL and the local output path for the chosen encoder.
url = download_dict[speech_encoder]["url"]
output = download_dict[speech_encoder]["output"]
import os
# Download the encoder file only when the output path does not exist yet.
# NOTE(review): indentation was lost in this copy. The curl line must belong
# to the `if` body; whether md5sum does too should be confirmed against the
# original notebook.
if not os.path.exists(output):
!curl -L {url} -o {output}
!md5sum {output}
# Run the preprocessing script with the chosen encoder and optional flag.
!python preprocess_flist_config.py --speech_encoder={speech_encoder} {vol_aug}
#@title Generate hubert and f0
#@markdown # Generate hubert and f0
#@markdown
%cd /content/so-vits-svc
f0_predictor = "crepe" #@param ["crepe", "pm", "dio", "harvest", "rmvpe", "fcpe"]
use_diff = True #@param {type:"boolean"}
import os
# The rmvpe and fcpe predictors need an extra file under pretrain/; fetch it
# only when that predictor is selected and the file is not already there.
# NOTE(review): indentation was lost in this copy; each curl line is
# presumably the body of the `if` directly above it.
if f0_predictor == "rmvpe" and not os.path.exists("./pretrain/rmvpe.pt"):
!curl -L https://huggingface.co/datasets/ylzz1997/rmvpe_pretrain_model/resolve/main/rmvpe.pt -o pretrain/rmvpe.pt
if f0_predictor == "fcpe" and not os.path.exists("./pretrain/fcpe.pt"):
!curl -L https://huggingface.co/datasets/ylzz1997/rmvpe_pretrain_model/resolve/main/fcpe.pt -o pretrain/fcpe.pt
# Extra command-line flag for the preprocessing script; empty unless use_diff.
diff_param = ""
if use_diff:
diff_param = "--use_diff"
# Download and unpack nsf_hifigan when pretrain/nsf_hifigan/model is missing.
# NOTE(review): presumably this check is nested under `if use_diff:` and the
# five shell lines below are its body — confirm against the original notebook.
if not os.path.exists("./pretrain/nsf_hifigan/model"):
!curl -L https://github.com/openvpi/vocoders/releases/download/nsf-hifigan-v1/nsf_hifigan_20221211.zip -o nsf_hifigan_20221211.zip
!md5sum nsf_hifigan_20221211.zip
!unzip nsf_hifigan_20221211.zip
!rm -rf pretrain/nsf_hifigan
!mv -v nsf_hifigan pretrain
# Run the preprocessing script with the selected predictor and optional flag.
!python preprocess_hubert_f0.py --f0_predictor={f0_predictor} {diff_param}
#@title Save the preprocessed dataset to google drive
#@markdown # Save the preprocessed dataset to google drive
#@markdown
#@markdown You can save the dataset and related files to your google drive for the next training
#@markdown **Directory for saving**, dont miss the slash at the endπ.
sovits_data_dir = "/content/drive/MyDrive/sovits4data/" #@param {type:"string"}
#@markdown There will be a `dataset.zip` contained `dataset/` in your google drive, which is preprocessed data.
!mkdir -p {sovits_data_dir}
!zip -r dataset.zip /content/so-vits-svc/dataset
!cp -vr dataset.zip "{sovits_data_dir}"
#@title Unzip preprocessed dataset from google drive directly if you have preprocessed already.
#@markdown # Unzip preprocessed dataset from google drive directly if you have preprocessed already.
#@markdown
#@markdown Directory where **your preprocessed dataset** located in, dont miss the slash at the endπ.
sovits_data_dir = "/content/drive/MyDrive/sovits4data/" #@param {type:"string"}
CONFIG = sovits_data_dir + "configs/"
FILELISTS = sovits_data_dir + "filelists/"
DATASET = sovits_data_dir + "dataset.zip"
!cp -vr {CONFIG} /content/so-vits-svc/
!cp -vr {FILELISTS} /content/so-vits-svc/
!unzip {DATASET} -d /
#@title Start training
#@markdown # Start training
#@markdown If you want to use pre-trained models, upload them to /sovits4data/logs/44k/ in your google drive manually.
#@markdown
%cd /content/so-vits-svc
#@markdown Whether to enable tensorboard
tensorboard_on = True #@param {type:"boolean"}
# NOTE(review): indentation was lost in this copy; the two tensorboard magics
# are presumably the body of this `if`.
if tensorboard_on:
%load_ext tensorboard
%tensorboard --logdir logs/44k
config_path = "configs/config.json"
from pretrain.meta import get_speech_encoder
# Get the download URL and local output path for the speech encoder;
# presumably the one selected in the config file — confirm in pretrain/meta.py.
url, output = get_speech_encoder(config_path)
import os
# Download the encoder file only when the output path does not exist yet
# (the curl line is presumably the `if` body).
if not os.path.exists(output):
!curl -L {url} -o {output}
# Launch training with the config above; -m 44k matches the logs/44k folder
# used elsewhere in this notebook.
!python train.py -c {config_path} -m 44k
#@title Train cluster model (Optional)
#@markdown # Train cluster model (Optional)
#@markdown #### Details see [README.md#cluster-based-timbre-leakage-control](https://github.com/svc-develop-team/so-vits-svc#cluster-based-timbre-leakage-control)
#@markdown
# Run the repository's cluster training script with its --gpu flag.
%cd /content/so-vits-svc
!python cluster/train_cluster.py --gpu
#@title Train index model (Optional)
#@markdown # Train index model (Optional)
#@markdown #### Details see [README.md#feature-retrieval](https://github.com/svc-develop-team/so-vits-svc#feature-retrieval)
#@markdown
# Run the repository's index training script against configs/config.json.
%cd /content/so-vits-svc
!python train_index.py -c configs/config.json
#@title Train diffusion model (Optional)
#@markdown # Train diffusion model (Optional)
#@markdown #### Details see [README.md#-about-shallow-diffusion](https://github.com/svc-develop-team/so-vits-svc#-about-shallow-diffusion)
#@markdown
%cd /content/so-vits-svc
import os
# Download and unpack nsf_hifigan when pretrain/nsf_hifigan/model is missing.
# NOTE(review): indentation was lost in this copy; the four shell lines below
# are presumably the body of this `if`.
if not os.path.exists("./pretrain/nsf_hifigan/model"):
!curl -L https://github.com/openvpi/vocoders/releases/download/nsf-hifigan-v1/nsf_hifigan_20221211.zip -o nsf_hifigan_20221211.zip
!unzip nsf_hifigan_20221211.zip
!rm -rf pretrain/nsf_hifigan
!mv -v nsf_hifigan pretrain
#@markdown Whether to enable tensorboard
tensorboard_on = True #@param {type:"boolean"}
# The two tensorboard magics are presumably the body of this `if`.
if tensorboard_on:
%load_ext tensorboard
%tensorboard --logdir logs/44k
# Launch diffusion training with the repository's diffusion config.
!python train_diff.py -c configs/diffusion.yaml
Open the devtools, then copy and paste the script below into the console and run it.
// Keep-alive helper: clicks Colab's connect button, then re-schedules itself
// with a random delay of 50-60 seconds.
const ping = () => {
  const btn = document.querySelector("colab-connect-button");
  // querySelector returns null when nothing matches. Look the inner button up
  // defensively so a missing element cannot throw before the next run is
  // scheduled, which would silently stop the loop.
  const shadow = btn ? btn.shadowRoot : null;
  const inner_btn = shadow ? shadow.querySelector("#connect") : null;
  if (inner_btn) {
    inner_btn.click();
    console.log("Clicked on connect button");
  } else {
    console.log("connect button not found");
  }
  // Always re-arm, whether or not the button was found this time.
  const nextTime = 50000 + Math.random() * 10000;
  setTimeout(ping, nextTime);
};
ping();
sovits4data/raw/ in your google drive manually (should be faster)
#@title Download nsf_hifigan if you need it
# Work from the repository root so the relative paths below resolve there.
%cd /content/so-vits-svc
# Download the nsf_hifigan release archive into the repository folder.
!curl -L https://github.com/openvpi/vocoders/releases/download/nsf-hifigan-v1/nsf_hifigan_20221211.zip -o /content/so-vits-svc/nsf_hifigan_20221211.zip
# Unpack it, remove any existing pretrain/nsf_hifigan, then move the
# nsf_hifigan folder (presumably produced by the unzip) into pretrain/.
!unzip nsf_hifigan_20221211.zip
!rm -rf pretrain/nsf_hifigan
!mv -v nsf_hifigan pretrain
#@title Upload wav files, the filename should not contain any special symbols like `#` `$` `(` `)`
#@markdown # Upload wav files, the filename should not contain any special symbols like `#` `$` `(` `)`
#@markdown
# Run the repository's wav_upload.py inside the notebook kernel (%run) with
# --type audio.
%cd /content/so-vits-svc
%run wav_upload.py --type audio
#@title Start inference (and download)
#@markdown # Start inference (and download)
#@markdown Parameters see [README.MD#Inference](https://github.com/svc-develop-team/so-vits-svc#-inference)
#@markdown
wav_filename = "YourWAVFile.wav" #@param {type:"string"}
model_filename = "G_210000.pth" #@param {type:"string"}
model_path = "/content/so-vits-svc/logs/44k/" + model_filename
speaker = "YourSpeaker" #@param {type:"string"}
trans = "0" #@param {type:"string"}
cluster_infer_ratio = "0" #@param {type:"string"}
auto_predict_f0 = False #@param {type:"boolean"}
apf = ""
if auto_predict_f0:
apf = " -a "
f0_predictor = "crepe" #@param ["crepe", "pm", "dio", "harvest", "rmvpe", "fcpe"]
enhance = False #@param {type:"boolean"}
ehc = ""
if enhance:
ehc = " -eh "
#@markdown
#@markdown Generally keep default:
config_filename = "config.json" #@param {type:"string"}
config_path = "/content/so-vits-svc/configs/" + config_filename
from pretrain.meta import get_speech_encoder
url, output = get_speech_encoder(config_path)
import os
if f0_predictor == "rmvpe" and not os.path.exists("./pretrain/rmvpe.pt"):
!curl -L https://huggingface.co/datasets/ylzz1997/rmvpe_pretrain_model/resolve/main/rmvpe.pt -o pretrain/rmvpe.pt
if f0_predictor == "fcpe" and not os.path.exists("./pretrain/fcpe.pt"):
!curl -L https://huggingface.co/datasets/ylzz1997/rmvpe_pretrain_model/resolve/main/fcpe.pt -o pretrain/fcpe.pt
if not os.path.exists(output):
!curl -L {url} -o {output}
kmeans_filenname = "kmeans_10000.pt" #@param {type:"string"}
kmeans_path = "/content/so-vits-svc/logs/44k/" + kmeans_filenname
slice_db = "-40" #@param {type:"string"}
wav_format = "flac" #@param {type:"string"}
key = "auto" if auto_predict_f0 else f"{trans}key"
cluster_name = "" if cluster_infer_ratio == "0" else f"_{cluster_infer_ratio}"
isdiffusion = "sovits"
wav_output = f"/content/so-vits-svc/results/{wav_filename}_{key}_{speaker}{cluster_name}_{isdiffusion}_{f0_predictor}.{wav_format}"
%cd /content/so-vits-svc
!python inference_main.py -n {wav_filename} -m {model_path} -s {speaker} -t {trans} -cr {cluster_infer_ratio} -c {config_path} -cm {kmeans_path} -sd {slice_db} -wf {wav_format} {apf} --f0_predictor={f0_predictor} {ehc}
#@markdown
#@markdown If you dont want to download from here, uncheck this.
download_after_inference = True #@param {type:"boolean"}
if download_after_inference:
from google.colab import files
files.download(wav_output)