recipes/mling_pl/mling_model.ipynb
(Adapted from FAIR's notebook: https://colab.research.google.com/github/facebookresearch/flashlight/blob/master/flashlight/app/asr/tutorial/notebooks/InferenceAndAlignmentCTC.ipynb)
Flashlight
First we need to install Flashlight and its dependencies. Flashlight is built from source, which takes ~16 minutes.
To install Flashlight outside of a Colab notebook, please use the link.
# First, choose the backend to build with. The value is read by the build cell,
# which accepts exactly "CPU" or "CUDA".
# NOTE(review): the trailing `#@param` annotation is presumably rendered by Colab
# as a dropdown form field — keep it verbatim.
backend = 'CPU' #@param ["CPU", "CUDA"]
# Clone the Flashlight sources into ./flashlight
!git clone https://github.com/flashlight/flashlight.git
# Install all build dependencies using the Colab helper script shipped in the repository
!source flashlight/scripts/colab/colab_install_deps.sh
Build Flashlight from source at a pinned commit (see the `git checkout` in the cell below), including the ASR app. The resulting binaries are placed in /content/flashlight/build/bin/asr.
If using a GPU Colab runtime, build the CUDA backend; else build the CPU backend.
# export necessary env variables
%env MKLROOT=/opt/intel/mkl
%env ArrayFire_DIR=/opt/arrayfire/share/ArrayFire/cmake
%env DNNL_DIR=/opt/dnnl/dnnl_lnx_2.0.0_cpu_iomp/lib/cmake/dnnl
if backend == "CUDA":
# Total time: ~13 minutes
# !cd flashlight && git checkout d2e1924cb2a2b32b48cc326bb7e332ca3ea54f67 && mkdir -p build && cd build && \
!cd flashlight && git checkout 8f7af9ec1188bfd7050c47abfac528d21650890f && mkdir -p build && cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release \
-DFL_BUILD_TESTS=OFF \
-DFL_BUILD_EXAMPLES=OFF \
-DFL_BUILD_APP_ASR=ON && \
make -j$(nproc)
elif backend == "CPU":
# Total time: ~14 minutes
# !cd flashlight && git checkout d2e1924cb2a2b32b48cc326bb7e332ca3ea54f67 && mkdir -p build && cd build && \
!cd flashlight && git checkout 8f7af9ec1188bfd7050c47abfac528d21650890f && mkdir -p build && cd build && \
cmake .. -DFL_BACKEND=CPU \
-DCMAKE_BUILD_TYPE=Release \
-DFL_BUILD_TESTS=OFF \
-DFL_BUILD_EXAMPLES=OFF \
-DFL_BUILD_APP_ASR=ON && \
make -j$(nproc)
else:
raise ValueError(f"Unknown backend {backend}")
Let's take a look around.
# The ASR binaries produced by the build are located in flashlight/build/bin/asr
# (path relative to the directory the repository was cloned into).
!ls flashlight/build/bin/asr
Download acoustic model, tokens, and a few audio files for testing.
!wget https://dl.fbaipublicfiles.com/wav2letter/mling_pl/tokens-all.lst
!wget https://dl.fbaipublicfiles.com/wav2letter/mling_pl/checkpoint_cv_finetune.bin # acoustic model (large)
!mkdir audio
for i in range(5):
path = "https://dl.fbaipublicfiles.com/wav2letter/rasr/tutorial/audio/116-288045-000{}.flac".format(i)
!cd audio && wget $path
!apt-get install sox
!pip install ffmpeg-python sox
# record_audio is imported from the cloned repository (module path
# flashlight/scripts/colab/record), so this presumably requires the working
# directory to contain ./flashlight — confirm if the cell is run from elsewhere.
from flashlight.scripts.colab.record import record_audio
record_audio("recorded_audio") # result --> "recorded_audio.wav"
# !ls audio/*.flac > audio.lst
import glob
from subprocess import check_output
with open("audio.lst", "w") as f:
for i,audio in enumerate(glob.glob("audio/*.flac") + ["recorded_audio.wav"]):
duration = float(check_output("soxi -D " + audio, shell=True))
f.write("%d %s %s\n" % (i, audio, duration))
!cat audio.lst
COMPILE MULTILINGUAL MODEL .so
%cd /content/flashlight/build
# !wget https://raw.githubusercontent.com/flashlight/wav2letter/49087d575ddf77aa5a99a01fee980fc00e92c802/recipes/mling_pl/model_with_externally_controlled_reshaping_big_lid.cpp
# !mv model_with_externally_controlled_reshaping_big_lid.cpp mling.cpp
!cmake .. -DFL_PLUGIN_MODULE_SRC_PATH=mling.cpp
# !cmake .. -DFL_PLUGIN_MODULE_SRC_PATH=mling_large.cpp
!make
MULTILINGUAL MODEL INFERENCE
# we need a dummy lexicon:
!echo 'a a |' > lexicon.txt
%cd /content
# checkpoint_base_cvft.bin
# checkpoint_large.bin
!./flashlight/build/bin/asr/fl_asr_test \
--test=audio.lst \
--am=checkpoint_base_cvft.bin \
--arch=flashlight/build/mling.so \
--tokens=tokens-all.lst \
--lexicon=lexicon.txt \
--datadir='' \
--emission_dir='' \
--show \
--logtostderr=1 \
--minloglevel=0