1000-hours/public/jupyter-notebooks/copy-select-mp3-files.ipynb
import os
import shutil
def readlines_from_file(filename):
    """Return every line of the text file *filename* as a list of strings.

    Each returned line keeps its line ending, exactly as ``readlines()``
    yields it. The file is decoded as UTF-8 explicitly so the result does not
    depend on the platform's default locale encoding.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file content is not valid UTF-8.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return f.readlines()
# Markdown page whose referenced audio files should be copied.
filename = "../../sounds-of-american-english/4.4-linking.md"
lines = readlines_from_file(filename)

AUDIO_PREFIX = 'audios/'
AUDIO_SUFFIX = '.mp3'

# Collect every substring found between 'audios/' and the next '.mp3'.
# A single line may reference more than one audio file.
mp3_file_paths = []
for line in lines:
    search_from = 0
    while True:
        prefix_at = line.find(AUDIO_PREFIX, search_from)
        if prefix_at == -1:
            break
        start = prefix_at + len(AUDIO_PREFIX)
        # Search for the suffix only *after* the prefix: a '.mp3' that occurs
        # earlier in the line must not be paired with this 'audios/'.
        end = line.find(AUDIO_SUFFIX, start)
        if end == -1:
            # 'audios/' with no following '.mp3': nothing left to extract.
            break
        mp3_file_paths.append(line[start:end])
        search_from = end + len(AUDIO_SUFFIX)

# Remove duplicates while keeping first-seen order, so the printed result is
# the same on every run (set ordering of strings is not stable across runs).
mp3_file_paths = list(dict.fromkeys(mp3_file_paths))

# Copy '../audios/us/<path>.mp3' to '../audios/<path>.mp3' unless the
# destination already exists.
for mp3_file_path in mp3_file_paths:
    source_path = f'../audios/us/{mp3_file_path}.mp3'
    target_path = f'../audios/{mp3_file_path}.mp3'
    if os.path.exists(target_path):
        print(f'{target_path} exists')
        continue
    try:
        # shutil.copy avoids building a shell command from file content and
        # works on platforms without a `cp` executable.
        shutil.copy(source_path, target_path)
    except OSError as error:
        # Stay best-effort like the original `cp` call: report and move on.
        print(f'failed to copy {source_path} to {target_path}: {error}')

print(len(mp3_file_paths))
print(mp3_file_paths)