Back to Everyone Can Use English

remove duplicates in mp3_file_paths

1000-hours/public/jupyter-notebooks/copy-select-mp3-files.ipynb

0.7.91.2 KB
Original Source
python
import os
import re
import shutil

def readlines_from_file(filename, encoding="utf-8"):
    """Return every line of *filename* as a list of strings.

    Line terminators are kept, exactly as ``file.readlines()`` returns them.

    Args:
        filename: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale encoding.

    Returns:
        A list with one string per line; an empty list for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid in *encoding*.
    """
    with open(filename, 'r', encoding=encoding) as f:
        return f.readlines()

# Matches one audio reference: the shortest text between 'audios/' and the
# next '.mp3' that follows it on the same line.
_MP3_REFERENCE = re.compile(r'audios/(.*?)\.mp3')


def extract_mp3_names(lines):
    """Return the unique audio names referenced in *lines*, in first-seen order.

    A name is the text between ``audios/`` and the next ``.mp3`` after it.
    A line may hold several references; each one is reported. A ``.mp3``
    that appears before any ``audios/`` and an ``audios/`` with no ``.mp3``
    after it are both ignored.

    Args:
        lines: Iterable of text lines to scan.

    Returns:
        A list of names (without the ``audios/`` prefix or ``.mp3`` suffix),
        duplicates removed, ordered by first appearance.
    """
    names = []
    for line in lines:
        names.extend(_MP3_REFERENCE.findall(line))
    # dict.fromkeys drops duplicates while keeping a deterministic order,
    # unlike set(), whose iteration order varies between runs.
    return list(dict.fromkeys(names))


def copy_missing_mp3s(names, src_dir='../audios/us', dst_dir='../audios'):
    """Copy ``{src_dir}/{name}.mp3`` to ``{dst_dir}/{name}.mp3`` when missing.

    Destinations that already exist are reported and left alone. A copy that
    fails (for example, a missing source file) is reported and skipped so
    that the remaining files are still processed.

    Args:
        names: Audio names without the ``.mp3`` suffix.
        src_dir: Directory holding the source files.
        dst_dir: Directory that should receive the copies.
    """
    for name in names:
        src = f'{src_dir}/{name}.mp3'
        dst = f'{dst_dir}/{name}.mp3'
        if os.path.exists(dst):
            print(f'{dst} exists')
            continue
        try:
            # shutil.copy takes the paths literally, so names containing
            # spaces or shell metacharacters are handled correctly.
            shutil.copy(src, dst)
        except OSError as err:
            print(f'could not copy {src} to {dst}: {err}')


if __name__ == "__main__":
    filename = "../../sounds-of-american-english/4.4-linking.md"

    lines = readlines_from_file(filename)
    mp3_file_paths = extract_mp3_names(lines)

    # Copy f'../audios/us/{name}.mp3' to f'../audios/{name}.mp3' for every
    # referenced file that is not already present.
    copy_missing_mp3s(mp3_file_paths)

    print(len(mp3_file_paths))
    print(mp3_file_paths)