1000-hours/public/jupyter-notebooks/phonetics.ipynb
import json
import vlc
import re
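# The database is expected to be a JSON Lines file: one JSON object per line, loaded from disk.
# If your data lives in memory or uses another format, adapt the loader below.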
def load_json_database(file_path):
    """Load a JSON Lines database: one JSON object per line of the file.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of the decoded records, in file order. Lines that cannot be
        parsed are reported on stdout and skipped; blank lines are ignored.
    """
    records = []
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's default encoding, which differs between systems.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if not line.strip():
                # A blank line (e.g. a trailing newline) is not a record and
                # should not be reported as a parse error.
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as e:
                # Best effort: report the bad line and keep loading the rest.
                print(f"Error parsing JSON: {e}")
    return records
# Look up a word in the JSON database.
def search_in_json_database(database, search_word, region):
    """Return the first pronunciation of ``search_word`` for ``region``.

    Args:
        database: Iterable of record dicts. Each record is read through its
            'word' and 'pos_items' keys; each pos item through its
            'pronunciations' key.
        search_word: Word to look up; compared for exact equality with the
            record's 'word' value.
        region: Region code compared with each pronunciation's 'region'
            value (the script in this file passes 'us').

    Returns:
        A dict with the keys 'pronunciation' and 'audio' taken from the first
        matching pronunciation entry, or the string 'not exist' when no
        record supplies a pronunciation for that word and region.
    """
    for record in database:
        if record.get('word') != search_word:
            continue
        # `or []` covers both a missing key and a key stored as null, which
        # `.get(key, [])` alone would hand to the loop as None.
        for pos_item in record.get('pos_items') or []:
            for pronunciation in pos_item.get('pronunciations') or []:
                if pronunciation.get('region') == region:
                    return {
                        'pronunciation': pronunciation.get('pronunciation'),
                        'audio': pronunciation.get('audio')
                    }
    # No record provided a pronunciation for this word in this region.
    return 'not exist'
def search_pronunciation(database, pattern, region='us'):
    """Find words whose transcription for ``region`` matches a regex.

    Args:
        database: Iterable of record dicts, read through the keys 'word',
            'pos_items', 'pronunciations', 'pronunciation' and 'region'.
        pattern: Regular expression searched (``re.search`` semantics, so it
            may match anywhere) in each transcription.
        region: Region code a pronunciation entry must carry to be
            considered. Defaults to 'us', the value that used to be
            hard-coded here.

    Returns:
        A list of ``(word, transcription)`` tuples in database order, with
        every '.' removed from the transcription. The list is empty when
        nothing matches.
    """
    regex = re.compile(pattern)
    results = []
    for record in database:
        # `or []` tolerates records whose lists are missing or null.
        for pos_item in record.get("pos_items") or []:
            for pronunciation in pos_item.get("pronunciations") or []:
                if pronunciation.get('region') != region:
                    continue
                transcription = pronunciation.get("pronunciation")
                if transcription is None:
                    # Entry without a transcription: nothing to search.
                    continue
                if regex.search(transcription):
                    # The match runs on the raw transcription; dots are only
                    # stripped from the reported value.
                    results.append((record["word"], transcription.replace(".", "")))
    return results
# Path of the JSON database file used for this run (machine-specific).
json_db_file_path = '/Users/joker/github/camdict/cam_dict.refined.json'
json_database = load_json_database(json_db_file_path)

# Comma-separated words to render. Deliberately not named `list`: rebinding
# that name would hide the builtin for every later statement or notebook cell.
word_list = """
balls,cards
"""

for raw_word in word_list.split(","):
    word = raw_word.strip().lower()
    if not word:
        # An empty entry (e.g. from a trailing comma) is not a word.
        continue
    result = search_in_json_database(json_database, word, 'us')
    # The lookup returns the string 'not exist' instead of a dict on a miss.
    pho = result['pronunciation'] if result != 'not exist' else 'not exist'
    # NOTE(review): the audio file names append "s" to the word (e.g.
    # "ballss-us-male.mp3" for "balls") - confirm this is intended.
    line = f'*{word}* <span class="pho alt">{pho}</span><span class="speak-word-inline" data-audio-us-male="/audios/{word}s-us-male.mp3" data-audio-us-female="/audios/{word}s-us-female.mp3"></span>'
    print(line)