Back to Everyone Can Use English

Spelling Rules

1000-hours/public/jupyter-notebooks/spelling-rules.ipynb

0.7.93.9 KB
Original Source
python
import sys
import requests
import json
import vlc
import re
import random
from IPython.display import Audio

def load_json_database(url, timeout=30):
    """Download a line-delimited JSON document and return its records.

    Every non-empty line of the response body is parsed as one JSON value.
    Failures are reported on stdout rather than raised, so the caller
    always gets a list back (possibly empty or partial).

    Args:
        url: Address of the JSON Lines resource to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang the
            notebook cell on a stalled connection.

    Returns:
        A list with one parsed record per valid line, in document order.
        Lines that are not valid JSON are skipped.
    """
    records = []
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx answers into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from URL: {e}")
        return records

    # The body is fully downloaded at this point (no stream=True), so the
    # loop below only parses data and cannot hit the network.
    for line in response.iter_lines(decode_unicode=True):
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as e:
            # One malformed line must not discard the rest of the file.
            print(f"Error parsing JSON: {e}")
    return records

def search_in_json_database(database, search_word, region):
    """Look up the pronunciation of a word for one region.

    Records are scanned in order; within every record whose 'word' equals
    ``search_word``, the 'pronunciations' of each entry in 'pos_items' are
    checked and the first one whose 'region' equals ``region`` wins.

    Args:
        database: Iterable of record dicts.
        search_word: Exact value to match against a record's 'word' field.
        region: Value to match against a pronunciation's 'region' field.

    Returns:
        A dict with the keys 'pronunciation' and 'audio' taken from the
        first matching pronunciation, or the string 'not exist' when the
        word is absent or has no pronunciation for that region.
    """
    # Lazily narrow the scan to the records for the requested word.
    matching_records = (rec for rec in database if rec.get('word') == search_word)
    for rec in matching_records:
        for pos_entry in rec.get('pos_items', []):
            for candidate in pos_entry.get('pronunciations', []):
                if candidate.get('region') != region:
                    continue
                # First pronunciation for the requested region: done.
                return {
                    'pronunciation': candidate.get('pronunciation'),
                    'audio': candidate.get('audio'),
                }
    # Nothing matched both the word and the region.
    return 'not exist'

# Location of the dictionary data. load_json_database() parses it one JSON
# object per line. Presumably a Cambridge-dictionary dump (repo name
# "camdict") - TODO confirm.
url = "https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json"

# Fetched once when this cell runs; later look-ups search this in-memory list.
json_database = load_json_database(url)

python
text ="""
之前讲过,非重音音节里的元音可能会被弱化为 schwa `ə`…… 在自然语流中,连 schwa `ə` 都可能会被进一步弱化,变成非常轻的 `ɤ`。比如,常用词 reasonable,`/ˈriːzənəbəl/`,实际听到的常常是` /ˈriːzɤnəbəl/`……
"""

# One back-tick delimited phonetic snippet, e.g. `ə`. Pairing the ticks with
# a regex (rather than treating only "space + tick" as an opener) keeps the
# generated <span> tags balanced when the opening tick directly follows
# punctuation or CJK text, as it does in the sample text above.
PHO_SNIPPET = re.compile(r"`([^`]*)`")


def wrap_example(word, phonetics):
    """Return the tab-indented, newline-terminated bullet for one example word."""
    return (
        f'\t- {word} <span class="pho alt">{phonetics}</span> '
        f'<span class="speak-word-inline" data-audio-us-male="/audios/us/{word}-us-male.mp3" '
        f'data-audio-us-female="/audios/us/{word}-us-female.mp3"></span>\n'
    )


def render_line(line):
    """Convert one source line into its Markdown/HTML form.

    Back-tick snippets become ``<span class="pho">`` elements. A line that
    contains ``*`` is additionally treated as a rule line: double quotes
    become ``**`` and the comma-separated words between the first and the
    second hyphen are expanded into one bullet each, with the 'us'
    pronunciation looked up in ``json_database``.

    Args:
        line: A single line of the source text, without its newline.

    Returns:
        A ``(rendered, found, missing)`` tuple: the converted line, the
        example words in order of appearance, and the subset of those words
        for which the look-up returned 'not exist'.
    """
    found, missing = [], []
    is_rule_line = '*' in line
    if is_rule_line:
        # Done before the spans are inserted so the quotes of the generated
        # class="pho" attribute are not turned into "**" as well.
        line = line.replace('"', "**")
    line = PHO_SNIPPET.sub(r'<span class="pho">\1</span>', line)

    head, dash, rest = line.partition("-")
    if not (is_rule_line and dash):
        # Plain prose, or a rule line without an example list (indexing the
        # split result unconditionally would raise IndexError here).
        return line, found, missing

    # The examples are the text between the first and the second hyphen.
    segment, next_dash, remainder = rest.partition("-")
    pieces = []
    for item in segment.split(","):
        word = item.strip()
        if not word:
            # Stray or trailing comma: keep the whitespace, add no bullet.
            pieces.append(item)
            continue
        found.append(word)
        entry_us = search_in_json_database(json_database, word, 'us')
        if entry_us == 'not exist':
            phonetics = entry_us
            missing.append(word)
        else:
            phonetics = entry_us['pronunciation']
        # Substitute the word inside its own item only. A line-wide replace
        # would also hit the same letters in the heading, in other examples
        # and in markup inserted for earlier words.
        pieces.append(item.replace(word, wrap_example(word, phonetics), 1))

    line = head + dash + "".join(pieces) + next_dash + remainder
    line = line.replace(" - ", "\n").replace(',', '').strip()
    return line, found, missing


example_words = []  # every example word, duplicates included
missing_words = []  # example words without a 'us' pronunciation
for line in text.split("\n"):
    rendered, found, missing = render_line(line)
    example_words.extend(found)
    missing_words.extend(missing)
    print(rendered)

# Both strings keep the "word," + "word," ... shape (trailing comma included)
# that the audio-generation cell expects.
words = "".join(f"{w}," for w in example_words)
phonetics_not_exist = "".join(f"{w}," for w in missing_words)
print(f'phonetics_not_exist: {phonetics_not_exist}')
print('\n'+words)
python
import asyncio
import edge_tts
import pygame
text = words.rstrip(",")
Wordlist = text.split(",")
# Wordlist = ['reasonable']

print(Wordlist)
for w in Wordlist:
  # for VOICE in ['en-US-GuyNeural', 'en-US-JennyNeural', 'en-GB-RyanNeural', 'en-GB-SoniaNeural']:
  for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:
    w = w.strip()
    # OUTPUT_FILE = f"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3"
    OUTPUT_FILE = f"{w}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3"
    communicate = edge_tts.Communicate(w, VOICE)
    await communicate.save(OUTPUT_FILE) 
  print(w)
print("Files created!")