new-edition-drafts/jupyter-notebooks/spaCy.ipynb
!pip install -U pip setuptools wheel
!pip install -U spacy
!python -m spacy download en_core_web_sm
import spacy
from spacy import displacy
from pathlib import Path
# Load the small English pipeline once at module level; highlight_root() and
# the analysis code below both reuse this shared `nlp` object.
nlp = spacy.load('en_core_web_sm')
def highlight_root(sentence):
    """Render *sentence* with its dependency root highlighted.

    The sentence is parsed with the module-level ``nlp`` pipeline and the
    first token whose dependency label is ``'ROOT'`` is selected. That
    token's span is passed to displaCy's manual entity renderer with an
    empty label, so it shows up as a highlighted span.

    Args:
        sentence: The text to parse and render.

    Returns:
        The root token, or ``None`` when no token is labelled ``'ROOT'``
        (in which case "No root found." is printed and nothing is rendered).
    """
    parsed = nlp(sentence)

    # Only the first ROOT token is used, even if the parse contains several.
    root = next((tok for tok in parsed if tok.dep_ == 'ROOT'), None)

    if root is None:
        print("No root found.")
        return root

    # The span runs from the token's offset in the text to the end of the token.
    span_start = root.idx
    highlight = {
        'start': span_start,
        'end': span_start + len(root.text),
        'label': '',
    }
    payload = [{'text': sentence, 'ents': [highlight], 'title': None}]
    displacy.render(payload, style='ent', manual=True)
    return root
sentence = "The spectacular aurora light displays that appear in Earth’s atmosphere around the north and south magnetic poles were once mysterious phenomena."
doc = nlp(sentence)

# Tag every word with its part of speech.
for token in doc:
    print(token.text, token.pos_)

# Highlight the predicate of the main clause (the dependency root).
root = highlight_root(sentence)

# Simplified version of the main clause: the root plus its direct children.
# highlight_root() returns None (and has already printed a message) when the
# parse has no root, so there is nothing to simplify in that case.
if root is not None:
    # Order by token position so the root lands where it occurs in the
    # sentence, instead of assuming exactly one child precedes it.
    main_tokens = sorted([root, *root.children], key=lambda tok: tok.i)
    simplified_setence = ' '.join(tok.text for tok in main_tokens).strip().replace(" .", ".")
    # Upper-case only the first character; str.capitalize() would also
    # lower-case the rest and damage proper nouns.
    simplified_setence = simplified_setence[:1].upper() + simplified_setence[1:]
    print(simplified_setence)

# Draw the dependency relations between the parts of the sentence.
displacy.render(doc, style="dep", options={'distance': 60})

# Save the dependency graph to dep-graph.svg.
svg = displacy.render(doc, style="dep", jupyter=False)
output_path = Path("dep-graph.svg")
# write_text opens, writes and closes the file, so no handle is left open.
output_path.write_text(svg, encoding="utf-8")