Back to Everyone Can Use English

SpaCy

1000-hours/public/jupyter-notebooks/spaCy.ipynb

0.7.9 · 1.7 KB
Original Source
python
# Upgrade pip, setuptools and wheel before installing spaCy.
!pip install -U pip setuptools wheel
# Install spaCy, or upgrade it if it is already installed.
!pip install -U spacy
# Download the en_core_web_sm pipeline that is loaded with spacy.load below.
!python -m spacy download en_core_web_sm
python
import spacy
from spacy import displacy
from pathlib import Path

# Load the en_core_web_sm pipeline once; the cells below reuse this `nlp` object.
nlp = spacy.load('en_core_web_sm')

def highlight_root(sentence):
  """Render *sentence* with its dependency root highlighted.

  The sentence is parsed with the module-level ``nlp`` pipeline and the first
  token whose dependency label is ``ROOT`` is marked as a label-less entity
  span in displaCy's manual ``ent`` view.

  Args:
    sentence: The text to parse and display.

  Returns:
    The root token, or ``None`` if no token is labelled ``ROOT`` (in which
    case "No root found." is printed and nothing is rendered).
  """
  parsed = nlp(sentence)

  # First ROOT token in document order, or None when there is none.
  root = next((tok for tok in parsed if tok.dep_ == 'ROOT'), None)

  if root is None:
    print("No root found.")
    return root

  # Character offsets of the root within the sentence text.
  span_start = root.idx
  span_end = span_start + len(root.text)

  highlight = {'start': span_start, 'end': span_end, 'label': ''}
  payload = [{'text': sentence, 'ents': [highlight], 'title': None}]
  displacy.render(payload, style='ent', manual=True)
  return root
python
# Example sentence analysed by the cells that follow.
sentence = "The spectacular aurora light displays that appear in Earth’s atmosphere around the north and south magnetic poles were once mysterious phenomena."

# Parse the sentence once; later cells read tokens and dependencies from `doc`.
doc = nlp(sentence)
python
# Print every token together with its part-of-speech tag.

for token in doc:
  print(f"{token.text} {token.pos_}")
python
# Highlight the predicate verb (dependency root) of the main clause.
# highlight_root returns the root token, or None if no root was found.

root = highlight_root(sentence)
python
# Simplified version of the main clause: the root verb plus its direct
# dependents, in sentence order.

if root is None:
  # highlight_root returns None when no ROOT token exists; there is nothing
  # to simplify, so avoid an AttributeError on root.children.
  children = []
  simplified_setence = ''
else:
  # Token.lefts / Token.rights are the direct children before / after the
  # root, so the root lands in its true position however many left children
  # it has (insert(1, root) was only correct for exactly one left child).
  children = [*root.lefts, root, *root.rights]
  joined = ' '.join(str(c) for c in children).strip().replace(" .", ".")
  # Upper-case only the first character: str.capitalize() would also
  # lower-case the rest of the string and mangle proper nouns.
  simplified_setence = joined[:1].upper() + joined[1:]

print(simplified_setence)
python
# Display the dependency relations between the parts of the sentence as a graph.

displacy.render(doc, style="dep", options={'distance': 60})
python
# Save the dependency graph to the file dep-graph.svg.

# jupyter=False makes displacy.render return the markup instead of displaying it.
svg = displacy.render(doc, style="dep", jupyter=False)
output_path = Path("dep-graph.svg")
# write_text opens, writes and closes the file in one call; the previous
# open(...).write(...) chain never closed the file handle explicitly.
output_path.write_text(svg, encoding="utf-8")