examples/notebooks/romeo_juliet_extraction.ipynb
This notebook demonstrates extracting characters, emotions, and relationships from Shakespeare's Romeo and Juliet using LangExtract.
# Install LangExtract
%pip install -q langextract
# Set up your Gemini API key
# Get your key from: https://aistudio.google.com/app/apikey
import os
from getpass import getpass
if 'GEMINI_API_KEY' not in os.environ:
os.environ['GEMINI_API_KEY'] = getpass('Enter your Gemini API key: ')
import langextract as lx
import textwrap
# Define the extraction task
prompt = textwrap.dedent("""\
Extract characters, emotions, and relationships in order of appearance.
Use exact text for extractions. Do not paraphrase or overlap entities.
Provide meaningful attributes for each entity to add context.""")
# Provide a high-quality example
examples = [
lx.data.ExampleData(
text="ROMEO. But soft! What light through yonder window breaks? It is the east, and Juliet is the sun.",
extractions=[
lx.data.Extraction(
extraction_class="character",
extraction_text="ROMEO",
attributes={"emotional_state": "wonder"}
),
lx.data.Extraction(
extraction_class="emotion",
extraction_text="But soft!",
attributes={"feeling": "gentle awe"}
),
lx.data.Extraction(
extraction_class="relationship",
extraction_text="Juliet is the sun",
attributes={"type": "metaphor"}
),
]
)
]
# Simple extraction from a short text
input_text = "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"
result = lx.extract(
text_or_documents=input_text,
prompt_description=prompt,
examples=examples,
model_id="gemini-2.5-flash",
)
# Display results
print(f"Extracted {len(result.extractions)} entities:\n")
for extraction in result.extractions:
print(f"• {extraction.extraction_class}: '{extraction.extraction_text}'")
if extraction.attributes:
for key, value in extraction.attributes.items():
print(f" - {key}: {value}")
# Save results to JSONL
lx.io.save_annotated_documents([result], output_name="romeo_juliet.jsonl", output_dir=".")
# Generate interactive visualization
html_content = lx.visualize("romeo_juliet.jsonl")
# Display in notebook
print("Interactive visualization (hover over highlights to see attributes):")
html_content
# Save visualization to file (for downloading)
with open("romeo_juliet_visualization.html", "w") as f:
# Handle both Jupyter (HTML object) and non-Jupyter (string) environments
if hasattr(html_content, 'data'):
f.write(html_content.data)
else:
f.write(html_content)
print("✓ Visualization saved to romeo_juliet_visualization.html")
print("You can download this file from the Files panel on the left.")
Experiment with your own Shakespeare quotes or any literary text!
# Try your own text
your_text = """
JULIET: O Romeo, Romeo! wherefore art thou Romeo?
Deny thy father and refuse thy name;
Or, if thou wilt not, be but sworn my love,
And I'll no longer be a Capulet.
"""
custom_result = lx.extract(
text_or_documents=your_text,
prompt_description=prompt,
examples=examples,
model_id="gemini-2.5-flash",
)
print("Extractions from your text:\n")
for e in custom_result.extractions:
print(f"• {e.extraction_class}: '{e.extraction_text}'")
if e.attributes:
for key, value in e.attributes.items():
print(f" - {key}: {value}")