docs/examples/llama_cloud/figure_retrieval.ipynb
This notebook shows an example of retrieving images embedded within a PDF document. More docs on using this feature can be found on the LlamaCloud docs page.
%pip install llama-index llama-index-llms-openai llama-cloud-services
# Pull the service credentials out of the environment
import os

# Mandatory: a missing variable raises KeyError right here
api_key = os.environ["LLAMA_CLOUD_API_KEY"]
# Optional: evaluates to None when the variable is not set
org_id = os.getenv("LLAMA_CLOUD_ORGANIZATION_ID")
# Mandatory: reused below as the api_key of the embedding config
openai_api_key = os.environ["OPENAI_API_KEY"]
# Show the current working directory; the relative PDF path used later
# is resolved against it
import os

current_dir = os.getcwd()
print(current_dir)
from llama_cloud.types import LlamaParseParameters
from llama_cloud_services import LlamaCloudIndex
# Embedding settings handed to the index at creation time
embedding_config = {
    "type": "OPENAI_EMBEDDING",
    "component": {
        "api_key": openai_api_key,
        # You can choose any OpenAI Embedding model
        "model_name": "text-embedding-ada-002",
    },
}
# Parsing options: page screenshots and layout extraction are both enabled.
# NOTE(review): presumably these are what make page figures retrievable
# later on -- confirm against the LlamaCloud docs.
parse_params = LlamaParseParameters(
    take_screenshot=True,
    extract_layout=True,
)

# Create the index named "my_index" with the credentials and embedding
# settings defined above
index = LlamaCloudIndex.create_index(
    name="my_index",
    organization_id=org_id,
    api_key=api_key,
    embedding_config=embedding_config,
    llama_parse_parameters=parse_params,
)
# Sample slide deck to ingest (path is relative to the working directory)
image_figure_slides_path = "../data/figures/image_figure_slides.pdf"

# Upload the PDF into the index.
# NOTE(review): going by the flag names, this waits for ingestion to finish
# and raises if it fails -- confirm against the upload_file documentation.
index.upload_file(
    image_figure_slides_path,
    wait_for_ingestion=True,
    raise_on_error=True,
)
from llama_index.core.schema import ImageNode
import base64
import tempfile
from IPython.display import Image, display
# Build a retriever with page-figure retrieval switched on
retriever = index.as_retriever(
    retrieve_page_figure_nodes=True,
    dense_similarity_top_k=1,
)

# Run the query, then keep only the results whose node is an ImageNode
nodes = retriever.retrieve("Sample query")
image_nodes = [hit.node for hit in nodes if isinstance(hit.node, ImageNode)]
# Show every retrieved figure: print its metadata, decode its base64 payload
# into a temporary .jpg file, and render that file inline.
for img_node in image_nodes:
    print(img_node.metadata)
    # The temporary file is removed automatically when the `with` block exits,
    # so the image must be displayed before leaving it.
    with tempfile.NamedTemporaryFile(suffix=".jpg") as temp_file:
        temp_file.write(base64.b64decode(img_node.image))
        # The write above goes through a buffered handle. Flush it so the
        # bytes are on disk before Image() re-opens the file by name;
        # otherwise an empty or truncated image may be read.
        temp_file.flush()
        print(f"Image saved to {temp_file.name}")
        # Display the image in Jupyter Notebook
        display(Image(filename=temp_file.name))