Back to Llama Index

Multi-Modal OpenAI Image Generation + GPT-4V

llama-index-integrations/tools/llama-index-tools-openai/examples/multimodal_openai_image.ipynb

0.14.211.0 KB
Original Source

Multi-Modal OpenAI Image Generation + GPT-4V

python
import os

from PIL import Image
from IPython.display import display
python
from llama_index.tools.openai.image_generation import OpenAIImageGenerationToolSpec

# Build the image-generation tool spec, authenticating with the key that
# must already be present in the OPENAI_API_KEY environment variable
# (a missing variable raises KeyError here).
openai_api_key = os.environ["OPENAI_API_KEY"]
image_generation_tool = OpenAIImageGenerationToolSpec(api_key=openai_api_key)

# Request one image for the text prompt. The return value is kept as
# `image_path` because a later cell opens it with PIL's Image.open.
generation_prompt = "A pink and blue llama in a black background"
image_path = image_generation_tool.image_generation(generation_prompt)
python
## GPT-4V Image Captioning
python
from llama_index.multi_modal_llms.openai import OpenAIMultiModal

# SimpleDirectoryReader lives in `llama_index.core` in the namespaced
# (>= 0.10) package layout that the other imports in this notebook already
# rely on; the legacy top-level `from llama_index import ...` path fails there.
from llama_index.core import SimpleDirectoryReader

# Load every file found in the image cache directory as a document.
# NOTE(review): this relative path is presumably where the image generation
# tool wrote its output in the previous cell -- confirm it matches the tool's
# cache directory when running from a different working directory.
image_documents = SimpleDirectoryReader("../../../img_cache").load_data()

# Multi-modal OpenAI client; the API key is read from the environment
# (a missing OPENAI_API_KEY raises KeyError).
openai_mm_llm = OpenAIMultiModal(
    model="gpt-4o",
    api_key=os.environ["OPENAI_API_KEY"],
    max_new_tokens=300,
)

# Ask the model to caption the loaded images with a single text prompt.
response = openai_mm_llm.complete(
    prompt="Describe the images as an alternative text",
    image_documents=image_documents,
)

print(response)
python
## Display the Generated Image
python
# Open the generated image file with PIL, keeping the handle in `img`,
# then render it inline through IPython's display helper.
img = Image.open(fp=image_path)
display(img)