docs/examples/multi_modal/nebius_multi_modal.ipynb
<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/multi_modal/nebius_multi_modal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
This notebook demonstrates how to use multimodal models from Nebius AI Studio with LlamaIndex. Nebius AI Studio provides access to state-of-the-art multimodal models available for commercial use.
First, let's install LlamaIndex and the Nebius AI Studio dependencies. Since AI Studio exposes an OpenAI-compatible API, the OpenAI multimodal package inside LlamaIndex is also required.
%pip install llama-index-multi-modal-llms-nebius llama-index matplotlib
Set your Nebius AI Studio API key from the environment variables below, or simply insert it. You can get one by registering for free at Nebius AI Studio and issuing a key in the API Keys section.
import os
NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY") # NEBIUS_API_KEY = ""
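If the variable is not set, the calls below would fail later with a less obvious authentication error. An optional guard (a minimal sketch, not part of the original notebook) makes the failure explicit:
# Optional: fail early with a clear message if the key is missing.
if NEBIUS_API_KEY is None:
    raise ValueError(
        "Set the NEBIUS_API_KEY environment variable or paste your key above."
    )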
Initialize NebiusMultiModal and Load Images from URLs
from llama_index.multi_modal_llms.nebius import NebiusMultiModal
from llama_index.core.multi_modal_llms.generic_utils import load_image_urls
image_urls = [
    "https://townsquare.media/site/442/files/2018/06/wall-e-eve.jpg",
]
image_documents = load_image_urls(image_urls)
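load_image_urls wraps each URL in an ImageDocument, so image_documents here is a one-element list. A quick sanity check (not in the original notebook):
print(len(image_documents), type(image_documents[0]).__name__)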
mm_llm = NebiusMultiModal(
    model="Qwen/Qwen2-VL-72B-Instruct",
    api_key=NEBIUS_API_KEY,
    max_new_tokens=300,  # cap the length of generated responses
)
from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt
img_response = requests.get(image_urls[0])
print(image_urls[0])
img = Image.open(BytesIO(img_response.content))
plt.imshow(img)
complete_response = mm_llm.complete(
    prompt="Describe the images as an alternative text",
    image_documents=image_documents,
)
print(complete_response)
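complete returns a CompletionResponse object; printing it shows the generated text, which is also available directly via the text attribute (assuming the standard LlamaIndex response object):
print(complete_response.text)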
stream_complete_response = mm_llm.stream_complete(
    prompt="give me more context for this image",
    image_documents=image_documents,
)
for r in stream_complete_response:
    print(r.delta, end="")
from llama_index.multi_modal_llms.openai.utils import (
    generate_openai_multi_modal_chat_message,
)
chat_msg_1 = generate_openai_multi_modal_chat_message(
    prompt="Describe the image as an alternative text",
    role="user",
    image_documents=image_documents,
)
# The assistant message seeds the chat history with a prior model reply.
chat_msg_2 = generate_openai_multi_modal_chat_message(
    prompt='The image features two animated characters from the movie "WALL-E."',
    role="assistant",
)
chat_msg_3 = generate_openai_multi_modal_chat_message(
    prompt="can I know more?",
    role="user",
)
chat_messages = [chat_msg_1, chat_msg_2, chat_msg_3]
chat_response = mm_llm.chat(
    messages=chat_messages,
)
for msg in chat_messages:
    print(msg.role, msg.content)
print(chat_response)
stream_chat_response = mm_llm.stream_chat(
    messages=chat_messages,
)
for r in stream_chat_response:
    print(r.delta, end="")
response_acomplete = await mm_llm.acomplete(
    prompt="Describe the images as an alternative text",
    image_documents=image_documents,
)
print(response_acomplete)
response_astream_complete = await mm_llm.astream_complete(
    prompt="Describe the images as an alternative text",
    image_documents=image_documents,
)
async for delta in response_astream_complete:
    print(delta.delta, end="")
achat_response = await mm_llm.achat(
    messages=chat_messages,
)
print(achat_response)
astream_chat_response = await mm_llm.astream_chat(
    messages=chat_messages,
)
async for delta in astream_chat_response:
    print(delta.delta, end="")
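The await expressions above work as-is in a notebook because Jupyter already runs an event loop. In a plain Python script you would wrap the calls in a coroutine and drive it with asyncio.run; a minimal sketch:
import asyncio

async def main():
    # Reuse the async completion API demonstrated above.
    response = await mm_llm.acomplete(
        prompt="Describe the images as an alternative text",
        image_documents=image_documents,
    )
    print(response)

asyncio.run(main())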
from llama_index.core import SimpleDirectoryReader
from llama_index.multi_modal_llms.nebius import NebiusMultiModal
# put your local directory here
path_to_images = "/mnt/share/nebius/images"
image_documents = SimpleDirectoryReader(path_to_images).load_data()
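SimpleDirectoryReader loads every file in the directory. If the directory mixes images with other files, you can restrict it to image extensions via its required_exts parameter; a sketch, assuming JPEG and PNG inputs:
image_documents = SimpleDirectoryReader(
    path_to_images, required_exts=[".jpg", ".jpeg", ".png"]
).load_data()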
mm_llm = NebiusMultiModal(
    model="Qwen/Qwen2-VL-72B-Instruct",
    api_key=NEBIUS_API_KEY,
    max_new_tokens=300,
)
response = mm_llm.complete(
    prompt="Describe the images as an alternative text",
    image_documents=image_documents,
)
from PIL import Image
import matplotlib.pyplot as plt
for image_name in os.listdir(path_to_images):
    img = Image.open(os.path.join(path_to_images, image_name))
    plt.imshow(img)
    plt.show()
print(response)
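A single complete call over several documents produces one combined answer. To get a separate description per image, you could instead loop over the documents, reusing the same API as above (a sketch, not part of the original notebook):
for doc in image_documents:
    per_image_response = mm_llm.complete(
        prompt="Describe the image as an alternative text",
        image_documents=[doc],
    )
    print(per_image_response)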