
Azure AI Search

Basic Example

In this notebook, we take a Paul Graham essay, split it into chunks, embed it using an Azure OpenAI embedding model, load it into an Azure AI Search index, and then query it.

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

python
%pip install llama-index
%pip install wget
%pip install llama-index-vector-stores-azureaisearch
%pip install azure-search-documents==11.5.1
%pip install llama-index-embeddings-azure-openai
%pip install llama-index-llms-azure-openai
python
import logging
import sys
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from IPython.display import Markdown, display
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.settings import Settings
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.vector_stores.azureaisearch import (
    AzureAISearchVectorStore,
    IndexManagement,
    MetadataIndexFieldType,
)

Setup Azure OpenAI

python
aoai_api_key = "YOUR_AZURE_OPENAI_API_KEY"
aoai_endpoint = "YOUR_AZURE_OPENAI_ENDPOINT"
aoai_api_version = "2024-10-21"

llm = AzureOpenAI(
    model="YOUR_AZURE_OPENAI_COMPLETION_MODEL_NAME",
    deployment_name="YOUR_AZURE_OPENAI_COMPLETION_DEPLOYMENT_NAME",
    api_key=aoai_api_key,
    azure_endpoint=aoai_endpoint,
    api_version=aoai_api_version,
)

# You need to deploy your own embedding model as well as your own chat completion model
embed_model = AzureOpenAIEmbedding(
    model="YOUR_AZURE_OPENAI_EMBEDDING_MODEL_NAME",
    deployment_name="YOUR_AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME",
    api_key=aoai_api_key,
    azure_endpoint=aoai_endpoint,
    api_version=aoai_api_version,
)
python
search_service_api_key = "YOUR-AZURE-SEARCH-SERVICE-ADMIN-KEY"
search_service_endpoint = "YOUR-AZURE-SEARCH-SERVICE-ENDPOINT"
search_service_api_version = "2024-07-01"
credential = AzureKeyCredential(search_service_api_key)


# Index name to use
index_name = "llamaindex-vector-demo"

# Use index client to demonstrate creating an index
index_client = SearchIndexClient(
    endpoint=search_service_endpoint,
    credential=credential,
)

# Use search client to demonstrate using an existing index
search_client = SearchClient(
    endpoint=search_service_endpoint,
    index_name=index_name,
    credential=credential,
)
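
Both clients also accept a Microsoft Entra ID token credential instead of an admin key. Below is a minimal sketch, assuming the azure-identity package is installed and your identity has the appropriate roles on the search service (neither is set up in this notebook):

python
# Optional: key-free authentication with Microsoft Entra ID
# (assumes azure-identity is installed and RBAC roles are assigned)
from azure.identity import DefaultAzureCredential

aad_credential = DefaultAzureCredential()

index_client = SearchIndexClient(
    endpoint=search_service_endpoint,
    credential=aad_credential,
)
search_client = SearchClient(
    endpoint=search_service_endpoint,
    index_name=index_name,
    credential=aad_credential,
)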

Create Index (if it does not exist)

Demonstrates creating a vector index named "llamaindex-vector-demo" if one doesn't exist. The index has the following fields:

| Field Name | OData Type |
| --- | --- |
| id | Edm.String |
| chunk | Edm.String |
| embedding | Collection(Edm.Single) |
| metadata | Edm.String |
| doc_id | Edm.String |
| author | Edm.String |
| theme | Edm.String |
| director | Edm.String |
python
metadata_fields = {
    "author": "author",
    "theme": ("topic", MetadataIndexFieldType.STRING),
    "director": "director",
}

vector_store = AzureAISearchVectorStore(
    search_or_index_client=index_client,
    filterable_metadata_field_keys=metadata_fields,
    index_name=index_name,
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    id_field_key="id",
    chunk_field_key="chunk",
    embedding_field_key="embedding",
    embedding_dimensionality=1536,
    metadata_string_field_key="metadata",
    doc_id_field_key="doc_id",
    language_analyzer="en.lucene",
    vector_algorithm_type="exhaustiveKnn",
    # compression_type="binary" # Option to use "scalar" or "binary". NOTE: compression is only supported for HNSW
)
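
Once the index exists in your search service, you can verify its schema with the SearchIndexClient. A minimal sketch, assuming the index has already been created:

python
# Optional sanity check: list the fields of the index once it exists in the service
existing_index = index_client.get_index(index_name)
for field in existing_index.fields:
    print(field.name, field.type)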
python
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

Loading documents

Load the documents stored in the data/paul_graham/ directory using the SimpleDirectoryReader.

python
# Load documents
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
storage_context = StorageContext.from_defaults(vector_store=vector_store)

Settings.llm = llm
Settings.embed_model = embed_model
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)
python
# Query Data
query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query("What did the author do growing up?")
display(Markdown(f"<b>{response}</b>"))
python
response = query_engine.query(
    "What did the author learn?",
)
display(Markdown(f"<b>{response}</b>"))

Use Existing Index

python
index_name = "llamaindex-vector-demo"

metadata_fields = {
    "author": "author",
    "theme": ("topic", MetadataIndexFieldType.STRING),
    "director": "director",
}
vector_store = AzureAISearchVectorStore(
    search_or_index_client=search_client,
    filterable_metadata_field_keys=metadata_fields,
    index_management=IndexManagement.VALIDATE_INDEX,
    id_field_key="id",
    chunk_field_key="chunk",
    embedding_field_key="embedding",
    embedding_dimensionality=1536,
    metadata_string_field_key="metadata",
    doc_id_field_key="doc_id",
)
python
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    [],
    storage_context=storage_context,
)
python
query_engine = index.as_query_engine()
response = query_engine.query("What was a hard moment for the author?")
display(Markdown(f"<b>{response}</b>"))
python
response = query_engine.query("Who is the author?")
display(Markdown(f"<b>{response}</b>"))
python
import time

query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("What happened at interleaf?")

start_time = time.time()

token_count = 0
for token in response.response_gen:
    print(token, end="")
    token_count += 1

time_elapsed = time.time() - start_time
tokens_per_second = token_count / time_elapsed

print(f"\n\nStreamed output at {tokens_per_second} tokens/s")

Adding a document to an existing index

python
response = query_engine.query("What colour is the sky?")
display(Markdown(f"<b>{response}</b>"))
python
from llama_index.core import Document

index.insert_nodes([Document(text="The sky is indigo today")])
python
response = query_engine.query("What colour is the sky?")
display(Markdown(f"<b>{response}</b>"))

Filtering

Filters can be applied to queries using either the filters parameter, which uses LlamaIndex's filter syntax, or the odata_filters parameter, which passes OData filter expressions directly to Azure AI Search.

python
from llama_index.core.schema import TextNode

nodes = [
    TextNode(
        text="The Shawshank Redemption",
        metadata={
            "author": "Stephen King",
            "theme": "Friendship",
        },
    ),
    TextNode(
        text="The Godfather",
        metadata={
            "director": "Francis Ford Coppola",
            "theme": "Mafia",
        },
    ),
    TextNode(
        text="Inception",
        metadata={
            "director": "Christopher Nolan",
        },
    ),
]
python
index.insert_nodes(nodes)
python
from llama_index.core.vector_stores.types import (
    MetadataFilters,
    MetadataFilter,
    FilterOperator,
    FilterCondition,
)


filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Mafia", operator=FilterOperator.EQ)
    ],
    # if you want to apply multiple filters, you can use the AND, OR, NOT condition
    # condition=FilterCondition.AND
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")
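Multiple filters can also be combined via the condition parameter (as noted in the comment above). A minimal sketch, assuming the node metadata inserted earlier:

python
# Combine metadata filters with a logical AND
# (both keys were configured as filterable metadata fields above)
combined_filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Mafia", operator=FilterOperator.EQ),
        MetadataFilter(
            key="director",
            value="Francis Ford Coppola",
            operator=FilterOperator.EQ,
        ),
    ],
    condition=FilterCondition.AND,
)

retriever = index.as_retriever(filters=combined_filters)
retriever.retrieve("What is inception about?")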

Or passing in the odata_filters parameter directly:

python
odata_filters = "theme eq 'Mafia'"
retriever = index.as_retriever(
    vector_store_kwargs={"odata_filters": odata_filters}
)
retriever.retrieve("What is inception about?")

Query Mode

Four query modes are supported: DEFAULT (vector search), SPARSE, HYBRID, and SEMANTIC_HYBRID.

python
from llama_index.core.vector_stores.types import VectorStoreQueryMode

default_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.DEFAULT
)
response = default_retriever.retrieve("What is inception about?")

# Loop through each NodeWithScore in the response
for node_with_score in response:
    node = node_with_score.node  # The TextNode object
    score = node_with_score.score  # The similarity score
    chunk_id = node.id_  # The chunk ID

    # Extract the relevant metadata from the node
    file_name = node.metadata.get("file_name", "Unknown")
    file_path = node.metadata.get("file_path", "Unknown")

    # Extract the text content from the node
    text_content = node.text if node.text else "No content available"

    # Print the results in a user-friendly format
    print(f"Score: {score}")
    print(f"File Name: {file_name}")
    print(f"Id: {chunk_id}")
    print("\nExtracted Content:")
    print(text_content)
    print("\n" + "=" * 40 + " End of Result " + "=" * 40 + "\n")
python
from llama_index.core.vector_stores.types import VectorStoreQueryMode

hybrid_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.HYBRID
)
hybrid_retriever.retrieve("What is inception about?")
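The SPARSE mode (keyword search only, without vector similarity) follows the same pattern; a minimal sketch:

python
# Sparse (keyword-only) retrieval using the full-text search capabilities of Azure AI Search
sparse_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.SPARSE
)
sparse_retriever.retrieve("What is inception about?")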

Perform a Hybrid Search with Semantic Reranking

This mode applies semantic reranking to hybrid search results to improve search relevance.

Please see this link for further details: https://learn.microsoft.com/azure/search/semantic-search-overview

python
hybrid_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.SEMANTIC_HYBRID
)
hybrid_retriever.retrieve("What is inception about?")