
Azure AI Search

Basic Example

In this notebook, we take a Paul Graham essay, split it into chunks, embed it using an Azure OpenAI embedding model, load it into an Azure AI Search index, and then query it.

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

python
%pip install llama-index
%pip install wget
%pip install llama-index-vector-stores-azureaisearch
%pip install azure-search-documents==11.5.1
%pip install llama-index-embeddings-azure-openai
%pip install llama-index-llms-azure-openai
python
import logging
import sys
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from IPython.display import Markdown, display
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.settings import Settings
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.vector_stores.azureaisearch import (
    AzureAISearchVectorStore,
    IndexManagement,
    MetadataIndexFieldType,
)

Setup Azure OpenAI

python
aoai_api_key = "YOUR_AZURE_OPENAI_API_KEY"
aoai_endpoint = "YOUR_AZURE_OPENAI_ENDPOINT"
aoai_api_version = "2024-10-21"

llm = AzureOpenAI(
    model="YOUR_AZURE_OPENAI_COMPLETION_MODEL_NAME",
    deployment_name="YOUR_AZURE_OPENAI_COMPLETION_DEPLOYMENT_NAME",
    api_key=aoai_api_key,
    azure_endpoint=aoai_endpoint,
    api_version=aoai_api_version,
)

# You need to deploy your own embedding model as well as your own chat completion model
embed_model = AzureOpenAIEmbedding(
    model="YOUR_AZURE_OPENAI_EMBEDDING_MODEL_NAME",
    deployment_name="YOUR_AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME",
    api_key=aoai_api_key,
    azure_endpoint=aoai_endpoint,
    api_version=aoai_api_version,
)
python
search_service_api_key = "YOUR-AZURE-SEARCH-SERVICE-ADMIN-KEY"
search_service_endpoint = "YOUR-AZURE-SEARCH-SERVICE-ENDPOINT"
search_service_api_version = "2024-07-01"
credential = AzureKeyCredential(search_service_api_key)


# Index name to use
index_name = "llamaindex-vector-demo"

# Use index client to demonstrate creating an index
index_client = SearchIndexClient(
    endpoint=search_service_endpoint,
    credential=credential,
)

# Use search client to demonstrate using an existing index
search_client = SearchClient(
    endpoint=search_service_endpoint,
    index_name=index_name,
    credential=credential,
)
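
Both clients also accept a Microsoft Entra ID token credential instead of an admin key. Below is a minimal sketch, assuming the azure-identity package is installed and your identity has the appropriate roles on the search service (neither is set up in this notebook):

python
# Optional: key-free authentication with Microsoft Entra ID
# (assumes azure-identity is installed and RBAC roles are assigned)
from azure.identity import DefaultAzureCredential

aad_credential = DefaultAzureCredential()

index_client = SearchIndexClient(
    endpoint=search_service_endpoint,
    credential=aad_credential,
)
search_client = SearchClient(
    endpoint=search_service_endpoint,
    index_name=index_name,
    credential=aad_credential,
)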

Create Index (if it does not exist)

Demonstrates creating a vector index named "llamaindex-vector-demo" if one doesn't exist. The index has the following fields:

| Field Name | OData Type |
| --- | --- |
| id | Edm.String |
| chunk | Edm.String |
| embedding | Collection(Edm.Single) |
| metadata | Edm.String |
| doc_id | Edm.String |
| author | Edm.String |
| theme | Edm.String |
| director | Edm.String |
python
metadata_fields = {
    "author": "author",
    "theme": ("topic", MetadataIndexFieldType.STRING),
    "director": "director",
}

vector_store = AzureAISearchVectorStore(
    search_or_index_client=index_client,
    filterable_metadata_field_keys=metadata_fields,
    index_name=index_name,
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    id_field_key="id",
    chunk_field_key="chunk",
    embedding_field_key="embedding",
    embedding_dimensionality=1536,
    metadata_string_field_key="metadata",
    doc_id_field_key="doc_id",
    language_analyzer="en.lucene",
    vector_algorithm_type="exhaustiveKnn",
    # compression_type="binary" # Option to use "scalar" or "binary". NOTE: compression is only supported for HNSW
)
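
Once the index exists in your search service, you can verify its schema with the SearchIndexClient. A minimal sketch, assuming the index has already been created:

python
# Optional sanity check: list the fields of the index once it exists in the service
existing_index = index_client.get_index(index_name)
for field in existing_index.fields:
    print(field.name, field.type)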
python
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

Loading documents

Load the documents stored in the data/paul_graham/ directory using the SimpleDirectoryReader.

python
# Load documents
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
storage_context = StorageContext.from_defaults(vector_store=vector_store)

Settings.llm = llm
Settings.embed_model = embed_model
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)
python
# Query Data
query_engine = index.as_query_engine(similarity_top_k=3)
response = query_engine.query("What did the author do growing up?")
display(Markdown(f"<b>{response}</b>"))
python
response = query_engine.query(
    "What did the author learn?",
)
display(Markdown(f"<b>{response}</b>"))

Use Existing Index

python
index_name = "llamaindex-vector-demo"

metadata_fields = {
    "author": "author",
    "theme": ("topic", MetadataIndexFieldType.STRING),
    "director": "director",
}
vector_store = AzureAISearchVectorStore(
    search_or_index_client=search_client,
    filterable_metadata_field_keys=metadata_fields,
    index_management=IndexManagement.VALIDATE_INDEX,
    id_field_key="id",
    chunk_field_key="chunk",
    embedding_field_key="embedding",
    embedding_dimensionality=1536,
    metadata_string_field_key="metadata",
    doc_id_field_key="doc_id",
)
python
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    [],
    storage_context=storage_context,
)
python
query_engine = index.as_query_engine()
response = query_engine.query("What was a hard moment for the author?")
display(Markdown(f"<b>{response}</b>"))
python
response = query_engine.query("Who is the author?")
display(Markdown(f"<b>{response}</b>"))
python
import time

query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("What happened at interleaf?")

start_time = time.time()

token_count = 0
for token in response.response_gen:
    print(token, end="")
    token_count += 1

time_elapsed = time.time() - start_time
tokens_per_second = token_count / time_elapsed

print(f"\n\nStreamed output at {tokens_per_second} tokens/s")

Adding a document to an existing index

python
response = query_engine.query("What colour is the sky?")
display(Markdown(f"<b>{response}</b>"))
python
from llama_index.core import Document

index.insert_nodes([Document(text="The sky is indigo today")])
python
response = query_engine.query("What colour is the sky?")
display(Markdown(f"<b>{response}</b>"))

Filtering

Filters can be applied to queries using either the filters parameter, which uses LlamaIndex's filter syntax, or the odata_filters parameter, which passes OData filter expressions directly to Azure AI Search.

python
from llama_index.core.schema import TextNode

nodes = [
    TextNode(
        text="The Shawshank Redemption",
        metadata={
            "author": "Stephen King",
            "theme": "Friendship",
        },
    ),
    TextNode(
        text="The Godfather",
        metadata={
            "director": "Francis Ford Coppola",
            "theme": "Mafia",
        },
    ),
    TextNode(
        text="Inception",
        metadata={
            "director": "Christopher Nolan",
        },
    ),
]
python
index.insert_nodes(nodes)
python
from llama_index.core.vector_stores.types import (
    MetadataFilters,
    MetadataFilter,
    FilterOperator,
    FilterCondition,
)


filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Mafia", operator=FilterOperator.EQ)
    ],
    # if you want to apply multiple filters, you can use the AND, OR, NOT condition
    # condition=FilterCondition.AND
)

retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")
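Multiple filters can also be combined via the condition parameter (as noted in the comment above). A minimal sketch, assuming the node metadata inserted earlier:

python
# Combine metadata filters with a logical AND
# (both keys were configured as filterable metadata fields above)
combined_filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", value="Mafia", operator=FilterOperator.EQ),
        MetadataFilter(
            key="director",
            value="Francis Ford Coppola",
            operator=FilterOperator.EQ,
        ),
    ],
    condition=FilterCondition.AND,
)

retriever = index.as_retriever(filters=combined_filters)
retriever.retrieve("What is inception about?")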

Or passing in the odata_filters parameter directly:

python
odata_filters = "theme eq 'Mafia'"
retriever = index.as_retriever(
    vector_store_kwargs={"odata_filters": odata_filters}
)
retriever.retrieve("What is inception about?")

Query Mode

Four query modes are supported: DEFAULT (vector search), SPARSE, HYBRID, and SEMANTIC_HYBRID.

python
from llama_index.core.vector_stores.types import VectorStoreQueryMode

default_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.DEFAULT
)
response = default_retriever.retrieve("What is inception about?")

# Loop through each NodeWithScore in the response
for node_with_score in response:
    node = node_with_score.node  # The TextNode object
    score = node_with_score.score  # The similarity score
    chunk_id = node.id_  # The chunk ID

    # Extract the relevant metadata from the node
    file_name = node.metadata.get("file_name", "Unknown")
    file_path = node.metadata.get("file_path", "Unknown")

    # Extract the text content from the node
    text_content = node.text if node.text else "No content available"

    # Print the results in a user-friendly format
    print(f"Score: {score}")
    print(f"File Name: {file_name}")
    print(f"Id: {chunk_id}")
    print("\nExtracted Content:")
    print(text_content)
    print("\n" + "=" * 40 + " End of Result " + "=" * 40 + "\n")
python
from llama_index.core.vector_stores.types import VectorStoreQueryMode

hybrid_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.HYBRID
)
hybrid_retriever.retrieve("What is inception about?")
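The SPARSE mode (keyword search only, without vector similarity) follows the same pattern; a minimal sketch:

python
# Sparse (keyword-only) retrieval using the full-text search capabilities of Azure AI Search
sparse_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.SPARSE
)
sparse_retriever.retrieve("What is inception about?")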

Perform a Hybrid Search with Semantic Reranking

This mode applies semantic reranking to hybrid search results to improve search relevance.

Please see this link for further details: https://learn.microsoft.com/azure/search/semantic-search-overview

python
hybrid_retriever = index.as_retriever(
    vector_store_query_mode=VectorStoreQueryMode.SEMANTIC_HYBRID
)
hybrid_retriever.retrieve("What is inception about?")