Back to Llama Index

Example: Superlinked + LlamaIndex custom retriever (Steam games)

llama-index-integrations/retrievers/llama-index-retrievers-superlinked/examples/steam_games_example.ipynb

0.14.215.9 KB
Original Source
python
# Install dependencies for Colab
%pip -q install -U pip
%pip -q install llama-index-retrievers-superlinked
python
# Example: Superlinked + LlamaIndex custom retriever (Steam games)
# This notebook mirrors examples/steam_games_example.py

import argparse
from typing import List, Optional

import pandas as pd

import superlinked.framework as sl
from llama_index.retrievers.superlinked import SuperlinkedRetriever

# The query-engine pieces are optional for this example: if they cannot be
# imported, both names fall back to None and run_demo skips the
# RetrieverQueryEngine part, printing only the raw retrieval results.
try:
    from llama_index.core.query_engine import RetrieverQueryEngine
    from llama_index.core.response_synthesizers import get_response_synthesizer
except Exception:
    RetrieverQueryEngine = None  # type: ignore
    get_response_synthesizer = None  # type: ignore


python
def build_dataframe(csv_path: Optional[str]) -> pd.DataFrame:
    """Load the games table and add a ``combined_text`` column.

    Args:
        csv_path: Location of a CSV file handed to ``pd.read_csv``. When it
            is falsy (``None`` or ``""``), a built-in two-row sample table is
            used instead.

    Returns:
        The DataFrame with an extra ``combined_text`` column made of the
        name, snippet, genre, details, and description joined by single
        spaces. Missing values contribute an empty string.

    Raises:
        ValueError: If any of the required columns is absent.
    """
    if csv_path:
        df = pd.read_csv(csv_path)
    else:
        df = pd.DataFrame(
            [
                {
                    "game_number": 1,
                    "name": "Star Tactics",
                    "desc_snippet": "Turn-based strategy in deep space.",
                    "game_details": "Tactical combat, fleet management",
                    "languages": "en",
                    "genre": "Strategy, Sci-Fi",
                    "game_description": "Engage in strategic battles among the stars.",
                    "original_price": 29.99,
                    "discount_price": 19.99,
                },
                {
                    "game_number": 2,
                    "name": "Wizard Party",
                    "desc_snippet": "Co-op party game with spells.",
                    "game_details": "Local co-op, party",
                    "languages": "en",
                    "genre": "Party, Casual, Magic",
                    "game_description": "Cast spells with friends in chaotic party modes.",
                    "original_price": 14.99,
                    "discount_price": 9.99,
                },
            ]
        )

    required = [
        "game_number",
        "name",
        "desc_snippet",
        "game_details",
        "languages",
        "genre",
        "game_description",
        "original_price",
        "discount_price",
    ]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # Order matters: it defines the layout of the combined text.
    text_columns = (
        "name",
        "desc_snippet",
        "genre",
        "game_details",
        "game_description",
    )

    def _text(column: str) -> pd.Series:
        # A plain astype(str) renders missing cells (NaN) as the literal
        # string "nan", which would leak into the combined text. Replace
        # those positions with an empty string instead.
        values = df[column]
        return values.astype(str).where(values.notna(), "")

    combined = _text(text_columns[0])
    for column in text_columns[1:]:
        combined = combined + " " + _text(column)
    df["combined_text"] = combined
    return df

python
def build_superlinked_app(df: pd.DataFrame):
    """Wire up an in-memory Superlinked app over the games DataFrame.

    Declares the game schema, a text-similarity space over ``combined_text``,
    an index on that space, and a DataFrame parser; starts an in-memory
    executor, feeds ``df`` into its source, and builds a query that takes a
    ``query_text`` parameter.

    Args:
        df: Games table containing the columns referenced by the parser
            mapping below (including ``game_number`` and ``combined_text``).

    Returns:
        A tuple ``(app, query, game)``: the running app, the query
        descriptor, and the schema instance.
    """

    class GameSchema(sl.Schema):
        id: sl.IdField
        name: sl.String
        desc_snippet: sl.String
        game_details: sl.String
        languages: sl.String
        genre: sl.String
        game_description: sl.String
        original_price: sl.Float
        discount_price: sl.Float
        combined_text: sl.String

    game = GameSchema()

    # Fields whose DataFrame column name equals the schema attribute name.
    # These are also exactly the fields (after the id) returned by the query.
    same_named_columns = [
        "name",
        "desc_snippet",
        "game_details",
        "languages",
        "genre",
        "game_description",
        "original_price",
        "discount_price",
    ]

    embedding_space = sl.TextSimilaritySpace(
        text=game.combined_text,
        model="sentence-transformers/all-mpnet-base-v2",
    )
    game_index = sl.Index([embedding_space])

    # The id is the only field read from a differently named column.
    column_mapping = {game.id: "game_number"}
    for column in same_named_columns:
        column_mapping[getattr(game, column)] = column
    column_mapping[game.combined_text] = "combined_text"
    frame_parser = sl.DataFrameParser(game, mapping=column_mapping)

    memory_source = sl.InMemorySource(schema=game, parser=frame_parser)
    app = sl.InMemoryExecutor(sources=[memory_source], indices=[game_index]).run()

    # Data is pushed only after the executor is running.
    memory_source.put([df])

    # combined_text drives the similarity search but is not selected.
    selected_fields = [game.id]
    selected_fields.extend(getattr(game, column) for column in same_named_columns)

    similarity_query = sl.Query(game_index).find(game)
    similarity_query = similarity_query.similar(
        embedding_space, sl.Param("query_text")
    )
    similarity_query = similarity_query.select(selected_fields)

    return app, similarity_query, game

python
def run_demo(csv_path: Optional[str], top_k: int, query_text: str) -> None:
    """Run the retrieval demo end to end and print the results.

    Builds the DataFrame and the Superlinked app, wraps them in a
    ``SuperlinkedRetriever``, and prints each retrieved node with its score
    and metadata. If the optional query-engine imports are available, it also
    builds a ``RetrieverQueryEngine`` and prints its response; any exception
    from that step is printed rather than raised.

    Args:
        csv_path: Forwarded to ``build_dataframe``.
        top_k: Forwarded to the retriever as ``top_k``.
        query_text: Text used for both retrieval and the engine query.
    """
    games = build_dataframe(csv_path)
    app, query_descriptor, _schema = build_superlinked_app(games)

    exposed_metadata = [
        "id",
        "name",
        "genre",
        "game_details",
        "languages",
        "game_description",
        "original_price",
        "discount_price",
    ]
    retriever = SuperlinkedRetriever(
        sl_client=app,
        sl_query=query_descriptor,
        page_content_field="desc_snippet",
        query_text_param="query_text",
        metadata_fields=exposed_metadata,
        top_k=top_k,
    )

    print(f"\nRetrieving for: {query_text!r}")
    hits = retriever.retrieve(query_text)
    for rank, hit in enumerate(hits, start=1):
        print(f"#{rank} score={hit.score:.4f} text={hit.node.text!r}")
        print(f"   metadata: {hit.node.metadata}")

    # The engine part is optional; stop here when its imports were unavailable.
    if not RetrieverQueryEngine or not get_response_synthesizer:
        return

    print("\nBuilding RetrieverQueryEngine...")
    try:
        synthesizer = get_response_synthesizer()
        engine = RetrieverQueryEngine(
            retriever=retriever, response_synthesizer=synthesizer
        )
        answer = engine.query(query_text)
        print("\nEngine response:", answer)
    except Exception as e:
        print("Engine invocation failed (likely missing LLM setup):", e)

python
# Parameters (for Colab users)
# The trailing "# @param" markers are Colab form annotations; keep each one on
# the same line as its assignment.
# csv_path: None makes build_dataframe use its built-in two-row sample table.
# top_k: passed to the retriever as top_k.
# query_text: text used for retrieval and for the optional engine query.
csv_path = None  # @param {type:"string"}
top_k = 3        # @param {type:"integer"}
query_text = "strategic sci-fi game"  # @param {type:"string"}

# Run the whole demo with the parameters above.
run_demo(csv_path, top_k, query_text)