llama-index-integrations/tools/llama-index-tools-waii/examples/waii.ipynb
# Configure the Waii tool spec for one specific database connection.
from llama_index.tools.waii import WaiiToolSpec

# Waii service endpoint.
WAII_API_URL = "https://tweakit.waii.ai/api/"
# API key of Waii (not an OpenAI API key).
WAII_API_KEY = "3........"
# Database to use; the connection must already be registered with Waii.
WAII_DATABASE_KEY = "snowflake://...."

waii_tool = WaiiToolSpec(
    url=WAII_API_URL,
    api_key=WAII_API_KEY,
    database_key=WAII_DATABASE_KEY,
    verbose=True,
)
# Use as Data Loader, load data to index and query it.
# FIX: `from llama_index import VectorStoreIndex` is the legacy (<0.10) import
# path; the rest of this file uses the modern `llama_index.core` namespace
# (see the FunctionAgent / Context imports below), so use it here too.
from llama_index.core import VectorStoreIndex

# Each returned document wraps a row of the Waii query result.
documents = waii_tool.load_data("Get all tables with their number of columns")
# NOTE: this is a query engine built over the index, not the index itself —
# named accordingly.
query_engine = VectorStoreIndex.from_documents(documents).as_query_engine()
# Bare expression: in a notebook cell the response text is displayed as output.
query_engine.query(
    "Which table contains most columns, tell me top 5 tables with number of columns?"
).response
# Use as tool, initialize it
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.openai import OpenAI

# FIX: FunctionAgent is a Pydantic-model-based workflow agent, so its
# constructor only accepts keyword arguments — the original positional
# `FunctionAgent(waii_tool.to_tool_list(), ...)` raises a TypeError.
agent = FunctionAgent(
    tools=waii_tool.to_tool_list(),
    llm=OpenAI(model="gpt-4.1"),
)
from llama_index.core.workflow import Context
ctx = Context(agent)
print(await agent.run("Give me top 3 countries with the most number of car factory", ctx=ctx))
print(await agent.run("What are the car factories of these countries", ctx=ctx))
# Do performance analysis
print(
await agent.run(
"Give me top 3 longest running queries, include the complete query_id and their duration. And analyze performance of the first query",
ctx=ctx,
)
)
# Diff two queries
# Baseline query: per-department average salary plus each employee's delta
# from that average, via window functions.
previous_query = """
SELECT
employee_id,
department,
salary,
AVG(salary) OVER (PARTITION BY department) AS department_avg_salary,
salary - AVG(salary) OVER (PARTITION BY department) AS diff_from_avg
FROM
employees;
"""
# Modified query: MAX instead of AVG for the per-department aggregate, plus a
# row limit.
# FIX: the original text had a stray ";" after "employees" before "LIMIT 100;",
# which split the statement and made it invalid SQL; the terminator belongs
# after the LIMIT clause.
current_query = """
SELECT
employee_id,
department,
salary,
MAX(salary) OVER (PARTITION BY department) AS department_max_salary,
salary - AVG(salary) OVER (PARTITION BY department) AS diff_from_avg
FROM
employees
LIMIT 100;
"""
print(await agent.run(f"tell me difference between {previous_query} and {current_query}", ctx=ctx))
# Describe dataset
print(await agent.run("Summarize the dataset", ctx=ctx))
# PySpark program to be translated: year-over-year average-horsepower
# comparison using lag/lead window functions. It references a `cars_data`
# DataFrame that is not defined in the snippet — presumably a table on the
# Waii-connected database; the agent only needs the text, not a runnable
# program.
q = """
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, lag, lead, round
from pyspark.sql.window import Window
spark = SparkSession.builder.appName("yearly_car_analysis").getOrCreate()
yearly_avg_hp = cars_data.groupBy("year").agg(avg("horsepower").alias("avg_horsepower"))
windowSpec = Window.orderBy("year")
yearly_comparisons = yearly_avg_hp.select(
"year",
"avg_horsepower",
lag("avg_horsepower").over(windowSpec).alias("prev_year_hp"),
lead("avg_horsepower").over(windowSpec).alias("next_year_hp")
)
final_result = yearly_comparisons.select(
"year",
"avg_horsepower",
round(
(yearly_comparisons.avg_horsepower - yearly_comparisons.prev_year_hp) /
yearly_comparisons.prev_year_hp * 100, 2
).alias("percentage_diff_prev_year"),
round(
(yearly_comparisons.next_year_hp - yearly_comparisons.avg_horsepower) /
yearly_comparisons.avg_horsepower * 100, 2
).alias("percentage_diff_next_year")
).orderBy("year")
final_result.show()
"""
print(await agent.run(f"translate this pyspark query {q}, to Snowflake", ctx=ctx))