llama-index-integrations/tools/llama-index-tools-waii/examples/waii.ipynb
# Configure the Waii tool spec for one specific database connection.
from llama_index.tools.waii import WaiiToolSpec

# Waii service endpoint.
WAII_API_URL = "https://tweakit.waii.ai/api/"
# API key of Waii (not an OpenAI API key).
WAII_API_KEY = "3........"
# Database to use; the connection must already be registered with Waii.
WAII_DATABASE_KEY = "snowflake://...."

waii_tool = WaiiToolSpec(
    url=WAII_API_URL,
    api_key=WAII_API_KEY,
    database_key=WAII_DATABASE_KEY,
    verbose=True,
)
# Use as Data Loader, load data to index and query it.
# FIX: `from llama_index import VectorStoreIndex` is the legacy (<0.10) import
# path; the rest of this file uses the modern `llama_index.core` namespace
# (see the FunctionAgent / Context imports below), so use it here too.
from llama_index.core import VectorStoreIndex

# Each returned document wraps a row of the Waii query result.
documents = waii_tool.load_data("Get all tables with their number of columns")
# NOTE: this is a query engine built over the index, not the index itself —
# named accordingly.
query_engine = VectorStoreIndex.from_documents(documents).as_query_engine()
# Bare expression: in a notebook cell the response text is displayed as output.
query_engine.query(
    "Which table contains most columns, tell me top 5 tables with number of columns?"
).response
# Use as tool, initialize it
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.openai import OpenAI

# FIX: FunctionAgent is a Pydantic-model-based workflow agent, so its
# constructor only accepts keyword arguments — the original positional
# `FunctionAgent(waii_tool.to_tool_list(), ...)` raises a TypeError.
agent = FunctionAgent(
    tools=waii_tool.to_tool_list(),
    llm=OpenAI(model="gpt-4.1"),
)
from llama_index.core.workflow import Context
ctx = Context(agent)
print(await agent.run("Give me top 3 countries with the most number of car factory", ctx=ctx))
print(await agent.run("What are the car factories of these countries", ctx=ctx))
# Do performance analysis
print(
await agent.run(
"Give me top 3 longest running queries, include the complete query_id and their duration. And analyze performance of the first query",
ctx=ctx,
)
)
# Diff two queries
# Baseline query: per-department average salary plus each employee's delta
# from that average, via window functions.
previous_query = """
SELECT
employee_id,
department,
salary,
AVG(salary) OVER (PARTITION BY department) AS department_avg_salary,
salary - AVG(salary) OVER (PARTITION BY department) AS diff_from_avg
FROM
employees;
"""
# Modified query: MAX instead of AVG for the per-department aggregate, plus a
# row limit.
# FIX: the original text had a stray ";" after "employees" before "LIMIT 100;",
# which split the statement and made it invalid SQL; the terminator belongs
# after the LIMIT clause.
current_query = """
SELECT
employee_id,
department,
salary,
MAX(salary) OVER (PARTITION BY department) AS department_max_salary,
salary - AVG(salary) OVER (PARTITION BY department) AS diff_from_avg
FROM
employees
LIMIT 100;
"""
print(await agent.run(f"tell me difference between {previous_query} and {current_query}", ctx=ctx))
# Describe dataset
print(await agent.run("Summarize the dataset", ctx=ctx))
# PySpark program to be translated: year-over-year average-horsepower
# comparison using lag/lead window functions. It references a `cars_data`
# DataFrame that is not defined in the snippet — presumably a table on the
# Waii-connected database; the agent only needs the text, not a runnable
# program.
q = """
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, lag, lead, round
from pyspark.sql.window import Window
spark = SparkSession.builder.appName("yearly_car_analysis").getOrCreate()
yearly_avg_hp = cars_data.groupBy("year").agg(avg("horsepower").alias("avg_horsepower"))
windowSpec = Window.orderBy("year")
yearly_comparisons = yearly_avg_hp.select(
"year",
"avg_horsepower",
lag("avg_horsepower").over(windowSpec).alias("prev_year_hp"),
lead("avg_horsepower").over(windowSpec).alias("next_year_hp")
)
final_result = yearly_comparisons.select(
"year",
"avg_horsepower",
round(
(yearly_comparisons.avg_horsepower - yearly_comparisons.prev_year_hp) /
yearly_comparisons.prev_year_hp * 100, 2
).alias("percentage_diff_prev_year"),
round(
(yearly_comparisons.next_year_hp - yearly_comparisons.avg_horsepower) /
yearly_comparisons.avg_horsepower * 100, 2
).alias("percentage_diff_next_year")
).orderBy("year")
final_result.show()
"""
print(await agent.run(f"translate this pyspark query {q}, to Snowflake", ctx=ctx))