llama-index-integrations/tools/llama-index-tools-playwright/examples/playwright_browser_agent.ipynb
<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-playwright/examples/playwright_browser_agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
This tutorial walks through using the LLM tools provided by the Playwright tool spec, which allow LLMs to easily navigate and scrape content from the Internet.
%pip install llama-index-tools-playwright llama-index
# set up async playwright browser
# To support more LlamaIndex use cases, only async Playwright tools are offered at the moment
# install playwright
!playwright install
# Needed only inside Jupyter notebooks, which already run their own event loop.
# NOTE(review): apply() presumably patches asyncio so that loop can be re-entered
# by the awaits below; confirm against the nest_asyncio documentation.
import nest_asyncio
nest_asyncio.apply()
# import the tools
from llama_index.tools.playwright.base import PlaywrightToolSpec
# create the tools
browser = await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
playwright_tool = PlaywrightToolSpec.from_async_browser(browser)
playwright_tool_list = playwright_tool.to_tool_list()
for tool in playwright_tool_list:
print(tool.metadata.name)
await playwright_tool.navigate_to("https://playwright.dev/python/docs/intro")
### Print the current page URL
print(await playwright_tool.get_current_page())
print(await playwright_tool.extract_hyperlinks())
print(await playwright_tool.extract_text())
Get the attributes of the element used to navigate to the next page. You can retrieve its selector from Google Chrome DevTools.
element = await playwright_tool.get_elements(
selector="#__docusaurus_skipToContent_fallback > div > div > main > div > div > div.col.docItemCol_VOVn > div > nav > a",
attributes=["innerText"],
)
print(element)
Click on the search bar
await playwright_tool.click(
selector="#__docusaurus > nav > div.navbar__inner > div.navbar__items.navbar__items--right > div.navbarSearchContainer_Bca1 > button"
)
Fill in the search bar with "Mouse click"
await playwright_tool.fill(selector="#docsearch-input", value="Mouse click")
Click on the first result; we should be redirected to the Mouse click page.
await playwright_tool.click(selector="#docsearch-hits0-item-0")
print(await playwright_tool.get_current_page())
To get started, you will need an OpenAI API key.
# Provide your OpenAI key through the environment (only needed when using OpenAI).
# Replace the placeholder value below with your own key.
import os

OPENAI_KEY_ENV_VAR = "OPENAI_API_KEY"
os.environ[OPENAI_KEY_ENV_VAR] = "YOUR_OPENAI_API_KEY"
from llama_index.core.agent import FunctionAgent
from llama_index.llms.openai import OpenAI
playwright_tool_list = playwright_tool.to_tool_list()
agent = FunctionAgent(
tools=playwright_tool_list,
llm=OpenAI(model="gpt-4o"),
)
print(
await agent.run(
"Navigate to https://blog.samaltman.com/productivity, extract the text on this page and return a summary of the article."
)
)
from llama_index.llms.openai import OpenAI
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.agent.workflow import (
AgentInput,
AgentOutput,
ToolCall,
ToolCallResult,
AgentStream,
)
# LLM shared by the workflow
llm = OpenAI(model="gpt-4o")

# Build an agent workflow directly from the Playwright tools
browser_system_prompt = "You are a helpful assistant that can do browser automation and data extraction"
workflow = AgentWorkflow.from_tools_or_functions(
    playwright_tool_list,
    llm=llm,
    system_prompt=browser_system_prompt,
)

# Start the run without awaiting it; the returned handler is what the
# streaming loop iterates over
summary_request = "Navigate to https://blog.samaltman.com/productivity, extract the text on this page and return a summary of the article."
handler = workflow.run(user_msg=summary_request)
async for event in handler.stream_events():
if isinstance(event, AgentStream):
print(event.delta, end="", flush=True)
# print(event.response) # the current full response
# print(event.raw) # the raw llm api response
# print(event.current_agent_name) # the current agent name
# elif isinstance(event, AgentInput):
# print(event.input) # the current input messages
# print(event.current_agent_name) # the current agent name
# elif isinstance(event, AgentOutput):
# print(event.response) # the current full response
# print(event.tool_calls) # the selected tool calls, if any
# print(event.raw) # the raw llm api response
elif isinstance(event, ToolCallResult):
print(event.tool_name) # the tool name
print(event.tool_kwargs) # the tool kwargs
print(event.tool_output) # the tool output
# elif isinstance(event, ToolCall):
# print(event.tool_name) # the tool name
# print(event.tool_kwargs) # the tool kwargs