Visualizing Website Topics (Claude + Firecrawl + E2B)

Powered by Claude 3.5 Sonnet, Firecrawl, and the Code Interpreter SDK by E2B.

Scrape a website with Firecrawl, then use Claude and the E2B Code Interpreter to identify and plot the most common topics.

python

%pip install e2b_code_interpreter anthropic firecrawl-py

python

import os
import datetime
import time
from firecrawl import FirecrawlApp
import json

# Credentials for the three services this notebook talks to. Each value below
# is a placeholder: replace it with your own key before running later cells.
# Avoid committing real keys to version control.

# TODO: Get your Anthropic API key from https://anthropic.com
# Passed to the `Anthropic(...)` client constructor.
anthropic_api_key = "your-anthropic-api-key"
# TODO: Get your Firecrawl API key from https://www.firecrawl.dev
# Passed to the `FirecrawlApp(...)` constructor.
firecrawl_api_key = "your-firecrawl-api-key"
# TODO: Get your E2B API key from https://e2b.dev/docs
# Passed to the `CodeInterpreter(...)` sandbox constructor.
e2b_api_key = "your-e2b-api-key"

python

# Build the Firecrawl client from the key configured earlier in the notebook.
app = FirecrawlApp(api_key=firecrawl_api_key)

# Start the crawl at this URL, passing a crawler `limit` of 5.
crawl_url = 'https://python.langchain.com/v0.2/docs'
params = {'crawlerOptions': {'limit': 5}}
crawl_result = app.crawl_url(crawl_url, params=params)

if crawl_result is None:
    # Nothing came back: keep an empty list so later cells still have a value.
    cleaned_crawl_result = []
    print("No data returned from crawl.")
else:
    # Copy each crawled entry without its 'content' field (every other key is
    # kept as-is). Presumably this keeps the prompt sent to Claude smaller.
    cleaned_crawl_result = [
        {field: value for field, value in page.items() if field != 'content'}
        for page in crawl_result
    ]

python

# Model identifier sent with every Messages API request in this notebook.
MODEL_NAME = "claude-3-5-sonnet-20240620"

# System prompt describing the sandboxed notebook environment Claude is
# writing code for, and how bracketed tool-result placeholders should be read.
SYSTEM_PROMPT = """
## your job & context
you are a python data scientist. you are given tasks to complete and you run python code to solve them.
- the python code runs in jupyter notebook.
- every time you call `execute_python` tool, the python code is executed in a separate cell. it's okay to multiple calls to `execute_python`.
- display visualizations using matplotlib or any other visualization library directly in the notebook. don't worry about saving the visualizations to a file.
- you have access to the internet and can make api requests.
- you also have access to the filesystem and can read/write files.
- you can install any pip package (if it exists) if you need to but the usual packages for data analysis are already preinstalled.
- you can run any python code you want, everything is running in a secure sandbox environment.

## style guide
tool response values that have text inside "[]"  mean that a visual element got rended in the notebook. for example:
- "[chart]" means that a chart was generated in the notebook.
"""

# JSON schema for the single `code` argument of the `execute_python` tool.
_EXECUTE_PYTHON_INPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "code": {
            "type": "string",
            "description": "The python code to execute in a single cell.",
        },
    },
    "required": ["code"],
}

# The one tool exposed to Claude: run a cell of Python code.
_EXECUTE_PYTHON_TOOL = {
    "name": "execute_python",
    "description": "Execute python code in a Jupyter notebook cell and returns any result, stdout, stderr, display_data, and error.",
    "input_schema": _EXECUTE_PYTHON_INPUT_SCHEMA,
}

# Tool list passed as `tools=` when creating a message.
tools = [_EXECUTE_PYTHON_TOOL]

python

def code_interpret(e2b_code_interpreter, code):
    """Run `code` as one notebook cell in the sandbox and return its results.

    Args:
        e2b_code_interpreter: Object exposing `notebook.exec_cell(...)`.
        code: Python source to execute in a single cell.

    Returns:
        The execution's `results` when it reports no error; otherwise None
        (the error is printed instead).
    """
    print("Running code interpreter...")

    def _echo(chunk):
        # Both stdout and stderr are streamed to the console with one prefix.
        print("[Code Interpreter]", chunk)

    # Results can also be streamed by passing an `on_result=` callback.
    execution = e2b_code_interpreter.notebook.exec_cell(
        code,
        on_stderr=_echo,
        on_stdout=_echo,
    )

    if not execution.error:
        return execution.results

    print("[Code Interpreter ERROR]", execution.error)
    return None

python

from anthropic import Anthropic
# Shared Anthropic client, authenticated with the key configured earlier.
client = Anthropic(api_key=anthropic_api_key)

def process_tool_call(e2b_code_interpreter, tool_name, tool_input):
    """Dispatch a tool call requested by Claude.

    Args:
        e2b_code_interpreter: Sandbox handle forwarded to `code_interpret`.
        tool_name: Name of the requested tool.
        tool_input: Mapping of tool arguments; `"code"` is read for
            `execute_python`.

    Returns:
        The result of `code_interpret` for `execute_python`, or an empty list
        for any other tool name.
    """
    if tool_name != "execute_python":
        return []
    return code_interpret(e2b_code_interpreter, tool_input["code"])

def chat_with_claude(e2b_code_interpreter, user_message):
    """Send one user message to Claude and run the first tool call it requests.

    Args:
        e2b_code_interpreter: Sandbox handle forwarded to `process_tool_call`.
        user_message: Text sent as the single user turn.

    Returns:
        Whatever `process_tool_call` returns for the first `tool_use` block,
        or None when the response's stop reason is anything other than
        "tool_use".
    """
    separator = "=" * 50
    print(f"\n{separator}\nUser Message: {user_message}\n{separator}")

    response = client.messages.create(
        model=MODEL_NAME,
        system=SYSTEM_PROMPT,
        messages=[{"role": "user", "content": user_message}],
        max_tokens=4096,
        tools=tools,
    )

    print("\nInitial Response:")
    print(f"Stop Reason: {response.stop_reason}")
    print(f"Content: {response.content}")

    if response.stop_reason != "tool_use":
        # No tool was requested, so there is nothing to execute.
        return None

    # Only the first tool_use block is acted on; any later ones are ignored.
    requested = next(block for block in response.content if block.type == "tool_use")
    requested_name = requested.name
    requested_input = requested.input

    print(f"\nTool Used: {requested_name}")
    print(f"Tool Input: {requested_input}")

    tool_output = process_tool_call(e2b_code_interpreter, requested_name, requested_input)

    print(f"Tool Result: {tool_output}")
    return tool_output

python

from e2b_code_interpreter import CodeInterpreter

# Task for Claude: the instructions followed by the stringified crawl results
# (with the 'content' field already removed).
task_prompt = (
    "Use python to identify the most common topics in the crawl results. For each topic, count the number of times it appears in the crawl results and plot them. Here is the crawl results: "
    + str(cleaned_crawl_result)
)

# The sandbox is used as a context manager around the single chat round-trip.
with CodeInterpreter(api_key=e2b_api_key) as code_interpreter:
    code_interpreter_results = chat_with_claude(code_interpreter, task_prompt)
print(code_interpreter_results)

python

# `code_interpreter_results` is None when Claude answered without calling the
# tool or when the sandboxed code raised an error, and it can be an empty list
# when nothing was produced. Guard before indexing so this cell reports the
# situation instead of failing with TypeError/IndexError.
if code_interpreter_results:
    # Show the first result returned by the executed cell.
    result = code_interpreter_results[0]
    print(result)
else:
    result = None
    print("No code interpreter results to display.")

# Bare expression so the notebook renders the value as the cell output.
result