apps/docs/src/content/docs/en/guides/data-analysis-with-ai.mdx
import { TabItem, Tabs } from '@astrojs/starlight/components'
import { Image } from 'astro:assets'
import chartImage from '../../../../assets/docs/images/chart-0.png'
You can use Daytona Sandbox to run AI-generated code to analyze data. Here's how the AI data analysis workflow typically looks:
This example shows how to build an AI-powered data analyst that automatically generates insights and visualizations from CSV data using Daytona's secure sandbox environment.
What we'll build: A system that analyzes a vehicle valuation dataset, identifies price relation to manufacturing year, and generates professional visualizations - all through natural language prompts to Claude. The system uses an agentic loop that allows Claude to iteratively refine the code based on execution results.
Install the Daytona SDK and Anthropic SDK to your project:
<Tabs syncKey="language">
<TabItem label="Python" icon="seti:python">

```bash
pip install daytona anthropic python-dotenv
```

</TabItem>
<TabItem label="TypeScript" icon="seti:typescript">

```bash
npm install @daytona/sdk @anthropic-ai/sdk dotenv
```

</TabItem>
<TabItem label="Ruby" icon="seti:ruby">

```bash
gem install daytona anthropic dotenv
```

</TabItem>
</Tabs>

Get your API keys and configure your environment:
Create a .env file in your project:
DAYTONA_API_KEY=dtn_***
ANTHROPIC_API_KEY=sk-ant-***
We'll use a publicly available vehicle valuation dataset. You can download it directly from:
https://download.daytona.io/dataset.csv
Download the file and save it as dataset.csv in your project directory.
Now create a Daytona sandbox and upload your dataset:
<Tabs syncKey="language">
<TabItem label="Python" icon="seti:python">

```python
from dotenv import load_dotenv
from daytona import Daytona
import os

load_dotenv()
# Create the Daytona client and start a new sandbox.
# NOTE(review): Daytona() is called with no arguments; presumably it reads
# DAYTONA_API_KEY from the environment populated by load_dotenv() — confirm.
daytona = Daytona() # The sandbox language is Python by default.
sandbox = daytona.create()
# Upload the local dataset.csv into the sandbox at /home/daytona/dataset.csv,
# the path the prompt later tells the model to read from.
sandbox.fs.upload_file("dataset.csv", "/home/daytona/dataset.csv")
```
// Create the Daytona client and start a new sandbox.
// NOTE(review): no options are passed to the constructor; presumably the API key
// comes from the DAYTONA_API_KEY environment variable — confirm.
const daytona = new Daytona(); // The sandbox language is Python by default.
const sandbox = await daytona.create()
// Upload the local dataset.csv into the sandbox at /home/daytona/dataset.csv,
// the path the prompt later tells the model to read from.
await sandbox.fs.uploadFile('dataset.csv', '/home/daytona/dataset.csv')
```
# Create the Daytona client and start a new sandbox.
# NOTE(review): presumably the API key is read from DAYTONA_API_KEY — confirm.
daytona = Daytona::Daytona.new # The sandbox language is Python by default.
sandbox = daytona.create
# Upload the dataset to the sandbox at /home/daytona/dataset.csv.
# Unlike the Python/TypeScript snippets, this passes the file's contents
# (File.read) rather than a local path.
sandbox.fs.upload_file(File.read('dataset.csv'), '/home/daytona/dataset.csv')
```
Now we'll create the core functionality that connects Claude with Daytona to analyze data and generate visualizations.
First, let's create a function to handle code execution and chart extraction. This function returns execution results that can be fed back to the AI model:
<Tabs syncKey="language">
<TabItem label="Python" icon="seti:python">

```python
import base64
from typing import TypedDict

class ExecutionResult(TypedDict):
stdout: str
exit_code: int
charts: list
def run_ai_generated_code(sandbox, ai_generated_code: str) -> ExecutionResult:
    """Run model-written code in the sandbox and save any charts it produced.

    Args:
        sandbox: Object exposing ``process.code_run``; used to execute the code.
        ai_generated_code: Source code to execute.

    Returns:
        An ExecutionResult with the run's ``result`` text (empty string when
        falsy), its exit code, and the charts reported in its artifacts.

    Side effects:
        Writes each chart that has PNG data to ``chart-<n>.png`` in the current
        working directory and prints one confirmation line per file. Numbering
        starts at 0 on every call.
    """
    run = sandbox.process.code_run(ai_generated_code)
    artifacts = run.artifacts
    chart_list = artifacts.charts if artifacts else []

    outcome = ExecutionResult(
        stdout=run.result or "",
        exit_code=run.exit_code,
        charts=chart_list,
    )

    # Only charts carrying PNG data are written; the counter advances per saved
    # file, so skipped charts do not leave gaps in the numbering.
    saved = 0
    for chart in chart_list or []:
        if not chart.png:
            continue
        filename = f'chart-{saved}.png'
        with open(filename, 'wb') as png_file:
            # chart.png is base64 text; decode it to raw bytes before writing.
            png_file.write(base64.b64decode(chart.png))
        print(f'✓ Chart saved to {filename}')
        saved += 1

    return outcome
```
/** Outcome of one sandbox code run, in the shape handed back to the agentic loop. */
interface ExecutionResult {
  // Text from the execution's `result` field; empty string when it is falsy
  stdout: string
  // Exit code reported by the sandbox execution
  exitCode: number
  // Charts reported in the execution artifacts, if any; `png` holds base64 data
  charts?: Array<{ png?: string }>
}
/**
 * Runs model-written code in the sandbox and saves any charts it produced.
 *
 * Each chart that carries PNG data is written to `chart-<n>.png` in the current
 * working directory (numbering restarts at 0 on every call) and a confirmation
 * line is logged per file.
 *
 * @param sandbox - Sandbox used to execute the code.
 * @param aiGeneratedCode - Source code to execute.
 * @returns The run's output text (empty string when falsy), exit code and charts.
 */
async function runAIGeneratedCode(
  sandbox: Sandbox,
  aiGeneratedCode: string
): Promise<ExecutionResult> {
  const run = await sandbox.process.codeRun(aiGeneratedCode)
  const charts = run.artifacts?.charts

  // The counter advances only for saved files, so skipped charts leave no gaps.
  let saved = 0
  for (const chart of charts ?? []) {
    if (!chart.png) continue
    const filename = `chart-${saved}.png`
    // chart.png is base64 text; the encoding option makes Node decode it on write.
    fs.writeFileSync(filename, chart.png, { encoding: 'base64' })
    console.log(`✓ Chart saved to ${filename}`)
    saved += 1
  }

  return {
    stdout: run.result || "",
    exitCode: run.exitCode,
    charts
  }
}
```
Next, we'll create the prompt that tells Claude about our dataset and what analysis we want. This prompt includes:
# Task description sent to Claude as the first user message. It names the dataset
# path inside the sandbox, the relevant columns, the analysis to perform, and asks
# for matplotlib code ending in plt.show().
# A plain string (not an f-string) is used on purpose: nothing is interpolated,
# and literal braces added to the prompt later must not be treated as fields.
prompt = """
I have a CSV file with vehicle valuations saved in the sandbox at /home/daytona/dataset.csv.
Relevant columns:
- 'year': integer, the manufacturing year of the vehicle
- 'price_in_euro': float, the listed price of the vehicle in Euros
Analyze how price varies by manufacturing year.
Drop rows where 'year' or 'price_in_euro' is missing, non-numeric, or an outlier.
Create a line chart showing average price per year.
Write Python code that analyzes the dataset based on my request and produces a matplotlib chart accordingly.
Always finish with plt.show() to display the chart."""
# Create the Anthropic client used by the agentic loop.
# NOTE(review): no key is passed; presumably the SDK reads ANTHROPIC_API_KEY
# from the environment (the TypeScript snippet passes it explicitly) — confirm.
anthropic = Anthropic()
```
// Task description sent to Claude as the first user message. It names the dataset
// path inside the sandbox, the relevant columns, the analysis to perform, and asks
// for matplotlib code ending in plt.show().
const prompt = `
I have a CSV file with vehicle valuations saved in the sandbox at /home/daytona/dataset.csv.
Relevant columns:
- 'year': integer, the manufacturing year of the vehicle
- 'price_in_euro': float, the listed price of the vehicle in Euros
Analyze how price varies by manufacturing year.
Drop rows where 'year' or 'price_in_euro' is missing, non-numeric, or an outlier.
Create a line chart showing average price per year.
Write Python code that analyzes the dataset based on my request and produces a matplotlib chart accordingly.
Always finish with plt.show() to display the chart.`
// Anthropic client, authenticated with the ANTHROPIC_API_KEY environment variable.
const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY })
```
Define the tool that allows Claude to execute Python code in the sandbox:
<Tabs syncKey="language">
<TabItem label="Python" icon="seti:python">

```python
tools = [
    {
        'name': 'run_python_code',
        'description': 'Run Python code in the sandbox environment and get execution results',
        'input_schema': {
            'type': 'object',
            'properties': {
                'code': {
                    'type': 'string',
                    'description': 'The Python code to run',
                },
            },
            'required': ['code'],
        },
    },
]
```

</TabItem>
<TabItem label="TypeScript" icon="seti:typescript">

```typescript
import type { Tool, ToolUseBlock } from '@anthropic-ai/sdk/resources/messages.mjs'

const tools: Tool[] = [
{
name: 'run_python_code',
description: 'Run Python code in the sandbox environment and get execution results',
input_schema: {
type: 'object',
properties: {
code: {
type: 'string',
description: 'The Python code to run',
},
},
required: ['code'],
},
},
]
```
Now we'll implement the agentic loop that allows Claude to iteratively refine the code based on execution feedback. This enables Claude to fix errors, handle edge cases, and improve the analysis through multiple iterations:
<Tabs syncKey="language">
<TabItem label="Python" icon="seti:python">

```python
# Initialize conversation history
messages = [{'role': 'user', 'content': prompt}]

continue_loop = True
iteration_count = 0
max_iterations = 10
print("Starting agentic loop...\n")

# Each pass: ask Claude for a response, run any code it requests in the sandbox,
# and send the execution results back. The loop ends when Claude stops calling
# tools or when max_iterations passes have been made.
while continue_loop and iteration_count < max_iterations:
    iteration_count += 1
    print(f"\n=== Iteration {iteration_count} ===")
    print("Waiting for the model response...")

    # Get response from Claude. The request is streamed (matching the TypeScript
    # version) because the Anthropic SDK rejects non-streaming requests whose
    # max_tokens implies a response that could take longer than 10 minutes.
    with anthropic.messages.stream(
        model='claude-sonnet-4-5',
        max_tokens=64000,
        messages=messages,
        tools=tools,
    ) as stream:
        msg = stream.get_final_message()

    # Log Claude's text response
    for content_block in msg.content:
        if content_block.type == 'text':
            print("\nClaude's response:")
            print(content_block.text)

    # Check if Claude wants to use any tools
    tool_uses = [block for block in msg.content if block.type == 'tool_use']
    if not tool_uses:
        # No more tool uses, Claude is done
        print("\nTask completed - no more actions needed.")
        continue_loop = False
        break

    # Add Claude's response to message history
    messages.append({'role': 'assistant', 'content': msg.content})

    # Execute all tool calls and collect results
    tool_results = []
    for tool_use in tool_uses:
        if tool_use.name != 'run_python_code':
            # Every tool_use block must be answered with a tool_result, otherwise
            # the next request is invalid; report unknown tools as an error.
            tool_results.append({
                'type': 'tool_result',
                'tool_use_id': tool_use.id,
                'content': f"Unknown tool: {tool_use.name}",
                'is_error': True,
            })
            continue

        code = tool_use.input['code']
        print("\n--- Executing Python code in sandbox ---")
        print(code)
        print("--- End of code ---\n")

        # Execute the code in the sandbox
        execution_result = run_ai_generated_code(sandbox, code)

        # Format the tool result
        result_content = ""
        if execution_result['exit_code'] == 0:
            result_content += "Execution successful!\n\n"
            if execution_result['stdout']:
                result_content += f"Output:\n{execution_result['stdout']}\n"
            if execution_result['charts'] and len(execution_result['charts']) > 0:
                result_content += f"\nGenerated {len(execution_result['charts'])} chart(s)."
            else:
                # Nudge the model: a successful run without a chart is incomplete.
                result_content += "\nNote: No charts were generated. Make sure to use plt.show() to display the chart."
        else:
            result_content += f"Execution failed with exit code {execution_result['exit_code']}\n\n"
            if execution_result['stdout']:
                result_content += f"Output:\n{execution_result['stdout']}\n"

        tool_results.append({
            'type': 'tool_result',
            'tool_use_id': tool_use.id,
            'content': result_content
        })
        print("Execution result sent back to Claude.")

    # Add tool results to conversation history
    messages.append({'role': 'user', 'content': tool_results})

# continue_loop is still True only when the loop ran out of iterations; without
# this check the warning would also fire when Claude finishes on the last pass.
if continue_loop and iteration_count >= max_iterations:
    print("\n⚠️ Reached maximum iteration limit. Task may not be complete.")
print("\n=== Agentic loop completed ===")
```
/** Shape of the `run_python_code` tool input, matching the tool's input_schema. */
interface CodeRunToolInput {
  // The Python code to run
  code: string
}
// Initialize conversation history with the prompt defined in the previous step
const messages: MessageParam[] = [
  { role: 'user', content: prompt }
]
let continueLoop = true
let iterationCount = 0
const maxIterations = 10
console.log("Starting agentic loop...\n")

// Each pass: ask Claude for a response, run any code it requests in the sandbox,
// and send the execution results back. The loop ends when Claude stops calling
// tools or when maxIterations passes have been made.
while (continueLoop && iterationCount < maxIterations) {
  iterationCount++
  console.log(`\n=== Iteration ${iterationCount} ===`)
  console.log("Waiting for the model response...")

  // Get response from Claude (streamed, then collected into one final message)
  const stream = anthropic.messages.stream({
    model: 'claude-sonnet-4-5',
    max_tokens: 64000,
    messages: messages,
    tools: tools
  })
  const message = await stream.finalMessage()

  // Log Claude's text response
  for (const contentBlock of message.content) {
    if (contentBlock.type === 'text') {
      console.log("\nClaude's response:")
      console.log(contentBlock.text)
    }
  }

  // Check if Claude wants to use any tools
  const toolUses = message.content.filter(
    (block): block is ToolUseBlock => block.type === 'tool_use'
  )
  if (toolUses.length === 0) {
    // No more tool uses, Claude is done
    console.log("\nTask completed - no more actions needed.")
    continueLoop = false
    break
  }

  // Add Claude's response to message history
  messages.push({
    role: 'assistant',
    content: message.content
  })

  // Execute all tool calls and collect results
  const toolResults = []
  for (const toolUse of toolUses) {
    if (toolUse.name !== 'run_python_code') {
      // Every tool_use block must be answered with a tool_result, otherwise the
      // next request is invalid; report unknown tools as an error.
      toolResults.push({
        type: 'tool_result' as const,
        tool_use_id: toolUse.id,
        content: `Unknown tool: ${toolUse.name}`,
        is_error: true
      })
      continue
    }

    const code = (toolUse.input as CodeRunToolInput).code
    console.log("\n--- Executing Python code in sandbox ---")
    console.log(code)
    console.log("--- End of code ---\n")

    // Execute the code in the sandbox
    const executionResult = await runAIGeneratedCode(sandbox, code)

    // Format the tool result
    let resultContent = ""
    if (executionResult.exitCode === 0) {
      resultContent += "Execution successful!\n\n"
      if (executionResult.stdout) {
        resultContent += `Output:\n${executionResult.stdout}\n`
      }
      if (executionResult.charts && executionResult.charts.length > 0) {
        resultContent += `\nGenerated ${executionResult.charts.length} chart(s).`
      } else {
        // Nudge the model: a successful run without a chart is incomplete.
        resultContent += "\nNote: No charts were generated. Make sure to use plt.show() to display the chart."
      }
    } else {
      resultContent += `Execution failed with exit code ${executionResult.exitCode}\n\n`
      if (executionResult.stdout) {
        resultContent += `Output:\n${executionResult.stdout}\n`
      }
    }

    toolResults.push({
      type: 'tool_result' as const,
      tool_use_id: toolUse.id,
      content: resultContent
    })
    console.log("Execution result sent back to Claude.")
  }

  // Add tool results to conversation history
  messages.push({
    role: 'user',
    content: toolResults
  })
}

// continueLoop is still true only when the loop ran out of iterations; without
// this check the warning would also fire when Claude finishes on the last pass.
if (continueLoop && iterationCount >= maxIterations) {
  console.log("\n⚠️ Reached maximum iteration limit. Task may not be complete.")
}
console.log("\n=== Agentic loop completed ===")
```
The agentic loop works as follows:
This approach allows Claude to:
Key advantages of this approach:
Now you can run the complete code to see the results.
<Tabs syncKey="language">
<TabItem label="Python" icon="seti:python">

```bash
python data-analysis.py
```

</TabItem>
<TabItem label="TypeScript" icon="seti:typescript">

```bash
npx tsx data-analysis.ts
```

</TabItem>
</Tabs>

You should see a chart saved in your project directory that looks similar to this:
<Image src={chartImage} alt="Vehicle valuation by manufacturing year chart" width={600} style="max-width: 100%; height: auto; margin: 1rem 0;" />

Here are the complete, ready-to-run examples with the agentic loop:
<Tabs syncKey="language">
<TabItem label="Python" icon="seti:python">

```python
import base64
from dotenv import load_dotenv
from daytona import Daytona, Sandbox
from anthropic import Anthropic
from typing import TypedDict

class ExecutionResult(TypedDict):
stdout: str
exit_code: int
charts: list
def main():
    """Run the complete example end to end.

    Loads environment variables, creates a Daytona sandbox, uploads dataset.csv,
    then drives an agentic loop in which Claude writes Python code, the code is
    executed in the sandbox, and the results are fed back until Claude stops
    requesting tool calls or 10 iterations have been made.
    """
    load_dotenv()

    # Create sandbox
    daytona = Daytona()
    sandbox = daytona.create()

    # Upload the dataset to the sandbox
    sandbox.fs.upload_file("dataset.csv", "/home/daytona/dataset.csv")

    initial_prompt = """
I have a CSV file with vehicle valuations saved in the sandbox at /home/daytona/dataset.csv.
Relevant columns:
- 'year': integer, the manufacturing year of the vehicle
- 'price_in_euro': float, the listed price of the vehicle in Euros
Analyze how price varies by manufacturing year.
Drop rows where 'year' or 'price_in_euro' is missing, non-numeric, or an outlier.
Create a line chart showing average price per year.
Write Python code that analyzes the dataset based on my request and produces a matplotlib chart accordingly.
Always finish with plt.show() to display the chart."""

    anthropic = Anthropic()

    # The single tool Claude may call: run Python code in the sandbox.
    tools = [
        {
            'name': 'run_python_code',
            'description': 'Run Python code in the sandbox environment and get execution results',
            'input_schema': {
                'type': 'object',
                'properties': {
                    'code': {
                        'type': 'string',
                        'description': 'The Python code to run',
                    },
                },
                'required': ['code'],
            },
        },
    ]

    # Initialize conversation history
    messages = [{'role': 'user', 'content': initial_prompt}]
    continue_loop = True
    iteration_count = 0
    max_iterations = 10
    print("Starting agentic loop...\n")

    while continue_loop and iteration_count < max_iterations:
        iteration_count += 1
        print(f"\n=== Iteration {iteration_count} ===")
        print("Waiting for the model response...")

        # Get response from Claude. The request is streamed (matching the
        # TypeScript version) because the Anthropic SDK rejects non-streaming
        # requests whose max_tokens implies a response longer than 10 minutes.
        with anthropic.messages.stream(
            model='claude-sonnet-4-5',
            max_tokens=64000,
            messages=messages,
            tools=tools,
        ) as stream:
            msg = stream.get_final_message()

        # Log Claude's text response
        for content_block in msg.content:
            if content_block.type == 'text':
                print("\nClaude's response:")
                print(content_block.text)

        # Check if Claude wants to use any tools
        tool_uses = [block for block in msg.content if block.type == 'tool_use']
        if not tool_uses:
            # No more tool uses, Claude is done
            print("\nTask completed - no more actions needed.")
            continue_loop = False
            break

        # Add Claude's response to message history
        messages.append({'role': 'assistant', 'content': msg.content})

        # Execute all tool calls and collect results
        tool_results = []
        for tool_use in tool_uses:
            if tool_use.name != 'run_python_code':
                # Every tool_use block must be answered with a tool_result,
                # otherwise the next request is invalid; report unknown tools
                # as an error.
                tool_results.append({
                    'type': 'tool_result',
                    'tool_use_id': tool_use.id,
                    'content': f"Unknown tool: {tool_use.name}",
                    'is_error': True,
                })
                continue

            code = tool_use.input['code']
            print("\n--- Executing Python code in sandbox ---")
            print(code)
            print("--- End of code ---\n")

            # Execute the code in the sandbox
            execution_result = run_ai_generated_code(sandbox, code)

            # Format the tool result
            result_content = ""
            if execution_result['exit_code'] == 0:
                result_content += "Execution successful!\n\n"
                if execution_result['stdout']:
                    result_content += f"Output:\n{execution_result['stdout']}\n"
                if execution_result['charts'] and len(execution_result['charts']) > 0:
                    result_content += f"\nGenerated {len(execution_result['charts'])} chart(s)."
                else:
                    # Nudge the model: a successful run without a chart is incomplete.
                    result_content += "\nNote: No charts were generated. Make sure to use plt.show() to display the chart."
            else:
                result_content += f"Execution failed with exit code {execution_result['exit_code']}\n\n"
                if execution_result['stdout']:
                    result_content += f"Output:\n{execution_result['stdout']}\n"

            tool_results.append({
                'type': 'tool_result',
                'tool_use_id': tool_use.id,
                'content': result_content
            })
            print("Execution result sent back to Claude.")

        # Add tool results to conversation history
        messages.append({'role': 'user', 'content': tool_results})

    # continue_loop is still True only when the loop ran out of iterations;
    # without this check the warning would also fire when Claude finishes on
    # the last allowed pass.
    if continue_loop and iteration_count >= max_iterations:
        print("\n⚠️ Reached maximum iteration limit. Task may not be complete.")
    print("\n=== Agentic loop completed ===")
def run_ai_generated_code(sandbox: Sandbox, ai_generated_code: str) -> ExecutionResult:
    """Run model-written code in the sandbox and save any charts it produced.

    Args:
        sandbox: Sandbox whose ``process.code_run`` executes the code.
        ai_generated_code: Source code to execute.

    Returns:
        An ExecutionResult with the run's ``result`` text (empty string when
        falsy), its exit code, and the charts reported in its artifacts.

    Side effects:
        Writes each chart that has PNG data to ``chart-<n>.png`` in the current
        working directory and prints one confirmation line per file. Numbering
        starts at 0 on every call.
    """
    run = sandbox.process.code_run(ai_generated_code)
    artifacts = run.artifacts
    chart_list = artifacts.charts if artifacts else []

    outcome = ExecutionResult(
        stdout=run.result or "",
        exit_code=run.exit_code,
        charts=chart_list,
    )

    # Only charts carrying PNG data are written; the counter advances per saved
    # file, so skipped charts do not leave gaps in the numbering.
    saved = 0
    for chart in chart_list or []:
        if not chart.png:
            continue
        filename = f'chart-{saved}.png'
        with open(filename, 'wb') as png_file:
            # chart.png is base64 text; decode it to raw bytes before writing.
            png_file.write(base64.b64decode(chart.png))
        print(f'✓ Chart saved to {filename}')
        saved += 1

    return outcome
# Run the example only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
```
/** Shape of the `run_python_code` tool input, matching the tool's input_schema. */
interface CodeRunToolInput {
  // The Python code to run
  code: string;
}
/** Outcome of one sandbox code run, in the shape handed back to the agentic loop. */
interface ExecutionResult {
  // Text from the execution's `result` field; empty string when it is falsy
  stdout: string;
  // Exit code reported by the sandbox execution
  exitCode: number;
  // Charts reported in the execution artifacts, if any; `png` holds base64 data
  charts?: Array<{ png?: string }>;
}
/**
 * Runs the complete example end to end.
 *
 * Creates a Daytona sandbox, uploads dataset.csv, then drives an agentic loop in
 * which Claude writes Python code, the code is executed in the sandbox, and the
 * results are fed back until Claude stops requesting tool calls or 10 iterations
 * have been made.
 */
async function main() {
  // Create sandbox
  const daytona = new Daytona();
  const sandbox = await daytona.create();

  // Upload the dataset to the sandbox
  await sandbox.fs.uploadFile("dataset.csv", "/home/daytona/dataset.csv");

  const initialPrompt = `
I have a CSV file with vehicle valuations saved in the sandbox at /home/daytona/dataset.csv.
Relevant columns:
- 'year': integer, the manufacturing year of the vehicle
- 'price_in_euro': float, the listed price of the vehicle in Euros
Analyze how price varies by manufacturing year.
Drop rows where 'year' or 'price_in_euro' is missing, non-numeric, or an outlier.
Create a line chart showing average price per year.
Write Python code that analyzes the dataset based on my request and produces a matplotlib chart accordingly.
Always finish with plt.show() to display the chart.`;

  const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

  // The single tool Claude may call: run Python code in the sandbox.
  const tools: Tool[] = [
    {
      name: "run_python_code",
      description: "Run Python code in the sandbox environment and get execution results",
      input_schema: {
        type: "object",
        properties: {
          code: {
            type: "string",
            description: "The Python code to run",
          },
        },
        required: ["code"],
      },
    },
  ];

  // Initialize conversation history
  const messages: MessageParam[] = [
    { role: "user", content: initialPrompt },
  ];
  let continueLoop = true;
  let iterationCount = 0;
  const maxIterations = 10;
  console.log("Starting agentic loop...\n");

  while (continueLoop && iterationCount < maxIterations) {
    iterationCount++;
    console.log(`\n=== Iteration ${iterationCount} ===`);
    console.log("Waiting for the model response...");

    // Get response from Claude (streamed, then collected into one final message)
    const stream = anthropic.messages.stream({
      model: "claude-sonnet-4-5",
      max_tokens: 64000,
      messages: messages,
      tools: tools,
    });
    const message = await stream.finalMessage();

    // Log Claude's text response
    for (const contentBlock of message.content) {
      if (contentBlock.type === "text") {
        console.log("\nClaude's response:");
        console.log(contentBlock.text);
      }
    }

    // Check if Claude wants to use any tools
    const toolUses = message.content.filter(
      (block): block is ToolUseBlock => block.type === "tool_use"
    );
    if (toolUses.length === 0) {
      // No more tool uses, Claude is done
      console.log("\nTask completed - no more actions needed.");
      continueLoop = false;
      break;
    }

    // Add Claude's response to message history
    messages.push({
      role: "assistant",
      content: message.content,
    });

    // Execute all tool calls and collect results
    const toolResults = [];
    for (const toolUse of toolUses) {
      if (toolUse.name !== "run_python_code") {
        // Every tool_use block must be answered with a tool_result, otherwise
        // the next request is invalid; report unknown tools as an error.
        toolResults.push({
          type: "tool_result" as const,
          tool_use_id: toolUse.id,
          content: `Unknown tool: ${toolUse.name}`,
          is_error: true,
        });
        continue;
      }

      const code = (toolUse.input as CodeRunToolInput).code;
      console.log("\n--- Executing Python code in sandbox ---");
      console.log(code);
      console.log("--- End of code ---\n");

      // Execute the code in the sandbox
      const executionResult = await runAIGeneratedCode(sandbox, code);

      // Format the tool result
      let resultContent = "";
      if (executionResult.exitCode === 0) {
        resultContent += `Execution successful!\n\n`;
        if (executionResult.stdout) {
          resultContent += `Output:\n${executionResult.stdout}\n`;
        }
        if (executionResult.charts && executionResult.charts.length > 0) {
          resultContent += `\nGenerated ${executionResult.charts.length} chart(s).`;
        } else {
          // Nudge the model: a successful run without a chart is incomplete.
          resultContent += `\nNote: No charts were generated. Make sure to use plt.show() to display the chart.`;
        }
      } else {
        resultContent += `Execution failed with exit code ${executionResult.exitCode}\n\n`;
        if (executionResult.stdout) {
          resultContent += `Output:\n${executionResult.stdout}\n`;
        }
      }

      toolResults.push({
        type: "tool_result" as const,
        tool_use_id: toolUse.id,
        content: resultContent,
      });
      console.log("Execution result sent back to Claude.");
    }

    // Add tool results to conversation history
    messages.push({
      role: "user",
      content: toolResults,
    });
  }

  // continueLoop is still true only when the loop ran out of iterations; without
  // this check the warning would also fire when Claude finishes on the last pass.
  if (continueLoop && iterationCount >= maxIterations) {
    console.log(
      "\n⚠️ Reached maximum iteration limit. Task may not be complete."
    );
  }
  console.log("\n=== Agentic loop completed ===");
}
/**
 * Runs model-written code in the sandbox and saves any charts it produced.
 *
 * Each chart that carries PNG data is written to `chart-<n>.png` in the current
 * working directory (numbering restarts at 0 on every call) and a confirmation
 * line is logged per file.
 *
 * @param sandbox - Sandbox used to execute the code.
 * @param aiGeneratedCode - Source code to execute.
 * @returns The run's output text (empty string when falsy), exit code and charts.
 */
async function runAIGeneratedCode(
  sandbox: Sandbox,
  aiGeneratedCode: string
): Promise<ExecutionResult> {
  const run = await sandbox.process.codeRun(aiGeneratedCode);
  const charts = run.artifacts?.charts;

  // The counter advances only for saved files, so skipped charts leave no gaps.
  let saved = 0;
  for (const chart of charts ?? []) {
    if (!chart.png) continue;
    const filename = `chart-${saved}.png`;
    // chart.png is base64 text; the encoding option makes Node decode it on write.
    fs.writeFileSync(filename, chart.png, { encoding: "base64" });
    console.log(`✓ Chart saved to ${filename}`);
    saved += 1;
  }

  return {
    stdout: run.result || "",
    exitCode: run.exitCode,
    charts,
  };
}
// Start the example; a rejection from main() is passed to console.error.
main().catch(console.error);
```