apps/docs/src/content/docs/en/python-sdk/sync/computer-use.mdx
class ComputerUse()
Computer Use functionality for interacting with the desktop environment.
Provides access to mouse, keyboard, screenshot, display, and recording operations for automating desktop interactions within a sandbox.
Attributes:
mouse Mouse - Mouse operations interface.
keyboard Keyboard - Keyboard operations interface.
screenshot Screenshot - Screenshot operations interface.
display Display - Display operations interface.
recording RecordingService - Screen recording operations interface.
@intercept_errors(message_prefix="Failed to start computer use: ")
@with_instrumentation()
def start() -> ComputerUseStartResponse
Starts all computer use processes (Xvfb, xfce4, x11vnc, novnc).
Returns:
ComputerUseStartResponse - Computer use start response.
Example:
result = sandbox.computer_use.start()
print("Computer use processes started:", result.message)
@intercept_errors(message_prefix="Failed to stop computer use: ")
@with_instrumentation()
def stop() -> ComputerUseStopResponse
Stops all computer use processes.
Returns:
ComputerUseStopResponse - Computer use stop response.
Example:
result = sandbox.computer_use.stop()
print("Computer use processes stopped:", result.message)
@intercept_errors(message_prefix="Failed to get computer use status: ")
@with_instrumentation()
def get_status() -> ComputerUseStatusResponse
Gets the status of all computer use processes.
Returns:
ComputerUseStatusResponse - Status information about all VNC desktop processes.
Example:
response = sandbox.computer_use.get_status()
print("Computer use status:", response.status)
@intercept_errors(message_prefix="Failed to get process status: ")
@with_instrumentation()
def get_process_status(process_name: str) -> ProcessStatusResponse
Gets the status of a specific VNC process.
Arguments:
process_name str - Name of the process to check.
Returns:
ProcessStatusResponse - Status information about the specific process.
Example:
xvfb_status = sandbox.computer_use.get_process_status("xvfb")
no_vnc_status = sandbox.computer_use.get_process_status("novnc")
@intercept_errors(message_prefix="Failed to restart process: ")
@with_instrumentation()
def restart_process(process_name: str) -> ProcessRestartResponse
Restarts a specific VNC process.
Arguments:
process_name str - Name of the process to restart.
Returns:
ProcessRestartResponse - Process restart response.
Example:
result = sandbox.computer_use.restart_process("xfce4")
print("XFCE4 process restarted:", result.message)
@intercept_errors(message_prefix="Failed to get process logs: ")
@with_instrumentation()
def get_process_logs(process_name: str) -> ProcessLogsResponse
Gets logs for a specific VNC process.
Arguments:
process_name str - Name of the process to get logs for.
Returns:
ProcessLogsResponse - Process logs.
Example:
logs = sandbox.computer_use.get_process_logs("novnc")
print("NoVNC logs:", logs)
@intercept_errors(message_prefix="Failed to get process errors: ")
@with_instrumentation()
def get_process_errors(process_name: str) -> ProcessErrorsResponse
Gets error logs for a specific VNC process.
Arguments:
process_name str - Name of the process to get error logs for.
Returns:
ProcessErrorsResponse - Process error logs.
Example:
errors = sandbox.computer_use.get_process_errors("x11vnc")
print("X11VNC errors:", errors)
class Mouse()
Mouse operations for computer use functionality.
@intercept_errors(message_prefix="Failed to get mouse position: ")
@with_instrumentation()
def get_position() -> MousePositionResponse
Gets the current mouse cursor position.
Returns:
MousePositionResponse - Current mouse position with x and y coordinates.
Example:
position = sandbox.computer_use.mouse.get_position()
print(f"Mouse is at: {position.x}, {position.y}")
@intercept_errors(message_prefix="Failed to move mouse: ")
@with_instrumentation()
def move(x: int, y: int) -> MousePositionResponse
Moves the mouse cursor to the specified coordinates.
Arguments:
x int - The x coordinate to move to.
y int - The y coordinate to move to.
Returns:
MousePositionResponse - Position after move.
Example:
result = sandbox.computer_use.mouse.move(100, 200)
print(f"Mouse moved to: {result.x}, {result.y}")
@intercept_errors(message_prefix="Failed to click mouse: ")
@with_instrumentation()
def click(x: int,
y: int,
button: str = "left",
double: bool = False) -> MouseClickResponse
Clicks the mouse at the specified coordinates.
Arguments:
x int - The x coordinate to click at.
y int - The y coordinate to click at.
button str - The mouse button to click ('left', 'right', 'middle').
double bool - Whether to perform a double-click.
Returns:
MouseClickResponse - Click operation result.
Example:
# Single left click
result = sandbox.computer_use.mouse.click(100, 200)
# Double click
double_click = sandbox.computer_use.mouse.click(100, 200, "left", True)
# Right click
right_click = sandbox.computer_use.mouse.click(100, 200, "right")
@intercept_errors(message_prefix="Failed to drag mouse: ")
@with_instrumentation()
def drag(start_x: int,
start_y: int,
end_x: int,
end_y: int,
button: str = "left") -> MouseDragResponse
Drags the mouse from start coordinates to end coordinates.
Arguments:
start_x int - The starting x coordinate.
start_y int - The starting y coordinate.
end_x int - The ending x coordinate.
end_y int - The ending y coordinate.
button str - The mouse button to use for dragging.
Returns:
MouseDragResponse - Drag operation result.
Example:
result = sandbox.computer_use.mouse.drag(50, 50, 150, 150)
print(f"Drag ended at {result.x}, {result.y}")
@intercept_errors(message_prefix="Failed to scroll mouse: ")
@with_instrumentation()
def scroll(x: int, y: int, direction: str, amount: int = 1) -> bool
Scrolls the mouse wheel at the specified coordinates.
Arguments:
x int - The x coordinate to scroll at.
y int - The y coordinate to scroll at.
direction str - The direction to scroll ('up' or 'down').
amount int - The amount to scroll.
Returns:
bool - Whether the scroll operation was successful.
Example:
# Scroll up
scroll_up = sandbox.computer_use.mouse.scroll(100, 200, "up", 3)
# Scroll down
scroll_down = sandbox.computer_use.mouse.scroll(100, 200, "down", 5)
class Keyboard()
Keyboard operations for computer use functionality.
@intercept_errors(message_prefix="Failed to type text: ")
@with_instrumentation()
def type(text: str, delay: int | None = None) -> None
Types the specified text.
Arguments:
text str - The text to type.
delay int - Delay between characters in milliseconds.
Raises:
DaytonaError - If the type operation fails.
Example:
try:
sandbox.computer_use.keyboard.type("Hello, World!")
print(f"Operation success")
except Exception as e:
print(f"Operation failed: {e}")
# With delay between characters
try:
sandbox.computer_use.keyboard.type("Slow typing", 100)
print(f"Operation success")
except Exception as e:
print(f"Operation failed: {e}")
@intercept_errors(message_prefix="Failed to press key: ")
@with_instrumentation()
def press(key: str, modifiers: list[str] | None = None) -> None
Presses a key with optional modifiers.
Arguments:
key str - The key to press. Canonical names include 'enter', 'escape',
'tab', letters, digits, unshifted punctuation, function keys, and
grammar-safe numpad names such as 'num_plus'. Named keys are
case-insensitive, and common aliases such as 'Return' and 'Escape'
are normalized.
modifiers list[str] - Canonical modifier names are 'ctrl', 'alt',
'shift', and 'cmd'. Common aliases such as 'control', 'option',
'meta', and 'win' are normalized.
Raises:
DaytonaError - If the press operation fails.
Example:
# Press Enter
try:
sandbox.computer_use.keyboard.press("enter")
print(f"Operation success")
except Exception as e:
print(f"Operation failed: {e}")
# Press Ctrl+C
try:
sandbox.computer_use.keyboard.press("c", ["ctrl"])
print(f"Operation success")
except Exception as e:
print(f"Operation failed: {e}")
# Press Ctrl+Shift+T
try:
sandbox.computer_use.keyboard.press("t", ["ctrl", "shift"])
print(f"Operation success")
except Exception as e:
print(f"Operation failed: {e}")
@intercept_errors(message_prefix="Failed to press hotkey: ")
@with_instrumentation()
def hotkey(keys: str) -> None
Presses a hotkey combination.
Arguments:
keys str - A single atomic hotkey chord (e.g., 'ctrl+c', 'alt+tab',
'cmd+shift+t', 'ctrl + c', 'shift'). Uses the same normalized key
contract as press().
Raises:
DaytonaError - If the hotkey operation fails.
Example:
# Copy
try:
sandbox.computer_use.keyboard.hotkey("ctrl+c")
print(f"Operation success")
except Exception as e:
print(f"Operation failed: {e}")
# Paste
try:
sandbox.computer_use.keyboard.hotkey("ctrl+v")
print(f"Operation success")
except Exception as e:
print(f"Operation failed: {e}")
# Alt+Tab
try:
sandbox.computer_use.keyboard.hotkey("alt+tab")
print(f"Operation success")
except Exception as e:
print(f"Operation failed: {e}")
class Screenshot()
Screenshot operations for computer use functionality.
@intercept_errors(message_prefix="Failed to take screenshot: ")
@with_instrumentation()
def take_full_screen(show_cursor: bool = False) -> ScreenshotResponse
Takes a screenshot of the entire screen.
Arguments:
show_cursor bool - Whether to show the cursor in the screenshot.
Returns:
ScreenshotResponse - Screenshot data with base64 encoded image.
Example:
screenshot = sandbox.computer_use.screenshot.take_full_screen()
print(f"Screenshot size: {screenshot.width}x{screenshot.height}")
# With cursor visible
with_cursor = sandbox.computer_use.screenshot.take_full_screen(True)
@intercept_errors(message_prefix="Failed to take region screenshot: ")
@with_instrumentation()
def take_region(region: ScreenshotRegion,
show_cursor: bool = False) -> ScreenshotResponse
Takes a screenshot of a specific region.
Arguments:
region ScreenshotRegion - The region to capture.
show_cursor bool - Whether to show the cursor in the screenshot.
Returns:
ScreenshotResponse - Screenshot data with base64 encoded image.
Example:
region = ScreenshotRegion(x=100, y=100, width=300, height=200)
screenshot = sandbox.computer_use.screenshot.take_region(region)
print(f"Captured region: {screenshot.region.width}x{screenshot.region.height}")
@intercept_errors(message_prefix="Failed to take compressed screenshot: ")
@with_instrumentation()
def take_compressed(
options: ScreenshotOptions | None = None) -> ScreenshotResponse
Takes a compressed screenshot of the entire screen.
Arguments:
options ScreenshotOptions | None - Compression and display options.
Returns:
ScreenshotResponse - Compressed screenshot data.
Example:
# Default compression
screenshot = sandbox.computer_use.screenshot.take_compressed()
# High quality JPEG
jpeg = sandbox.computer_use.screenshot.take_compressed(
ScreenshotOptions(fmt="jpeg", quality=95, show_cursor=True)
)
# Scaled down PNG
scaled = sandbox.computer_use.screenshot.take_compressed(
ScreenshotOptions(fmt="png", scale=0.5)
)
@intercept_errors(
message_prefix="Failed to take compressed region screenshot: ")
@with_instrumentation()
def take_compressed_region(
region: ScreenshotRegion,
options: ScreenshotOptions | None = None) -> ScreenshotResponse
Takes a compressed screenshot of a specific region.
Arguments:
region ScreenshotRegion - The region to capture.
options ScreenshotOptions | None - Compression and display options.
Returns:
ScreenshotResponse - Compressed screenshot data.
Example:
region = ScreenshotRegion(x=0, y=0, width=800, height=600)
screenshot = sandbox.computer_use.screenshot.take_compressed_region(
region,
ScreenshotOptions(fmt="webp", quality=80, show_cursor=True)
)
print(f"Compressed size: {screenshot.size_bytes} bytes")
class Display()
Display operations for computer use functionality.
@intercept_errors(message_prefix="Failed to get display info: ")
@with_instrumentation()
def get_info() -> DisplayInfoResponse
Gets information about the displays.
Returns:
DisplayInfoResponse - Display information including primary display and all available displays.
Example:
info = sandbox.computer_use.display.get_info()
print(f"Primary display: {info.primary_display.width}x{info.primary_display.height}")
print(f"Total displays: {info.total_displays}")
for i, display in enumerate(info.displays):
print(f"Display {i}: {display.width}x{display.height} at {display.x},{display.y}")
@intercept_errors(message_prefix="Failed to get windows: ")
@with_instrumentation()
def get_windows() -> WindowsResponse
Gets the list of open windows.
Returns:
WindowsResponse - List of open windows with their IDs and titles.
Example:
windows = sandbox.computer_use.display.get_windows()
print(f"Found {windows.count} open windows:")
for window in windows.windows:
print(f"- {window.title} (ID: {window.id})")
class RecordingService()
Recording operations for computer use functionality.
@intercept_errors(message_prefix="Failed to start recording: ")
@with_instrumentation()
def start(label: str | None = None) -> Recording
Starts a new screen recording session.
Arguments:
label str | None - Optional custom label for the recording.
Returns:
Recording - Recording start response.
Example:
# Start a recording with a label
recording = sandbox.computer_use.recording.start("my-test-recording")
print(f"Recording started: {recording.id}")
print(f"File: {recording.file_path}")
@intercept_errors(message_prefix="Failed to stop recording: ")
@with_instrumentation()
def stop(recording_id: str) -> Recording
Stops an active screen recording session.
Arguments:
recording_id str - The ID of the recording to stop.
Returns:
Recording - Recording stop response.
Example:
result = sandbox.computer_use.recording.stop(recording.id)
print(f"Recording stopped: {result.duration_seconds} seconds")
print(f"Saved to: {result.file_path}")
@intercept_errors(message_prefix="Failed to list recordings: ")
@with_instrumentation()
def list() -> ListRecordingsResponse
Lists all recordings (active and completed).
Returns:
ListRecordingsResponse - List of all recordings.
Example:
recordings = sandbox.computer_use.recording.list()
print(f"Found {len(recordings.recordings)} recordings")
for rec in recordings.recordings:
print(f"- {rec.file_name}: {rec.status}")
@intercept_errors(message_prefix="Failed to get recording: ")
@with_instrumentation()
def get(recording_id: str) -> Recording
Gets details of a specific recording by ID.
Arguments:
recording_id str - The ID of the recording to retrieve.
Returns:
Recording - Recording details.
Example:
recording = sandbox.computer_use.recording.get(recording_id)
print(f"Recording: {recording.file_name}")
print(f"Status: {recording.status}")
print(f"Duration: {recording.duration_seconds} seconds")
@intercept_errors(message_prefix="Failed to delete recording: ")
@with_instrumentation()
def delete(recording_id: str) -> None
Deletes a recording by ID.
Arguments:
recording_id str - The ID of the recording to delete.
Example:
sandbox.computer_use.recording.delete(recording_id)
print("Recording deleted")
@intercept_errors(message_prefix="Failed to download recording: ")
@with_instrumentation()
def download(recording_id: str, local_path: str) -> None
Downloads a recording file from the Sandbox and saves it to a local file.
The file is streamed directly to disk without loading the entire content into memory.
Arguments:
recording_id str - The ID of the recording to download.
local_path str - Path to save the recording file locally.
Example:
# Download recording to file
sandbox.computer_use.recording.download(recording_id, "local_recording.mp4")
print("Recording downloaded")
class ScreenshotRegion(BaseModel)
Region coordinates for screenshot operations.
Attributes:
x int - X coordinate of the region.
y int - Y coordinate of the region.
width int - Width of the region.
height int - Height of the region.
class ScreenshotOptions(BaseModel)
Options for screenshot compression and display.
Attributes:
show_cursor bool | None - Whether to show the cursor in the screenshot.
fmt str | None - Image format (e.g., 'png', 'jpeg', 'webp').
quality int | None - Compression quality (0-100).
scale float | None - Scale factor for the screenshot.