Skip to content

Add option to await browser tasks via env var #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 27, 2025
5 changes: 4 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -2,4 +2,7 @@
CHROME_PATH=

# OpenAI API key for OpenAI model access
OPENAI_API_KEY=your-api-key-here
OPENAI_API_KEY=your-api-key-here

# Set to true to make API calls wait for tasks to complete before returning (default: false)
PATIENT=false
10 changes: 3 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -79,20 +79,16 @@ jobs:

permissions:
contents: read
packages: write
# packages: write
# attestations: write
id-token: write

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Log in to registry
run: docker login ghcr.io -u "${{ secrets.COBROWSER_PACKAGE_USER }}" --password-stdin <<< "${{ secrets.COBROWSER_PACKAGE_TOKEN }}"

- name: Extract metadata (tags, labels) for Docker
id: meta
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@ uv run server --port 8000
```
OPENAI_API_KEY=[your api key]
CHROME_PATH=[only change this if you have a custom chrome build]
PATIENT=false # Set to true to make API calls wait for tasks to complete before returning (default: false)
```

- we will be adding support for other LLM providers to power browser-use
168 changes: 134 additions & 34 deletions server/server.py
Original file line number Diff line number Diff line change
@@ -44,7 +44,26 @@
load_dotenv()


def init_configuration() -> Dict[str, any]:
def parse_bool_env(env_var: str, default: bool = False) -> bool:
    """
    Parse a boolean environment variable.

    The value is matched case-insensitively, ignoring leading and trailing
    whitespace, against the truthy spellings "true", "yes", "1", "y" and "on".
    Any other value that is set (including the empty string) yields False;
    ``default`` is only used when the variable is not set at all.

    Args:
        env_var: The environment variable name
        default: Default value if not set

    Returns:
        Boolean value of the environment variable
    """
    value = os.environ.get(env_var)
    if value is None:
        return default

    # Strip before comparing so values such as "true " or " 1" (stray
    # whitespace from shell exports or hand-edited env files) still count
    # as truthy instead of silently parsing as False.
    return value.strip().lower() in ("true", "yes", "1", "y", "on")


def init_configuration() -> Dict[str, Any]:
"""
Initialize configuration from environment variables with defaults.

@@ -78,6 +97,8 @@ def init_configuration() -> Dict[str, any]:
"--disable-dev-shm-usage",
"--remote-debugging-port=0", # Use random port to avoid conflicts
],
# Patient mode - if true, functions wait for task completion before returning
"PATIENT_MODE": parse_bool_env("PATIENT", False),
}

return config
@@ -163,6 +184,9 @@ async def run_browser_task_async(
This function executes a browser automation task with the given URL and action,
and updates the task store with progress and results.

When PATIENT_MODE is enabled, the calling function will wait for this function
to complete before returning to the client.

Args:
task_id: Unique identifier for the task
url: URL to navigate to
@@ -382,7 +406,9 @@ async def call_tool(
arguments: The arguments to pass to the tool

Returns:
A list of content objects to return to the client
A list of content objects to return to the client.
When PATIENT_MODE is enabled, the browser_use tool will wait for the task to complete
and return the full result immediately instead of just the task ID.

Raises:
ValueError: If required arguments are missing
@@ -408,7 +434,7 @@ async def call_tool(
}

# Start task in background
asyncio.create_task(
_task = asyncio.create_task(
run_browser_task_async(
task_id=task_id,
url=arguments["url"],
@@ -420,6 +446,38 @@ async def call_tool(
)
)

# If PATIENT is set, wait for the task to complete
if CONFIG["PATIENT_MODE"]:
try:
await _task
# Return the completed task result instead of just the ID
task_data = task_store[task_id]
if task_data["status"] == "failed":
logger.error(
f"Task {task_id} failed: {task_data.get('error', 'Unknown error')}"
)
return [
types.TextContent(
type="text",
text=json.dumps(task_data, indent=2),
)
]
except Exception as e:
logger.error(f"Error in patient mode execution: {str(e)}")
traceback_str = traceback.format_exc()
# Update task store with error
task_store[task_id]["status"] = "failed"
task_store[task_id]["error"] = str(e)
task_store[task_id]["traceback"] = traceback_str
task_store[task_id]["end_time"] = datetime.now().isoformat()
# Return error information
return [
types.TextContent(
type="text",
text=json.dumps(task_store[task_id], indent=2),
)
]

# Return task ID immediately with explicit sleep instruction
return [
types.TextContent(
@@ -497,43 +555,85 @@ async def list_tools() -> list[types.Tool]:
"""
List the available tools for the MCP client.

Returns different tool descriptions based on the PATIENT_MODE configuration.
When PATIENT_MODE is enabled, the browser_use tool description indicates it returns
complete results directly. When disabled, it indicates async operation.

Returns:
A list of tool definitions
A list of tool definitions appropriate for the current configuration
"""
return [
types.Tool(
name="browser_use",
description="Performs a browser action and returns a task ID for async execution",
inputSchema={
"type": "object",
"required": ["url", "action"],
"properties": {
"url": {
"type": "string",
"description": "URL to navigate to",
patient_mode = CONFIG["PATIENT_MODE"]

if patient_mode:
return [
types.Tool(
name="browser_use",
description="Performs a browser action and returns the complete result directly (patient mode active)",
inputSchema={
"type": "object",
"required": ["url", "action"],
"properties": {
"url": {
"type": "string",
"description": "URL to navigate to",
},
"action": {
"type": "string",
"description": "Action to perform in the browser",
},
},
"action": {
"type": "string",
"description": "Action to perform in the browser",
},
),
types.Tool(
name="browser_get_result",
description="Gets the result of an asynchronous browser task (not needed in patient mode as browser_use returns complete results directly)",
inputSchema={
"type": "object",
"required": ["task_id"],
"properties": {
"task_id": {
"type": "string",
"description": "ID of the task to get results for",
}
},
},
),
]
else:
return [
types.Tool(
name="browser_use",
description="Performs a browser action and returns a task ID for async execution",
inputSchema={
"type": "object",
"required": ["url", "action"],
"properties": {
"url": {
"type": "string",
"description": "URL to navigate to",
},
"action": {
"type": "string",
"description": "Action to perform in the browser",
},
},
},
},
),
types.Tool(
name="browser_get_result",
description="Gets the result of an asynchronous browser task",
inputSchema={
"type": "object",
"required": ["task_id"],
"properties": {
"task_id": {
"type": "string",
"description": "ID of the task to get results for",
}
),
types.Tool(
name="browser_get_result",
description="Gets the result of an asynchronous browser task",
inputSchema={
"type": "object",
"required": ["task_id"],
"properties": {
"task_id": {
"type": "string",
"description": "ID of the task to get results for",
}
},
},
},
),
]
),
]

@app.list_resources()
async def list_resources() -> list[types.Resource]: