From af8bbf3fc8db46feb917ed82101aba4abd508c00 Mon Sep 17 00:00:00 2001 From: Samantha Barron Date: Sun, 23 Mar 2025 02:21:08 +0000 Subject: [PATCH 01/10] add sleep option from env var --- server/server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/server.py b/server/server.py index 301e585..69aaa7d 100644 --- a/server/server.py +++ b/server/server.py @@ -421,6 +421,8 @@ async def call_tool( ) # Return task ID immediately with explicit sleep instruction + if (_sleep_interval := int(os.environ.get("SLEEP_INTERVAL", 0))): + await asyncio.sleep(_sleep_interval) return [ types.TextContent( type="text", From b3f2c2cab6f002317f669be517b3c0c55816a490 Mon Sep 17 00:00:00 2001 From: Samantha Barron Date: Sun, 23 Mar 2025 02:25:22 +0000 Subject: [PATCH 02/10] lint --- server/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/server.py b/server/server.py index 69aaa7d..ecc734c 100644 --- a/server/server.py +++ b/server/server.py @@ -421,7 +421,7 @@ async def call_tool( ) # Return task ID immediately with explicit sleep instruction - if (_sleep_interval := int(os.environ.get("SLEEP_INTERVAL", 0))): + if _sleep_interval := int(os.environ.get("SLEEP_INTERVAL", 0)): await asyncio.sleep(_sleep_interval) return [ types.TextContent( From bdc07037bfe67485f26dcbcea84381f2c095d6eb Mon Sep 17 00:00:00 2001 From: Samantha Barron Date: Sun, 23 Mar 2025 02:41:01 +0000 Subject: [PATCH 03/10] await task instead --- server/server.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/server/server.py b/server/server.py index ecc734c..8afdd2d 100644 --- a/server/server.py +++ b/server/server.py @@ -408,7 +408,7 @@ async def call_tool( } # Start task in background - asyncio.create_task( + _task = asyncio.create_task( run_browser_task_async( task_id=task_id, url=arguments["url"], @@ -420,9 +420,11 @@ async def call_tool( ) ) + # If PATIENT is set, wait for the task to complete + if os.environ.get("PATIENT", None) == "true": + await _task + # Return task ID immediately with explicit sleep instruction - if _sleep_interval := int(os.environ.get("SLEEP_INTERVAL", 0)): - await asyncio.sleep(_sleep_interval) return [ types.TextContent( type="text", @@ -460,6 +462,7 @@ async def call_tool( ] # Get the current task data + await _sleepy() task_data = task_store[task_id].copy() # If task is still running, add simple guidance From ace13a530dfc89ddccb9180eeed07433531a40d4 Mon Sep 17 00:00:00 2001 From: Samantha Barron Date: Sun, 23 Mar 2025 02:42:52 +0000 Subject: [PATCH 04/10] oops --- server/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/server.py b/server/server.py index 8afdd2d..1014604 100644 --- a/server/server.py +++ b/server/server.py @@ -462,7 +462,6 @@ async def call_tool( ] # Get the current task data - await _sleepy() task_data = task_store[task_id].copy() # If task is still running, add simple guidance From 1e416bc26033c6b920304a22ff67c5d428af17c4 Mon Sep 17 00:00:00 2001 From: Samantha Barron Date: Sun, 23 Mar 2025 02:52:57 +0000 Subject: [PATCH 05/10] readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0a68e8a..3592ddc 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ uv run server --port 8000 ``` OPENAI_API_KEY=[your api key] CHROME_PATH=[only change this if you have a custom chrome build] +PATIENT=false # Set to true if you want api calls to wait for tasks to complete (default is false) ``` - we will be adding support for other LLM providers to power browser-use From 1aeec10f6ff12a1833fde1cac057766133dfb6ef Mon Sep 17 00:00:00 2001 From: Michel Osswald Date: Thu, 27 Mar 2025 13:44:04 -0700 Subject: [PATCH 06/10] feat: Add PATIENT mode for fully synchronous browser operations Implemented PATIENT mode that allows browser tasks to complete synchronously, with proper error handling and configuration. When enabled, browser_use tool returns complete results directly. Updated tool descriptions to clearly indicate behavior in each mode. --- .env.example | 5 +- server/server.py | 166 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 135 insertions(+), 36 deletions(-) diff --git a/.env.example b/.env.example index fc7f5e5..6226a80 100644 --- a/.env.example +++ b/.env.example @@ -2,4 +2,7 @@ CHROME_PATH= # OpenAI API key for OpenAI model access -OPENAI_API_KEY=your-api-key-here \ No newline at end of file +OPENAI_API_KEY=your-api-key-here + +# Set to true if you want api calls to wait for tasks to complete (default is false) +PATIENT=false \ No newline at end of file diff --git a/server/server.py b/server/server.py index 1014604..8472957 100644 --- a/server/server.py +++ b/server/server.py @@ -44,7 +44,26 @@ load_dotenv() -def init_configuration() -> Dict[str, any]: +def parse_bool_env(env_var: str, default: bool = False) -> bool: + """ + Parse a boolean environment variable. + + Args: + env_var: The environment variable name + default: Default value if not set + + Returns: + Boolean value of the environment variable + """ + value = os.environ.get(env_var) + if value is None: + return default + + # Consider various representations of boolean values + return value.lower() in ("true", "yes", "1", "y", "on") + + +def init_configuration() -> Dict[str, Any]: """ Initialize configuration from environment variables with defaults. @@ -78,6 +97,8 @@ def init_configuration() -> Dict[str, any]: "--disable-dev-shm-usage", "--remote-debugging-port=0", # Use random port to avoid conflicts ], + # Patient mode - if true, functions wait for task completion before returning + "PATIENT_MODE": parse_bool_env("PATIENT", False), } return config @@ -163,6 +184,9 @@ async def run_browser_task_async( This function executes a browser automation task with the given URL and action, and updates the task store with progress and results. + When PATIENT_MODE is enabled, the calling function will wait for this function + to complete before returning to the client. + Args: task_id: Unique identifier for the task url: URL to navigate to @@ -382,7 +406,9 @@ async def call_tool( arguments: The arguments to pass to the tool Returns: - A list of content objects to return to the client + A list of content objects to return to the client. + When PATIENT_MODE is enabled, the browser_use tool will wait for the task to complete + and return the full result immediately instead of just the task ID. Raises: ValueError: If required arguments are missing @@ -421,8 +447,36 @@ async def call_tool( ) # If PATIENT is set, wait for the task to complete - if os.environ.get("PATIENT", None) == "true": - await _task + if CONFIG["PATIENT_MODE"]: + try: + await _task + # Return the completed task result instead of just the ID + task_data = task_store[task_id] + if task_data["status"] == "failed": + logger.error( + f"Task {task_id} failed: {task_data.get('error', 'Unknown error')}" + ) + return [ + types.TextContent( + type="text", + text=json.dumps(task_data, indent=2), + ) + ] + except Exception as e: + logger.error(f"Error in patient mode execution: {str(e)}") + traceback_str = traceback.format_exc() + # Update task store with error + task_store[task_id]["status"] = "failed" + task_store[task_id]["error"] = str(e) + task_store[task_id]["traceback"] = traceback_str + task_store[task_id]["end_time"] = datetime.now().isoformat() + # Return error information + return [ + types.TextContent( + type="text", + text=json.dumps(task_store[task_id], indent=2), + ) + ] # Return task ID immediately with explicit sleep instruction return [ @@ -501,43 +555,85 @@ async def list_tools() -> list[types.Tool]: """ List the available tools for the MCP client. + Returns different tool descriptions based on the PATIENT_MODE configuration. + When PATIENT_MODE is enabled, the browser_use tool description indicates it returns + complete results directly. When disabled, it indicates async operation. + Returns: - A list of tool definitions + A list of tool definitions appropriate for the current configuration """ - return [ - types.Tool( - name="browser_use", - description="Performs a browser action and returns a task ID for async execution", - inputSchema={ - "type": "object", - "required": ["url", "action"], - "properties": { - "url": { - "type": "string", - "description": "URL to navigate to", + patient_mode = CONFIG["PATIENT_MODE"] + + if patient_mode: + return [ + types.Tool( + name="browser_use", + description="Performs a browser action and returns the complete result directly (patient mode active)", + inputSchema={ + "type": "object", + "required": ["url", "action"], + "properties": { + "url": { + "type": "string", + "description": "URL to navigate to", + }, + "action": { + "type": "string", + "description": "Action to perform in the browser", + }, }, - "action": { - "type": "string", - "description": "Action to perform in the browser", + }, + ), + types.Tool( + name="browser_get_result", + description="Gets the result of an asynchronous browser task (not needed in patient mode as browser_use returns complete results directly)", + inputSchema={ + "type": "object", + "required": ["task_id"], + "properties": { + "task_id": { + "type": "string", + "description": "ID of the task to get results for", + } }, }, - }, - ), - types.Tool( - name="browser_get_result", - description="Gets the result of an asynchronous browser task", - inputSchema={ - "type": "object", - "required": ["task_id"], - "properties": { - "task_id": { - "type": "string", - "description": "ID of the task to get results for", - } + ), + ] + else: + return [ + types.Tool( + name="browser_use", + description="Performs a browser action and returns a task ID for async execution", + inputSchema={ + "type": "object", + "required": ["url", "action"], + "properties": { + "url": { + "type": "string", + "description": "URL to navigate to", + }, + "action": { + "type": "string", + "description": "Action to perform in the browser", + }, + }, }, - }, - ), - ] + ), + types.Tool( + name="browser_get_result", + description="Gets the result of an asynchronous browser task", + inputSchema={ + "type": "object", + "required": ["task_id"], + "properties": { + "task_id": { + "type": "string", + "description": "ID of the task to get results for", + } + }, + }, + ), + ] @app.list_resources() async def list_resources() -> list[types.Resource]: From 65c121b6cf6f193452ee8605f43b942f030b3d48 Mon Sep 17 00:00:00 2001 From: Tobias Date: Thu, 27 Mar 2025 23:10:04 +0100 Subject: [PATCH 07/10] fix: Update CI workflow to use PAT for package authentication --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d01ecd6..41afada 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,7 +79,7 @@ jobs: permissions: contents: read - packages: write + # packages: write # attestations: write id-token: write @@ -92,7 +92,7 @@ jobs: with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + password: ${{ secrets.COBROWSER_PACKAGE_TOKEN }} - name: Extract metadata (tags, labels) for Docker id: meta From 22347f7ffafc087bd43e89ca5b7e379d0eb12353 Mon Sep 17 00:00:00 2001 From: Tobias Date: Thu, 27 Mar 2025 23:13:51 +0100 Subject: [PATCH 08/10] fix: Update CI workflow to use secrets for Docker registry authentication --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41afada..79c1b53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,7 +91,7 @@ jobs: uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} + username: ${{ secrets.COBROWSER_PACKAGE_USER }} password: ${{ secrets.COBROWSER_PACKAGE_TOKEN }} - name: Extract metadata (tags, labels) for Docker From 7ddfdbaa8f29812ca3e01fc84583945914520a10 Mon Sep 17 00:00:00 2001 From: Tobias Date: Thu, 27 Mar 2025 23:18:41 +0100 Subject: [PATCH 09/10] fix: Docker registry login step in CI workflow --- .github/workflows/ci.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79c1b53..35acd9b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,12 +87,8 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - - name: Log in to the Container registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ secrets.COBROWSER_PACKAGE_USER }} - password: ${{ secrets.COBROWSER_PACKAGE_TOKEN }} + - name: Log in to registry + run: echo "${{ secrets.COBROWSER_PACKAGE_TOKEN }}" | docker login ghcr.io -u ${{ secrets.COBROWSER_PACKAGE_USER }} --password-stdin - name: Extract metadata (tags, labels) for Docker id: meta From cfba72271358dcc86fdf67a9720355ee32b9171b Mon Sep 17 00:00:00 2001 From: Tobias Date: Thu, 27 Mar 2025 23:21:55 +0100 Subject: [PATCH 10/10] fix: Change to tty-less login --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 35acd9b..34e9945 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,7 +88,7 @@ jobs: uses: actions/checkout@v4 - name: Log in to registry - run: echo "${{ secrets.COBROWSER_PACKAGE_TOKEN }}" | docker login ghcr.io -u ${{ secrets.COBROWSER_PACKAGE_USER }} --password-stdin + run: docker login ghcr.io -u "${{ secrets.COBROWSER_PACKAGE_USER }}" --password-stdin <<< "${{ secrets.COBROWSER_PACKAGE_TOKEN }}" - name: Extract metadata (tags, labels) for Docker id: meta