Initial commit

Lucas Hild · Lucas Hild · commit 47dc379d11d7 · 2024-12-03T19:08:44.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,10 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.13
diff --git a/README.md b/README.md
@@ -0,0 +1,115 @@
+# BigQuery MCP server
+
+A Model Context Protocol server that provides access to BigQuery. This server enables LLMs to inspect database schemas and execute queries.
+
+## Components
+
+### Tools
+
+The server implements three tools:
+
+- `execute-query`: Executes a SQL query using BigQuery dialect
+- `list-tables`: Lists all tables in the BigQuery database
+- `describe-table`: Describes the schema of a specific table
+
+## Configuration
+
+The server can be configured with the following arguments:
+
+- `--project` (required): The GCP project ID.
+- `--location` (required): The GCP location (e.g. `europe-west9`).
+- `--dataset` (optional): Only take specific BigQuery datasets into consideration. Several datasets can be specified by repeating the argument (e.g. `--dataset my_dataset_1 --dataset my_dataset_2`). If not provided, all tables in the project will be considered.
+
+## Quickstart
+
+### Install
+
+#### Claude Desktop
+
+- On macOS: `~/Library/Application\ Support/Claude/claude_desktop_config.json`
+- On Windows: `%APPDATA%/Claude/claude_desktop_config.json`
+
+<details>
+  <summary>Development/Unpublished Servers Configuration</summary>
+
+  ```
+  "mcpServers": {
+    "bigquery": {
+      "command": "uv",
+      "args": [
+        "--directory",
+        "{{PATH_TO_REPO}}",
+        "run",
+        "mcp-server-bigquery",
+        "--project",
+        "{{GCP_PROJECT_ID}}",
+        "--location",
+        "{{GCP_LOCATION}}"
+      ]
+    }
+  }
+  ```
+</details>
+
+<details>
+  <summary>Published Servers Configuration</summary>
+
+  ```
+  "mcpServers": {
+    "bigquery": {
+      "command": "uvx",
+      "args": [
+        "mcp-server-bigquery",
+        "--project",
+        "{{GCP_PROJECT_ID}}",
+        "--location",
+        "{{GCP_LOCATION}}"
+      ]
+    }
+  }
+  ```
+</details>
+
+Replace `{{PATH_TO_REPO}}`, `{{GCP_PROJECT_ID}}`, and `{{GCP_LOCATION}}` with the appropriate values.
+
+## Development
+
+### Building and Publishing
+
+To prepare the package for distribution:
+
+1. Sync dependencies and update lockfile:
+
+```bash
+uv sync
+```
+
+2. Build package distributions:
+
+```bash
+uv build
+```
+
+This will create source and wheel distributions in the `dist/` directory.
+
+3. Publish to PyPI:
+
+```bash
+uv publish
+```
+
+Note: You'll need to set PyPI credentials via environment variables or command flags:
+
+- Token: `--token` or `UV_PUBLISH_TOKEN`
+- Or username/password: `--username`/`UV_PUBLISH_USERNAME` and `--password`/`UV_PUBLISH_PASSWORD`
+
+### Debugging
+
+Since MCP servers run over stdio, debugging can be challenging. For the best debugging
+experience, we strongly recommend using the [MCP Inspector](https://github.com/modelcontextprotocol/inspector).
+
+You can launch the MCP Inspector via [`npm`](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) with this command:
+
+```bash
+npx @modelcontextprotocol/inspector uv --directory {{PATH_TO_REPO}} run mcp-server-bigquery --project {{GCP_PROJECT_ID}} --location {{GCP_LOCATION}}
+```
+
+Upon launching, the Inspector will display a URL that you can access in your browser to begin debugging.
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "mcp-server-bigquery"
+version = "0.2.0"
+description = "A Model Context Protocol server that provides access to BigQuery. This server enables LLMs to inspect database schemas and execute queries."
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+ "google-cloud-bigquery>=3.27.0",
+ "mcp>=1.0.0",
+]
+[[project.authors]]
+name = "Lucas Hild"
+email = ""
+
+[build-system]
+requires = [ "hatchling",]
+build-backend = "hatchling.build"
+
+[project.scripts]
+mcp-server-bigquery = "mcp_server_bigquery:main"
diff --git a/src/mcp_server_bigquery/__init__.py b/src/mcp_server_bigquery/__init__.py
@@ -0,0 +1,17 @@
+from . import server
+import asyncio
+import argparse
+def main():
+    """Main entry point for the package."""
+    parser = argparse.ArgumentParser(description='BigQuery MCP Server')
+    parser.add_argument('--project', help='BigQuery project', required=False)
+    parser.add_argument('--location', help='BigQuery location', required=False)
+    parser.add_argument('--dataset', help='BigQuery dataset', required=False, action='append')
+    
+    args = parser.parse_args()
+
+    datasets_filter = args.dataset if args.dataset else []
+    asyncio.run(server.main(args.project, args.location, datasets_filter))
+
+# Optionally expose other important items at package level
+__all__ = ['main', 'server']
diff --git a/src/mcp_server_bigquery/server.py b/src/mcp_server_bigquery/server.py
@@ -0,0 +1,182 @@
+import logging
+import os
+import tempfile
+from typing import Any
+
+from google.cloud import bigquery
+import mcp.server.stdio
+import mcp.types as types
+from mcp.server import NotificationOptions, Server
+from mcp.server.models import InitializationOptions
+
+# Set up logging to both stdout and file
+logger = logging.getLogger('mcp_bigquery_server')
+handler_stdout = logging.StreamHandler()
+handler_file = logging.FileHandler('/tmp/mcp_bigquery_server.log')
+
+# Set format for both handlers
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+handler_stdout.setFormatter(formatter)
+handler_file.setFormatter(formatter)
+
+# Add both handlers to logger
+logger.addHandler(handler_stdout)
+logger.addHandler(handler_file)
+
+# Set overall logging level
+logger.setLevel(logging.DEBUG)
+
+logger.info("Starting MCP BigQuery Server")
+
+class BigQueryDatabase:
+    def __init__(self, project: str, location: str, datasets_filter: list[str]):
+        """Initialize a BigQuery database client"""
+        if not project:
+            raise ValueError("Project is required")
+        if not location:
+            raise ValueError("Location is required")
+
+        self.client = bigquery.Client(project=project, location=location)
+        self.datasets_filter = datasets_filter
+
+    def execute_query(self, query: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
+        """Execute a SQL query and return results as a list of dictionaries"""
+        logger.debug(f"Executing query: {query}")
+        try:
+            if params:
+                job = self.client.query(query, job_config=bigquery.QueryJobConfig(query_parameters=params))
+            else:
+                job = self.client.query(query)
+                
+            results = job.result()
+            rows = [dict(row.items()) for row in results]
+            logger.debug(f"Query returned {len(rows)} rows")
+            return rows
+        except Exception as e:
+            logger.error(f"Database error executing query: {e}")
+            raise
+    
+    def list_tables(self) -> list[str]:
+        """List all tables in the BigQuery database"""
+        logger.debug("Listing all tables")
+
+        if self.datasets_filter:
+            datasets = [self.client.dataset(dataset) for dataset in self.datasets_filter]
+        else:
+            datasets = list(self.client.list_datasets())
+
+        logger.debug(f"Found {len(datasets)} datasets")
+
+        tables = []
+        for dataset in datasets:
+            dataset_tables = self.client.list_tables(dataset.dataset_id)
+            tables.extend([
+                f"{dataset.dataset_id}.{table.table_id}" for table in dataset_tables
+            ])
+
+        logger.debug(f"Found {len(tables)} tables")
+        return tables
+
+    def describe_table(self, table_name: str) -> list[dict[str, Any]]:
+        """Describe a table in the BigQuery database"""
+        logger.debug(f"Describing table: {table_name}")
+
+        parts = table_name.split(".")
+        if len(parts) != 2:
+            raise ValueError(f"Invalid table name: {table_name}")
+
+        dataset_id = parts[0]
+        table_id = parts[1]
+
+        query = f"""
+            SELECT ddl
+            FROM {dataset_id}.INFORMATION_SCHEMA.TABLES
+            WHERE table_name = @table_name;
+        """
+        return self.execute_query(query, params=[
+            bigquery.ScalarQueryParameter("table_name", "STRING", table_id),
+        ])
+
+async def main(project: str, location: str, datasets_filter: list[str]):
+    logger.info(f"Starting BigQuery MCP Server with project: {project} and location: {location}")
+
+    db = BigQueryDatabase(project, location, datasets_filter)
+    server = Server("bigquery-manager")
+
+    # Register handlers
+    logger.debug("Registering handlers")
+
+    @server.list_tools()
+    async def handle_list_tools() -> list[types.Tool]:
+        """List available tools"""
+        return [
+            types.Tool(
+                name="execute-query",
+                description="Execute a SELECT query on the BigQuery database",
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "query": {"type": "string", "description": "SELECT SQL query to execute using BigQuery dialect"},
+                    },
+                    "required": ["query"],
+                },
+            ),
+            types.Tool(
+                name="list-tables",
+                description="List all tables in the BigQuery database",
+                inputSchema={
+                    "type": "object",
+                    "properties": {},
+                },
+            ),
+            types.Tool(
+                name="describe-table",
+                description="Get the schema information for a specific table",
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "table_name": {"type": "string", "description": "Name of the table to describe (e.g. my_dataset.my_table)"},
+                    },
+                    "required": ["table_name"],
+                },
+            ),
+        ]
+
+    @server.call_tool()
+    async def handle_call_tool(
+        name: str, arguments: dict[str, Any] | None
+    ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
+        """Handle tool execution requests"""
+        logger.debug(f"Handling tool execution request: {name}")
+
+        try:
+            if name == "list-tables":
+                results = db.list_tables()
+                return [types.TextContent(type="text", text=str(results))]
+
+            elif name == "describe-table":
+                if not arguments or "table_name" not in arguments:
+                    raise ValueError("Missing table_name argument")
+                results = db.describe_table(arguments["table_name"])
+                return [types.TextContent(type="text", text=str(results))]
+
+            if name == "execute-query":
+                results = db.execute_query(arguments["query"])
+                return [types.TextContent(type="text", text=str(results))]
+
+            else:
+                raise ValueError(f"Unknown tool: {name}")
+        except Exception as e:
+            return [types.TextContent(type="text", text=f"Error: {str(e)}")]
+
+    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
+        logger.info("Server running with stdio transport")
+        await server.run(
+            read_stream,
+            write_stream,
+            InitializationOptions(
+                server_name="bigquery",
+                server_version="0.2.0",
+                capabilities=server.get_capabilities(
+                    notification_options=NotificationOptions(),
+                    experimental_capabilities={},
+                ),
+            ),
+        )
diff --git a/uv.lock b/uv.lock