Commit ae7d8a6

fix #33: move to o3-mini, add cursor rules, fix window error
1 parent 7ba2a66 commit ae7d8a6

4 files changed: +294 -39 lines

.cursor/rules/summary.mdc

+69
@@ -0,0 +1,69 @@
---
description: summary of project
globs:
alwaysApply: true
---
# GitDiagram Project Summary

## Project Overview
GitDiagram is a web application that converts any GitHub repository's structure into an interactive system design/architecture diagram. It lets users quickly understand a repository's architecture from a generated diagram, and provides interactivity by letting users click on components to navigate directly to the corresponding source files and directories.

## Key Features
- Instant conversion of GitHub repositories into system design diagrams
- Interactive components that link to source files and directories
- Support for both public and private repositories (with a GitHub token)
- Customizable diagrams through user instructions
- URL shortcut: replace `hub` with `diagram` in any GitHub URL to access its diagram

## Tech Stack
- **Frontend**: Next.js 15, TypeScript, Tailwind CSS, ShadCN UI components
- **Backend**: FastAPI (Python), Server Actions
- **Database**: PostgreSQL with Drizzle ORM, Neon Database for serverless PostgreSQL
- **AI**: Claude 3.5 Sonnet (previously) / OpenAI o3-mini (currently) for diagram generation
- **Deployment**: Vercel (Frontend), EC2 (Backend)
- **CI/CD**: GitHub Actions
- **Analytics**: PostHog, Api-Analytics

## Architecture
The project follows a modern full-stack architecture:

1. **Frontend (Next.js)**:
   - Organized using the App Router pattern
   - Uses server components and server actions
   - Implements Mermaid.js for rendering diagrams
   - Provides UI for repository input and diagram customization

2. **Backend (FastAPI)**:
   - Handles repository data extraction
   - Implements complex prompt engineering through a pipeline:
     - First prompt analyzes the repository and creates an explanation
     - Second prompt maps relevant directories and files to diagram components
     - Third prompt generates the final Mermaid.js code
   - Manages API rate limiting and authentication

3. **Database (PostgreSQL)**:
   - Stores user data, repository information, and generated diagrams
   - Uses Drizzle ORM for type-safe database operations

4. **AI Integration**:
   - Uses LLMs to analyze repository structure
   - Generates detailed diagrams based on file trees and README content
   - Implements sophisticated prompt engineering to extract accurate information

## Project Structure
- `/src`: Frontend source code (Next.js) and server actions for database calls with Drizzle
- `/backend`: Python FastAPI backend
- `/public`: Static assets
- `/docs`: Documentation and images

## Development Setup
The project supports both local development and self-hosting:
- Dependencies managed with pnpm
- Docker Compose for containerization
- Environment configuration via .env files
- Database initialization scripts

## Future Development
- Implementation of Font Awesome icons in diagrams
- Embedded feature for progressive diagram updates as commits are made
- Expanded API access for third-party integration
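
A quick illustration of the URL shortcut listed under Key Features (an editorial sketch, not part of the committed file; it assumes the app is served at gitdiagram.com):

def to_diagram_url(github_url: str) -> str:
    # "https://github.com/user/repo" -> "https://gitdiagram.com/user/repo"
    return github_url.replace("github.com", "gitdiagram.com", 1)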

backend/app/routers/generate.py

+15-13
@@ -2,7 +2,7 @@
 from fastapi.responses import StreamingResponse
 from dotenv import load_dotenv
 from app.services.github_service import GitHubService
-from app.services.o1_mini_openai_service import OpenAIO1Service
+from app.services.o3_mini_openai_service import OpenAIo3Service
 from app.prompts import (
     SYSTEM_FIRST_PROMPT,
     SYSTEM_SECOND_PROMPT,

@@ -25,7 +25,7 @@

 # Initialize services
 # claude_service = ClaudeService()
-o1_service = OpenAIO1Service()
+o3_service = OpenAIo3Service()


 # cache github data to avoid double API calls from cost and generate

@@ -65,8 +65,8 @@ async def get_generation_cost(request: Request, body: ApiRequest):
 # file_tree_tokens = claude_service.count_tokens(file_tree)
 # readme_tokens = claude_service.count_tokens(readme)

-file_tree_tokens = o1_service.count_tokens(file_tree)
-readme_tokens = o1_service.count_tokens(readme)
+file_tree_tokens = o3_service.count_tokens(file_tree)
+readme_tokens = o3_service.count_tokens(readme)

 # CLAUDE: Calculate approximate cost
 # Input cost: $3 per 1M tokens ($0.000003 per token)

@@ -75,7 +75,6 @@ async def get_generation_cost(request: Request, body: ApiRequest):
 # output_cost = 3500 * 0.000015
 # estimated_cost = input_cost + output_cost

-# O3: Calculate approximate cost temp: o1-mini, same price as o3-mini
 # Input cost: $1.1 per 1M tokens ($0.0000011 per token)
 # Output cost: $4.4 per 1M tokens ($0.0000044 per token)
 input_cost = ((file_tree_tokens * 2 + readme_tokens) + 3000) * 0.0000011
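
As an editorial sanity check (not part of the commit), the input-cost formula above works out as follows for a hypothetical repository; the token counts are invented, and the doubling of the file-tree tokens presumably reflects the file tree appearing in more than one of the pipeline prompts:

# hypothetical token counts for illustration only
file_tree_tokens = 20_000
readme_tokens = 2_000

# o3-mini input pricing: $1.1 per 1M tokens, plus ~3k tokens of prompt overhead
input_cost = ((file_tree_tokens * 2 + readme_tokens) + 3000) * 0.0000011
print(f"${input_cost:.4f}")  # $0.0495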
@@ -149,13 +148,13 @@ async def event_generator():

 # Token count check
 combined_content = f"{file_tree}\n{readme}"
-token_count = o1_service.count_tokens(combined_content)
+token_count = o3_service.count_tokens(combined_content)

 if 50000 < token_count < 195000 and not body.api_key:
     yield f"data: {json.dumps({'error': f'File tree and README combined exceeds token limit (50,000). Current size: {token_count} tokens. This GitHub repository is too large for my wallet, but you can continue by providing your own OpenAI API key.'})}\n\n"
     return
 elif token_count > 195000:
-    yield f"data: {json.dumps({'error': f'Repository is too large (>195k tokens) for analysis. OpenAI o1-mini\'s max context length is 200k tokens. Current size: {token_count} tokens.'})}\n\n"
+    yield f"data: {json.dumps({'error': f'Repository is too large (>195k tokens) for analysis. OpenAI o3-mini\'s max context length is 200k tokens. Current size: {token_count} tokens.'})}\n\n"
     return

 # Prepare prompts

@@ -174,18 +173,19 @@ async def event_generator():
 )

 # Phase 1: Get explanation
-yield f"data: {json.dumps({'status': 'explanation_sent', 'message': 'Sending explanation request to o1-mini...'})}\n\n"
+yield f"data: {json.dumps({'status': 'explanation_sent', 'message': 'Sending explanation request to o3-mini...'})}\n\n"
 await asyncio.sleep(0.1)
 yield f"data: {json.dumps({'status': 'explanation', 'message': 'Analyzing repository structure...'})}\n\n"
 explanation = ""
-async for chunk in o1_service.call_o1_api_stream(
+async for chunk in o3_service.call_o3_api_stream(
     system_prompt=first_system_prompt,
     data={
         "file_tree": file_tree,
         "readme": readme,
         "instructions": body.instructions,
     },
     api_key=body.api_key,
+    reasoning_effort="medium",
 ):
     explanation += chunk
     yield f"data: {json.dumps({'status': 'explanation_chunk', 'chunk': chunk})}\n\n"

@@ -195,14 +195,15 @@ async def event_generator():
 return

 # Phase 2: Get component mapping
-yield f"data: {json.dumps({'status': 'mapping_sent', 'message': 'Sending component mapping request to o1-mini...'})}\n\n"
+yield f"data: {json.dumps({'status': 'mapping_sent', 'message': 'Sending component mapping request to o3-mini...'})}\n\n"
 await asyncio.sleep(0.1)
 yield f"data: {json.dumps({'status': 'mapping', 'message': 'Creating component mapping...'})}\n\n"
 full_second_response = ""
-async for chunk in o1_service.call_o1_api_stream(
+async for chunk in o3_service.call_o3_api_stream(
     system_prompt=SYSTEM_SECOND_PROMPT,
     data={"explanation": explanation, "file_tree": file_tree},
     api_key=body.api_key,
+    reasoning_effort="low",
 ):
     full_second_response += chunk
     yield f"data: {json.dumps({'status': 'mapping_chunk', 'chunk': chunk})}\n\n"

@@ -218,18 +219,19 @@ async def event_generator():
 ]

 # Phase 3: Generate Mermaid diagram
-yield f"data: {json.dumps({'status': 'diagram_sent', 'message': 'Sending diagram generation request to o1-mini...'})}\n\n"
+yield f"data: {json.dumps({'status': 'diagram_sent', 'message': 'Sending diagram generation request to o3-mini...'})}\n\n"
 await asyncio.sleep(0.1)
 yield f"data: {json.dumps({'status': 'diagram', 'message': 'Generating diagram...'})}\n\n"
 mermaid_code = ""
-async for chunk in o1_service.call_o1_api_stream(
+async for chunk in o3_service.call_o3_api_stream(
     system_prompt=third_system_prompt,
     data={
         "explanation": explanation,
         "component_mapping": component_mapping_text,
         "instructions": body.instructions,
     },
     api_key=body.api_key,
+    reasoning_effort="medium",
 ):
     mermaid_code += chunk
     yield f"data: {json.dumps({'status': 'diagram_chunk', 'chunk': chunk})}\n\n"

backend/app/services/o3_mini_openai_service.py

+186

@@ -0,0 +1,186 @@
from openai import OpenAI
from dotenv import load_dotenv
from app.utils.format_message import format_user_message
import tiktoken
import os
import aiohttp
import json
from typing import AsyncGenerator, Literal

load_dotenv()


class OpenAIo3Service:
    def __init__(self):
        self.default_client = OpenAI(
            api_key=os.getenv("OPENAI_API_KEY"),
        )
        self.encoding = tiktoken.get_encoding("o200k_base")  # Encoder for OpenAI models
        self.base_url = "https://api.openai.com/v1/chat/completions"

    def call_o3_api(
        self,
        system_prompt: str,
        data: dict,
        api_key: str | None = None,
        reasoning_effort: Literal["low", "medium", "high"] = "low",
    ) -> str:
        """
        Makes an API call to OpenAI o3-mini and returns the response.

        Args:
            system_prompt (str): The instruction/system prompt
            data (dict): Dictionary of variables to format into the user message
            api_key (str | None): Optional custom API key

        Returns:
            str: o3-mini's response text
        """
        # Create the user message with the data
        user_message = format_user_message(data)

        # Use custom client if API key provided, otherwise use default
        client = OpenAI(api_key=api_key) if api_key else self.default_client

        try:
            print(
                f"Making non-streaming API call to o3-mini with API key: {'custom key' if api_key else 'default key'}"
            )

            completion = client.chat.completions.create(
                model="o3-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_message},
                ],
                max_completion_tokens=12000,  # Adjust as needed
                temperature=0.2,
                reasoning_effort=reasoning_effort,
            )

            print("API call completed successfully")

            if completion.choices[0].message.content is None:
                raise ValueError("No content returned from OpenAI o3-mini")

            return completion.choices[0].message.content

        except Exception as e:
            print(f"Error in OpenAI o3-mini API call: {str(e)}")
            raise

    async def call_o3_api_stream(
        self,
        system_prompt: str,
        data: dict,
        api_key: str | None = None,
        reasoning_effort: Literal["low", "medium", "high"] = "low",
    ) -> AsyncGenerator[str, None]:
        """
        Makes a streaming API call to OpenAI o3-mini and yields the responses.

        Args:
            system_prompt (str): The instruction/system prompt
            data (dict): Dictionary of variables to format into the user message
            api_key (str | None): Optional custom API key

        Yields:
            str: Chunks of o3-mini's response text
        """
        # Create the user message with the data
        user_message = format_user_message(data)

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key or self.default_client.api_key}",
        }

        # payload = {
        #     "model": "o3-mini",
        #     "messages": [
        #         {
        #             "role": "user",
        #             "content": f"""
        # <VERY_IMPORTANT_SYSTEM_INSTRUCTIONS>
        # {system_prompt}
        # </VERY_IMPORTANT_SYSTEM_INSTRUCTIONS>
        # <USER_INSTRUCTIONS>
        # {user_message}
        # </USER_INSTRUCTIONS>
        # """,
        #         },
        #     ],
        #     "max_completion_tokens": 12000,
        #     "stream": True,
        # }

        payload = {
            "model": "o3-mini",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            "max_completion_tokens": 12000,
            "stream": True,
            "reasoning_effort": reasoning_effort,
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self.base_url, headers=headers, json=payload
                ) as response:

                    if response.status != 200:
                        error_text = await response.text()
                        print(f"Error response: {error_text}")
                        raise ValueError(
                            f"OpenAI API returned status code {response.status}: {error_text}"
                        )

                    line_count = 0
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if not line:
                            continue

                        line_count += 1

                        if line.startswith("data: "):
                            if line == "data: [DONE]":
                                break
                            try:
                                data = json.loads(line[6:])
                                content = (
                                    data.get("choices", [{}])[0]
                                    .get("delta", {})
                                    .get("content")
                                )
                                if content:
                                    yield content
                            except json.JSONDecodeError as e:
                                print(f"JSON decode error: {e} for line: {line}")
                                continue

                    if line_count == 0:
                        print("Warning: No lines received in stream response")

        except aiohttp.ClientError as e:
            print(f"Connection error: {str(e)}")
            raise ValueError(f"Failed to connect to OpenAI API: {str(e)}")
        except Exception as e:
            print(f"Unexpected error in streaming API call: {str(e)}")
            raise

    def count_tokens(self, prompt: str) -> int:
        """
        Counts the number of tokens in a prompt.

        Args:
            prompt (str): The prompt to count tokens for

        Returns:
            int: Estimated number of input tokens
        """
        num_tokens = len(self.encoding.encode(prompt))
        return num_tokens
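
A minimal editorial usage sketch of the new service (not part of the commit), assuming it is run from the backend environment with OPENAI_API_KEY set; the prompt and data values are placeholders:

import asyncio

from app.services.o3_mini_openai_service import OpenAIo3Service


async def main() -> None:
    service = OpenAIo3Service()

    system_prompt = "Explain the purpose of this repository."  # placeholder prompt
    print("input tokens:", service.count_tokens(system_prompt))

    # Stream the response chunk by chunk, mirroring how generate.py consumes it
    async for chunk in service.call_o3_api_stream(
        system_prompt=system_prompt,
        data={"file_tree": "src/\nbackend/\n", "readme": "# Demo"},
        reasoning_effort="low",
    ):
        print(chunk, end="", flush=True)


asyncio.run(main())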
