Merge pull request #18 from jlowin/linked

jlowin · web-flow · commit 373062fa92ce · 2024-12-02T20:32:29.000-05:00
Include linked items
diff --git a/src/ai_labeler/ai.py b/src/ai_labeler/ai.py
@@ -32,9 +32,20 @@ def validate_labels(result: list[str]):
 
     class Reasoning(BaseModel):
         label_name: str
-        # reasoning: str
         should_apply: bool
 
+    # Format linked items for context
+    linked_items_context = ""
+    if item.linked_items:
+        linked_items_context = "\nLinked Items:\n"
+        for linked in item.linked_items:
+            linked_items_context += f"""
+            {linked.type.replace('_', ' ').title()} #{linked.number}:
+            Title: {linked.title}
+            Labels: {', '.join(linked.labels)}
+            Body: {linked.body}
+            """
+
     reasoning = cf.run(
         """
         Consider the provided PR/issue, its context, and any provided
@@ -54,6 +65,11 @@ class Reasoning(BaseModel):
         
         You do not need to return reasoning about labels that are obviously
         irrelevant.
+
+        When evaluating labels, consider any linked items and their context:
+        - Look for patterns or relationships between the current item and linked items
+        - Consider if linked items provide additional context about the scope or impact
+        - Check if linked items have relevant labels that could inform this decision
         """,
         instructions=instructions,
         result_type=list[Reasoning],
@@ -62,6 +78,7 @@ class Reasoning(BaseModel):
             "available_labels": dict(enumerate(labels)),
             "additional_context": context_files,
             "labeling_instructions": instructions,
+            "linked_items_context": linked_items_context,
         },
         agents=[labeler],
         completion_tools=["SUCCEED"],  # the task can not be marked as failed
diff --git a/src/ai_labeler/github.py b/src/ai_labeler/github.py
@@ -1,22 +1,35 @@
 import os
 import json
-from typing import Optional
+import re
+from typing import Optional, List
 from github import Github
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from .config_parser import Config
 
 
+class LinkedItem(BaseModel):
+    """Title, body and labels of a GitHub issue or PR linked from the item being labeled."""
+
+    number: int  # issue/PR number, as passed to repo.get_issue()
+    title: str
+    body: str  # "" when the fetched item has no body
+    labels: List[str]  # label names only
+    type: str = Field(..., pattern="^(issue|pull_request)$")  # "issue" or "pull_request"
+
+
 class PullRequest(BaseModel):
     title: str
     body: str
     files: dict[str, str | None]
     author: str  # GitHub username
+    linked_items: List[LinkedItem] = Field(default_factory=list)  # issues/PRs referenced in the body
 
 
 class Issue(BaseModel):
     title: str
     body: str
     author: str  # GitHub username
+    linked_items: List[LinkedItem] = Field(default_factory=list)  # issues/PRs referenced in the body
 
 
 class Label(BaseModel):
@@ -135,3 +148,48 @@ def get_available_labels_from_config(
         labels.append(label)
 
     return labels
+
+
+def parse_github_links(text: str) -> List[int]:
+    """Return the sorted, de-duplicated issue/PR numbers referenced in ``text``.
+
+    Recognised forms are ``#123``, ``repo#123`` and ``org/repo#123``. A bare
+    ``#123`` may follow ``(`` or ``[`` and may be followed by punctuation.
+    """
+    numbers = set()
+
+    # Lookarounds leave the separators unconsumed, so adjacent references
+    # such as "#1 #2" and punctuated ones such as "Fixes #12." are all found.
+    matches = re.finditer(r"(?<![^\s(\[])#(\d+)(?!\w)", text)
+    numbers.update(int(m.group(1)) for m in matches)
+
+    # NOTE(review): the repo qualifier is not compared with the current
+    # repository, so "other/repo#5" also yields 5 -- confirm this is intended.
+    matches = re.finditer(r"(?:[\w-]+/)?[\w-]+#(\d+)", text)
+    numbers.update(int(m.group(1)) for m in matches)
+    return sorted(numbers)
+
+
+def fetch_linked_items(gh_client: Github, numbers: List[int]) -> List[LinkedItem]:
+    """Fetch title, body and labels for each numbered issue/PR; failed fetches are skipped."""
+    repo = gh_client.get_repo(os.getenv("GITHUB_REPOSITORY"))
+    linked_items = []
+    # Each number is fetched in its own try block so one failure does not drop the rest.
+    for number in numbers:
+        try:
+            issue = repo.get_issue(number)
+            item_type = "pull_request" if issue.pull_request else "issue"
+            # A missing body becomes "" because LinkedItem.body is declared as str.
+            linked_items.append(
+                LinkedItem(
+                    number=number,
+                    title=issue.title,
+                    body=issue.body or "",
+                    labels=[label.name for label in issue.labels],
+                    type=item_type,
+                )
+            )
+        except Exception as e:
+            print(f"Warning: Failed to fetch item #{number}: {e}")
+
+    return linked_items
diff --git a/src/ai_labeler/label_workflow.py b/src/ai_labeler/label_workflow.py
@@ -9,6 +9,8 @@
     PullRequest,
     Issue,
     get_event_number,
+    parse_github_links,
+    fetch_linked_items,
 )
 from ai_labeler.ai import labeling_workflow
 
@@ -67,17 +69,28 @@ def run_label_workflow(
     issue = repo.get_issue(number)
     if issue.pull_request:
         pr = repo.get_pull(number)
+
+        # Parse and fetch linked items
+        linked_numbers = parse_github_links(pr.body or "")
+        linked_items = fetch_linked_items(gh, linked_numbers)
+
         item = PullRequest(
             title=pr.title,
             body=pr.body or "",
             files={f.filename: f.patch for f in pr.get_files()},
             author=pr.user.login,
+            linked_items=linked_items,
         )
     else:
+        # Parse and fetch linked items
+        linked_numbers = parse_github_links(issue.body or "")
+        linked_items = fetch_linked_items(gh, linked_numbers)
+
         item = Issue(
             title=issue.title,
             body=issue.body or "",
             author=issue.user.login,
+            linked_items=linked_items,
         )
 
     # Run the labeling workflow