Skip to content

Commit 3f7b4e2

Browse files
committed
fix typing
1 parent e4b408f commit 3f7b4e2

24 files changed

+1156
-176
lines changed

.pre-commit-config.yaml

+7-6
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,11 @@ repos:
55
- id: ruff-format
66
- id: ruff
77
args: [--fix, --exit-non-zero-on-fix]
8-
- repo: https://github.com/pre-commit/mirrors-mypy
9-
rev: v1.8.0
8+
- repo: local
109
hooks:
11-
- id: mypy
12-
additional_dependencies:
13-
- "aiofiles"
14-
- "types-aiofiles"
10+
- id: typecheck
11+
name: Typecheck
12+
entry: uv run mypy .
13+
types: [python]
14+
language: system
15+
pass_filenames: false

examples/chat_with_X/repo.py

+10-6
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,12 @@
99

1010
import asyncio
1111
import warnings
12+
from typing import Any
1213

1314
import httpx
14-
from marvin.beta.assistants import Assistant
15+
from marvin.beta.assistants import Assistant # type: ignore
1516
from prefect import flow, task
17+
from prefect.context import TaskRunContext
1618
from rich.status import Status
1719

1820
from raggy.documents import Document
@@ -22,7 +24,9 @@
2224
TPUF_NS = "demo"
2325

2426

25-
def get_last_commit_sha(context, parameters) -> str | None:
27+
def get_last_commit_sha(
28+
context: TaskRunContext, parameters: dict[str, Any]
29+
) -> str | None:
2630
"""Cache based on Last-Modified header of the first URL."""
2731
try:
2832
return httpx.get(
@@ -47,10 +51,10 @@ async def ingest_repo(repo: str):
4751
Args:
4852
repo: The repository to ingest (format: "owner/repo").
4953
"""
50-
documents = await gather_documents(repo)
54+
documents: list[Document] = await gather_documents(repo) # type: ignore
5155
with TurboPuffer(namespace=TPUF_NS) as tpuf:
52-
print(f"Upserting {len(documents)} documents into {TPUF_NS}")
53-
await task(tpuf.upsert_batched)(documents)
56+
print(f"Upserting {len(documents)} documents into {TPUF_NS}") # type: ignore
57+
await task(tpuf.upsert_batched)(documents) # type: ignore
5458

5559

5660
@task(task_run_name="querying: {query_texts}")
@@ -84,7 +88,7 @@ async def chat_with_repo(initial_message: str | None = None, clean_up: bool = Tr
8488
ingest_repo,
8589
do_research,
8690
],
87-
) as assistant:
91+
) as assistant: # type: ignore
8892
assistant.chat(initial_message=initial_message) # type: ignore
8993

9094
finally:

examples/chat_with_X/website.py

+12-8
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,12 @@
1111
import re
1212
import warnings
1313
from datetime import timedelta
14+
from typing import Any
1415

1516
import httpx
16-
from marvin.beta.assistants import Assistant
17+
from marvin.beta.assistants import Assistant # type: ignore
1718
from prefect import flow, task
19+
from prefect.context import TaskRunContext
1820
from rich.status import Status
1921

2022
from raggy.documents import Document
@@ -24,7 +26,9 @@
2426
TPUF_NS = "demo"
2527

2628

27-
def get_last_modified(context, parameters):
29+
def get_last_modified(
30+
context: TaskRunContext, parameters: dict[str, Any]
31+
) -> str | None:
2832
"""Cache based on Last-Modified header of the first URL."""
2933
try:
3034
with httpx.Client() as client:
@@ -40,25 +44,25 @@ def get_last_modified(context, parameters):
4044
cache_expiration=timedelta(hours=24),
4145
)
4246
async def gather_documents(
43-
urls: list[str], exclude: list[str | re.Pattern] | None = None
47+
urls: list[str], exclude: list[str | re.Pattern[str]] | None = None
4448
) -> list[Document]:
4549
return await SitemapLoader(urls=urls, exclude=exclude or []).load()
4650

4751

4852
@flow(flow_run_name="{urls}")
4953
async def ingest_website(
50-
urls: list[str], exclude: list[str | re.Pattern] | None = None
54+
urls: list[str], exclude: list[str | re.Pattern[str]] | None = None
5155
):
5256
"""Ingest a website into the vector database.
5357
5458
Args:
5559
urls: The URLs to ingest (exact or glob patterns).
5660
exclude: The URLs to exclude (exact or glob patterns).
5761
"""
58-
documents = await gather_documents(urls, exclude)
62+
documents: list[Document] = await gather_documents(urls, exclude) # type: ignore
5963
with TurboPuffer(namespace=TPUF_NS) as tpuf:
60-
print(f"Upserting {len(documents)} documents into {TPUF_NS}")
61-
await tpuf.upsert_batched(documents)
64+
print(f"Upserting {len(documents)} documents into {TPUF_NS}") # type: ignore
65+
await tpuf.upsert_batched(documents) # type: ignore
6266

6367

6468
@task(task_run_name="querying: {query_texts}")
@@ -92,7 +96,7 @@ async def chat_with_website(initial_message: str | None = None, clean_up: bool =
9296
ingest_website,
9397
do_research,
9498
],
95-
) as assistant:
99+
) as assistant: # type: ignore
96100
assistant.chat(initial_message=initial_message) # type: ignore
97101

98102
finally:

examples/reddit_thread.py

+19-17
Original file line number | Diff line number | Diff line change
@@ -8,15 +8,15 @@
88

99
from functools import lru_cache
1010

11-
import marvin
12-
import praw
13-
from marvin.utilities.logging import get_logger
11+
import marvin # type: ignore
12+
import praw # type: ignore
13+
from marvin.utilities.logging import get_logger # type: ignore
1414
from pydantic_settings import BaseSettings, SettingsConfigDict
1515

1616
from raggy.documents import Document, document_to_excerpts
1717
from raggy.vectorstores.tpuf import TurboPuffer, query_namespace
1818

19-
logger = get_logger("reddit_thread_example")
19+
logger = get_logger("reddit_thread_example") # type: ignore
2020

2121

2222
class Settings(BaseSettings):
@@ -26,45 +26,47 @@ class Settings(BaseSettings):
2626
settings = Settings()
2727

2828

29-
def create_reddit_client() -> praw.Reddit:
30-
return praw.Reddit(
29+
def create_reddit_client() -> praw.Reddit: # type: ignore
30+
return praw.Reddit( # type: ignore
3131
client_id=getattr(settings, "reddit_client_id"),
3232
client_secret=getattr(settings, "reddit_client_secret"),
3333
user_agent="testscript by /u/_n80n8",
3434
)
3535

3636

3737
@lru_cache
38-
def read_thread(submission_id: str):
39-
logger.info(f"Reading thread {submission_id}")
40-
submission = create_reddit_client().submission(submission_id)
38+
def read_thread(submission_id: str) -> str:
39+
logger.info(f"Reading thread {submission_id}") # type: ignore
40+
submission: praw.models.Submission = create_reddit_client().submission( # type: ignore
41+
submission_id
42+
)
4143

4244
text_buffer = ""
43-
text_buffer += f"Title: {submission.title}\n"
44-
text_buffer += f"Selftext: {submission.selftext}\n"
45+
text_buffer += f"Title: {submission.title}\n" # type: ignore
46+
text_buffer += f"Selftext: {submission.selftext}\n" # type: ignore
4547

46-
submission.comments.replace_more(limit=None) # Retrieve all comments
47-
for comment in submission.comments.list():
48+
submission.comments.replace_more(limit=None) # type: ignore
49+
for comment in submission.comments.list(): # type: ignore
4850
text_buffer += "\n---\n"
49-
text_buffer += f"Comment Text: {comment.body}\n"
51+
text_buffer += f"Comment Text: {comment.body}\n" # type: ignore
5052

5153
return text_buffer
5254

5355

54-
@marvin.fn
56+
@marvin.fn # type: ignore
5557
def summarize_results(relevant_excerpts: str) -> str: # type: ignore[empty-body]
5658
"""give a summary of the relevant excerpts"""
5759

5860

5961
async def main(thread_id: str):
60-
logger.info("Starting Reddit thread example")
62+
logger.info("Starting Reddit thread example") # type: ignore
6163
thread_text = read_thread(thread_id)
6264
chunked_documents = await document_to_excerpts(Document(text=thread_text))
6365

6466
with TurboPuffer(namespace="reddit_thread") as tpuf:
6567
tpuf.upsert(chunked_documents)
6668

67-
logger.info("Thread saved!")
69+
logger.info("Thread saved!") # type: ignore
6870

6971
query = "how do people feel about the return of the water taxis?"
7072
results = query_namespace(query, namespace="reddit_thread")

examples/refresh_vectorstore/chroma_collection.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -65,10 +65,10 @@ def refresh_chroma(
6565
mode: Literal["upsert", "reset"] = "upsert",
6666
):
6767
"""Flow updating vectorstore with info from the Prefect community."""
68-
documents = [
68+
documents: list[Document] = [
6969
doc
7070
for future in run_loader.map(prefect_loaders) # type: ignore
71-
for doc in future.result()
71+
for doc in future.result() # type: ignore
7272
]
7373

7474
print(f"Loaded {len(documents)} documents from the Prefect community.")

examples/refresh_vectorstore/tpuf_namespace.py

+12-6
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,10 @@
77

88
import os
99
from datetime import timedelta
10+
from typing import Any, Sequence
1011

1112
from prefect import flow, task
13+
from prefect.context import TaskRunContext
1214
from prefect.tasks import task_input_hash
1315
from prefect.utilities.annotations import quote
1416

@@ -58,7 +60,9 @@
5860
}
5961

6062

61-
def _cache_key_with_invalidation(context, parameters):
63+
def _cache_key_with_invalidation(
64+
context: TaskRunContext, parameters: dict[str, Any]
65+
) -> str:
6266
return f"{task_input_hash(context, parameters)}:{os.getenv("RAGGY_CACHE_VERSION", "0")}"
6367

6468

@@ -81,16 +85,16 @@ async def run_loader(loader: Loader) -> list[Document]:
8185
)
8286
def refresh_tpuf_namespace(
8387
namespace: str,
84-
namespace_loaders: list[Loader],
88+
namespace_loaders: Sequence[Loader],
8589
reset: bool = False,
8690
batch_size: int = 100,
8791
max_concurrent: int = 8,
8892
):
8993
"""Flow updating vectorstore with info from the Prefect community."""
9094
documents: list[Document] = [
9195
doc
92-
for future in run_loader.map(quote(namespace_loaders))
93-
for doc in future.result()
96+
for future in run_loader.map(quote(namespace_loaders)) # type: ignore
97+
for doc in future.result() # type: ignore
9498
]
9599

96100
print(f"Loaded {len(documents)} documents from the Prefect community.")
@@ -100,8 +104,10 @@ def refresh_tpuf_namespace(
100104
task(tpuf.reset)()
101105
print(f"RESETTING: Deleted all documents from tpuf ns {namespace!r}.")
102106

103-
task(tpuf.upsert_batched).submit(
104-
documents=documents, batch_size=batch_size, max_concurrent=max_concurrent
107+
task(tpuf.upsert_batched).submit( # type: ignore
108+
documents=documents,
109+
batch_size=batch_size,
110+
max_concurrent=max_concurrent,
105111
).wait()
106112

107113
print(f"Updated tpuf ns {namespace!r} with {len(documents)} documents.")

pyproject.toml

+42
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ dependencies = [
1919
"chardet",
2020
"fake-useragent",
2121
"gh-util",
22+
"prefect",
2223
"pydantic-ai-slim[openai]",
2324
"pypdf",
2425
"tenacity",
@@ -103,3 +104,44 @@ skip-magic-trailing-comma = false
103104

104105
[tool.setuptools_scm]
105106
write_to = "src/raggy/_version.py"
107+
108+
[tool.mypy]
109+
ignore_missing_imports = true
110+
check_untyped_defs = true
111+
warn_redundant_casts = true
112+
warn_unused_ignores = true
113+
warn_return_any = true
114+
warn_unreachable = true
115+
strict_optional = true
116+
exclude = ["examples/.*"]
117+
118+
[[tool.mypy.overrides]]
119+
module = [
120+
"bs4.*",
121+
"fake_useragent.*",
122+
"gh_util.*",
123+
"turbopuffer.*",
124+
"prefect.*",
125+
"marvin.*",
126+
"praw.*",
127+
"yake.*",
128+
]
129+
ignore_missing_imports = true
130+
131+
[[tool.mypy.overrides]]
132+
module = [
133+
"raggy.cli",
134+
"raggy.settings",
135+
"raggy.utilities.text",
136+
"raggy.utilities.asyncutils",
137+
"raggy.vectorstores.tpuf",
138+
"raggy.vectorstores.chroma",
139+
"raggy.loaders.web",
140+
"raggy.loaders.pdf",
141+
]
142+
disable_error_code = [
143+
"unreachable",
144+
"no-any-return",
145+
"unused-ignore",
146+
"no-redef",
147+
]

src/raggy/__init__.py

+2
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,4 @@
11
from .documents import Document
22
from .settings import settings
3+
4+
__all__ = ["Document", "settings"]

0 commit comments

Comments
 (0)