File tree 2 files changed +23
-2
lines changed
examples/refresh_vectorstore
2 files changed +23
-2
lines changed Original file line number Diff line number Diff line change 47
47
repo = "PrefectHQ/prefect" ,
48
48
include_globs = ["flows/" ],
49
49
),
50
+ GitHubRepoLoader (
51
+ repo = "PrefectHQ/prefect" ,
52
+ include_globs = ["src/prefect/*.py" ],
53
+ ),
54
+ GitHubRepoLoader (
55
+ repo = "PrefectHQ/prefect-background-task-examples" ,
56
+ include_globs = ["**/*.py" , "**/*.md" ],
57
+ ),
58
+ GitHubRepoLoader (
59
+ repo = "zzstoatzz/prefect-pack" ,
60
+ include_globs = ["**/*.py" , "**/*.md" ],
61
+ ),
62
+ GitHubRepoLoader (
63
+ repo = "zzstoatzz/prefect-monorepo" ,
64
+ include_globs = ["**/*.py" , "**/*.md" , "**/*.yaml" ],
65
+ ),
50
66
],
51
67
"controlflow" : [
52
68
SitemapLoader (
@@ -97,7 +113,7 @@ def refresh_tpuf_namespace(
97
113
for doc in future .result () # type: ignore
98
114
]
99
115
100
- print (f"Loaded { len (documents )} documents from the Prefect community." )
116
+ print (f"Gathered { len (documents )} documents from the Prefect community." )
101
117
102
118
with TurboPuffer (namespace = namespace ) as tpuf :
103
119
if reset :
Original file line number Diff line number Diff line change @@ -194,6 +194,8 @@ class GitHubRepoLoader(Loader):
194
194
repo : str = Field (...)
195
195
include_globs : list [str ] | None = Field (default = None )
196
196
exclude_globs : list [str ] | None = Field (default = None )
197
+ chunk_size : int = Field (default = 500 )
198
+ overlap : float = Field (default = 0.1 )
197
199
198
200
@field_validator ("repo" )
199
201
def validate_repo (cls , v : str ) -> str :
@@ -243,7 +245,10 @@ async def load(self) -> list[Document]:
243
245
Document (
244
246
text = await read_file_with_chardet (Path (tmp_dir ) / file ),
245
247
metadata = metadata ,
246
- )
248
+ ),
249
+ chunk_size = self .chunk_size ,
250
+ overlap = self .overlap ,
247
251
)
248
252
)
249
253
return documents
254
+
You can’t perform that action at this time.
0 commit comments