Skip to content

Commit ccfa914

Browse files
authored
More Posthog batching (chroma-core#1342)
## Description of changes

*Summarize the changes made by this PR.*

- Improvements & Bug fixes
  - Batch PostHog requests much more aggressively. We really should do batches of ever-increasing size for a given collection, but I want to get this fix out this week.
- New functionality
  - ...

## Test plan

*How are these changes tested?*

- [x] Tests pass locally with `pytest` for python, `yarn test` for js

## Documentation Changes

*Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs repository](https://github.com/chroma-core/docs)?*
1 parent 69cd2ba commit ccfa914

File tree

1 file changed

+45
-2
lines changed

1 file changed

+45
-2
lines changed

chromadb/telemetry/product/events.py

+45-2
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def __init__(self, collection_uuid: str, embedding_function: str):
2626

2727

2828
class CollectionAddEvent(ProductTelemetryEvent):
29-
max_batch_size: ClassVar[int] = 100
29+
max_batch_size: ClassVar[int] = 1000
3030
batch_size: int
3131
collection_uuid: str
3232
add_amount: int
@@ -67,6 +67,8 @@ def batch(self, other: "ProductTelemetryEvent") -> "CollectionAddEvent":
6767

6868

6969
class CollectionUpdateEvent(ProductTelemetryEvent):
70+
max_batch_size: ClassVar[int] = 100
71+
batch_size: int
7072
collection_uuid: str
7173
update_amount: int
7274
with_embeddings: int
@@ -80,17 +82,36 @@ def __init__(
8082
with_embeddings: int,
8183
with_metadata: int,
8284
with_documents: int,
85+
batch_size: int = 1,
8386
):
8487
super().__init__()
8588
self.collection_uuid = collection_uuid
8689
self.update_amount = update_amount
8790
self.with_embeddings = with_embeddings
8891
self.with_metadata = with_metadata
8992
self.with_documents = with_documents
93+
self.batch_size = batch_size
9094

95+
@property
96+
def batch_key(self) -> str:
97+
return self.collection_uuid + self.name
98+
99+
def batch(self, other: "ProductTelemetryEvent") -> "CollectionUpdateEvent":
100+
if not self.batch_key == other.batch_key:
101+
raise ValueError("Cannot batch events")
102+
other = cast(CollectionUpdateEvent, other)
103+
total_amount = self.update_amount + other.update_amount
104+
return CollectionUpdateEvent(
105+
collection_uuid=self.collection_uuid,
106+
update_amount=total_amount,
107+
with_documents=self.with_documents + other.with_documents,
108+
with_metadata=self.with_metadata + other.with_metadata,
109+
with_embeddings=self.with_embeddings + other.with_embeddings,
110+
batch_size=self.batch_size + other.batch_size,
111+
)
91112

92113
class CollectionQueryEvent(ProductTelemetryEvent):
93-
max_batch_size: ClassVar[int] = 20
114+
max_batch_size: ClassVar[int] = 1000
94115
batch_size: int
95116
collection_uuid: str
96117
query_amount: int
@@ -147,6 +168,8 @@ def batch(self, other: "ProductTelemetryEvent") -> "CollectionQueryEvent":
147168

148169

149170
class CollectionGetEvent(ProductTelemetryEvent):
171+
max_batch_size: ClassVar[int] = 100
172+
batch_size: int
150173
collection_uuid: str
151174
ids_count: int
152175
limit: int
@@ -160,13 +183,33 @@ def __init__(
160183
limit: int,
161184
include_metadata: int,
162185
include_documents: int,
186+
batch_size: int = 1,
163187
):
164188
super().__init__()
165189
self.collection_uuid = collection_uuid
166190
self.ids_count = ids_count
167191
self.limit = limit
168192
self.include_metadata = include_metadata
169193
self.include_documents = include_documents
194+
self.batch_size = batch_size
195+
196+
@property
197+
def batch_key(self) -> str:
198+
return self.collection_uuid + self.name + str(self.limit)
199+
200+
def batch(self, other: "ProductTelemetryEvent") -> "CollectionGetEvent":
201+
if not self.batch_key == other.batch_key:
202+
raise ValueError("Cannot batch events")
203+
other = cast(CollectionGetEvent, other)
204+
total_amount = self.ids_count + other.ids_count
205+
return CollectionGetEvent(
206+
collection_uuid=self.collection_uuid,
207+
ids_count=total_amount,
208+
limit=self.limit,
209+
include_metadata=self.include_metadata + other.include_metadata,
210+
include_documents=self.include_documents + other.include_documents,
211+
batch_size=self.batch_size + other.batch_size,
212+
)
170213

171214

172215
class CollectionDeleteEvent(ProductTelemetryEvent):

0 commit comments

Comments
 (0)