8
8
import torch
9
9
from pydantic import BaseModel , ValidationInfo , field_validator
10
10
from sentence_transformers import SentenceTransformer
11
+ from sqlalchemy import or_
11
12
12
13
from seer .db import DbGroupingRecord , Session
13
14
@@ -18,8 +19,8 @@ class GroupingRequest(BaseModel):
18
19
project_id : int
19
20
stacktrace : str
20
21
message : str
21
- group_id : int | None = None
22
- stacktrace_hash : str | None = None
22
+ hash : str
23
+ group_id : Optional [ int ] = None
23
24
k : int = 1
24
25
threshold : float = 0.01
25
26
@@ -32,19 +33,19 @@ def check_field_is_not_empty(cls, v, info: ValidationInfo):
32
33
33
34
34
35
class GroupingRecord (BaseModel ):
35
- group_id : int | None
36
+ group_id : Optional [ int ]
36
37
project_id : int
37
38
message : str
38
39
stacktrace_embedding : np .ndarray
39
- stacktrace_hash : str | None
40
+ hash : str
40
41
41
42
def to_db_model(self) -> DbGroupingRecord:
    """
    Convert this record into its database representation.

    :return: A DbGroupingRecord carrying the same group_id, project_id, message,
        stacktrace_embedding and hash as this object.
    """
    db_record = DbGroupingRecord(
        group_id=self.group_id,
        project_id=self.project_id,
        message=self.message,
        stacktrace_embedding=self.stacktrace_embedding,
        hash=self.hash,
    )
    return db_record
49
50
50
51
class Config :
@@ -56,14 +57,14 @@ class Config:
56
57
57
58
class GroupingResponse(BaseModel):
    """
    One candidate match reported for a grouping request.

    Instances are built from a stored grouping record and its distance to the
    incoming request.
    """

    # group_id of the matched record; None when that record has no group id.
    parent_group_id: Optional[int]
    # hash of the matched record.
    parent_hash: str
    # Distance between the request's stacktrace embedding and the matched
    # record's embedding (presumably cosine distance -- confirm against the query).
    stacktrace_distance: float
    # 1.0 minus the message similarity score between request and matched record.
    message_distance: float
    # Whether the request should be grouped with the matched record.
    should_group: bool
62
64
63
65
64
66
class SimilarityResponse(BaseModel):
    """Container for the candidate matches found for one grouping request."""

    # One GroupingResponse per matched record; empty when nothing matched.
    responses: List[GroupingResponse]
67
68
68
69
69
70
class SimilarityBenchmarkResponse (BaseModel ):
@@ -149,25 +150,6 @@ def get_nearest_neighbors(self, issue: GroupingRequest) -> SimilarityResponse:
149
150
stacktrace similarity scores, message similarity scores, and grouping flags.
150
151
"""
151
152
with Session () as session :
152
- # If an exact match of the stacktrace hash is found, return this record
153
- if hasattr (issue , "stacktrace_hash" ) and issue .stacktrace_hash :
154
- existing_record = (
155
- session .query (DbGroupingRecord )
156
- .filter_by (stacktrace_hash = issue .stacktrace_hash )
157
- .first ()
158
- )
159
- if existing_record :
160
- similarity_response = SimilarityResponse (responses = [], token = None )
161
- similarity_response .responses .append (
162
- GroupingResponse (
163
- parent_group_id = existing_record .group_id ,
164
- stacktrace_distance = 0.00 ,
165
- message_distance = 0.00 ,
166
- should_group = True ,
167
- )
168
- )
169
- return similarity_response
170
-
171
153
embedding = self .encode_text (issue .stacktrace ).astype ("float32" )
172
154
173
155
results = (
@@ -180,22 +162,24 @@ def get_nearest_neighbors(self, issue: GroupingRequest) -> SimilarityResponse:
180
162
.filter (
181
163
DbGroupingRecord .project_id == issue .project_id ,
182
164
DbGroupingRecord .stacktrace_embedding .cosine_distance (embedding ) <= 0.15 ,
183
- DbGroupingRecord .group_id != issue .group_id ,
184
- DbGroupingRecord .group_id != None ,
165
+ or_ (
166
+ DbGroupingRecord .group_id != issue .group_id ,
167
+ DbGroupingRecord .group_id == None ,
168
+ ),
169
+ # TODO: We can return a group as a similar group to itself if it exists in the old table with no hash
170
+ DbGroupingRecord .hash != issue .hash ,
185
171
)
186
172
.order_by ("distance" )
187
173
.limit (issue .k )
188
174
.all ()
189
175
)
190
176
191
177
# If no existing groups within the threshold, insert the request as a new GroupingRecord
192
- token = None
193
178
if not any (distance <= issue .threshold for _ , distance in results ):
194
- token = self .insert_new_grouping_record (session , issue , embedding )
195
-
179
+ self .insert_new_grouping_record (session , issue , embedding )
196
180
session .commit ()
197
181
198
- similarity_response = SimilarityResponse (responses = [], token = token )
182
+ similarity_response = SimilarityResponse (responses = [])
199
183
for record , distance in results :
200
184
message_similarity_score = difflib .SequenceMatcher (
201
185
None , issue .message , record .message
@@ -204,7 +188,8 @@ def get_nearest_neighbors(self, issue: GroupingRequest) -> SimilarityResponse:
204
188
205
189
similarity_response .responses .append (
206
190
GroupingResponse (
207
- parent_group_id = record .group_id ,
191
+ parent_group_id = record .group_id if hasattr (record , "group_id" ) else None ,
192
+ parent_hash = record .hash ,
208
193
stacktrace_distance = distance ,
209
194
message_distance = 1.0 - message_similarity_score ,
210
195
should_group = should_group ,
@@ -215,7 +200,7 @@ def get_nearest_neighbors(self, issue: GroupingRequest) -> SimilarityResponse:
215
200
216
201
def insert_new_grouping_record(
    self, session, issue: GroupingRequest, embedding: np.ndarray
) -> None:
    """
    Adds a new GroupingRecord to the session unless a record with the same hash
    already exists.

    Nothing is returned and the session is not committed here; the caller is
    responsible for committing.

    :param session: The database session used for the lookup and the insert.
    :param issue: The issue to insert as a new GroupingRecord.
    :param embedding: The embedding of the stacktrace.
    """
    # Records are de-duplicated on hash, not on group_id.
    existing_record = session.query(DbGroupingRecord).filter_by(hash=issue.hash).first()
    if existing_record is not None:
        return

    new_record = GroupingRecord(
        # group_id is optional on the request; fall back to None when absent.
        group_id=getattr(issue, "group_id", None),
        project_id=issue.project_id,
        message=issue.message,
        stacktrace_embedding=embedding,
        hash=issue.hash,
    ).to_db_model()
    session.add(new_record)
0 commit comments