@@ -2,6 +2,7 @@
 import os
 import asyncio
 
+from loguru import logger
 from typing import Dict, List, Optional
 from pymilvus import (
     Collection,
@@ -124,14 +125,6 @@ def __init__(
         self._create_collection(MILVUS_COLLECTION, create_new)  # type: ignore
         self._create_index()
 
-    def _print_info(self, msg):
-        # TODO: logger
-        print(msg)
-
-    def _print_err(self, msg):
-        # TODO: logger
-        print(msg)
-
     def _get_schema(self):
         return SCHEMA_V1 if self._schema_ver == "V1" else SCHEMA_V2
 
@@ -143,7 +136,7 @@ def _create_connection(self):
                 addr = connections.get_connection_addr(x[0])
                 if x[1] and ('address' in addr) and (addr['address'] == "{}:{}".format(MILVUS_HOST, MILVUS_PORT)):
                     self.alias = x[0]
-                    self._print_info("Reuse connection to Milvus server '{}:{}' with alias '{:s}'"
+                    logger.info("Reuse connection to Milvus server '{}:{}' with alias '{:s}'"
                                 .format(MILVUS_HOST, MILVUS_PORT, self.alias))
                     break
 
@@ -158,10 +151,10 @@ def _create_connection(self):
                     password=MILVUS_PASSWORD,  # type: ignore
                     secure=MILVUS_USE_SECURITY,
                 )
-                self._print_info("Create connection to Milvus server '{}:{}' with alias '{:s}'"
+                logger.info("Create connection to Milvus server '{}:{}' with alias '{:s}'"
                             .format(MILVUS_HOST, MILVUS_PORT, self.alias))
         except Exception as e:
-            self._print_err("Failed to create connection to Milvus server '{}:{}', error: {}"
+            logger.error("Failed to create connection to Milvus server '{}:{}', error: {}"
                         .format(MILVUS_HOST, MILVUS_PORT, e))
 
     def _create_collection(self, collection_name, create_new: bool) -> None:
@@ -189,7 +182,7 @@ def _create_collection(self, collection_name, create_new: bool) -> None:
                     consistency_level=self._consistency_level,
                 )
                 self._schema_ver = "V2"
-                self._print_info("Create Milvus collection '{}' with schema {} and consistency level {}"
+                logger.info("Create Milvus collection '{}' with schema {} and consistency level {}"
                             .format(collection_name, self._schema_ver, self._consistency_level))
             else:
                 # If the collection exists, point to it
@@ -201,10 +194,10 @@ def _create_collection(self, collection_name, create_new: bool) -> None:
                     if field.name == "id" and field.is_primary:
                         self._schema_ver = "V2"
                         break
-                self._print_info("Milvus collection '{}' already exists with schema {}"
+                logger.info("Milvus collection '{}' already exists with schema {}"
                             .format(collection_name, self._schema_ver))
         except Exception as e:
-            self._print_err("Failed to create collection '{}', error: {}".format(collection_name, e))
+            logger.error("Failed to create collection '{}', error: {}".format(collection_name, e))
 
     def _create_index(self):
         # TODO: verify index/search params passed by os.environ
@@ -216,7 +209,7 @@ def _create_index(self):
             if self.index_params is not None:
                 # Convert the string format to JSON format parameters passed by MILVUS_INDEX_PARAMS
                 self.index_params = json.loads(self.index_params)
-                self._print_info("Create Milvus index: {}".format(self.index_params))
+                logger.info("Create Milvus index: {}".format(self.index_params))
                 # Create an index on the 'embedding' field with the index params found in init
                 self.col.create_index(EMBEDDING_FIELD, index_params=self.index_params)
             else:
@@ -227,24 +220,24 @@ def _create_index(self):
                         "index_type": "HNSW",
                         "params": {"M": 8, "efConstruction": 64},
                     }
-                    self._print_info("Attempting creation of Milvus '{}' index".format(i_p["index_type"]))
+                    logger.info("Attempting creation of Milvus '{}' index".format(i_p["index_type"]))
                     self.col.create_index(EMBEDDING_FIELD, index_params=i_p)
                     self.index_params = i_p
-                    self._print_info("Creation of Milvus '{}' index successful".format(i_p["index_type"]))
+                    logger.info("Creation of Milvus '{}' index successful".format(i_p["index_type"]))
                 # If create fails, most likely due to being Zilliz Cloud instance, try to create an AutoIndex
                 except MilvusException:
-                    self._print_info("Attempting creation of Milvus default index")
+                    logger.info("Attempting creation of Milvus default index")
                     i_p = {"metric_type": "IP", "index_type": "AUTOINDEX", "params": {}}
                     self.col.create_index(EMBEDDING_FIELD, index_params=i_p)
                     self.index_params = i_p
-                    self._print_info("Creation of Milvus default index successful")
+                    logger.info("Creation of Milvus default index successful")
             # If an index already exists, grab its params
             else:
                 # How about if the first index is not vector index?
                 for index in self.col.indexes:
                     idx = index.to_dict()
                     if idx["field"] == EMBEDDING_FIELD:
-                        self._print_info("Index already exists: {}".format(idx))
+                        logger.info("Index already exists: {}".format(idx))
                         self.index_params = idx['index_param']
                         break
 
@@ -272,9 +265,9 @@ def _create_index(self):
                 }
                 # Set the search params
                 self.search_params = default_search_params[self.index_params["index_type"]]
-            self._print_info("Milvus search parameters: {}".format(self.search_params))
+            logger.info("Milvus search parameters: {}".format(self.search_params))
         except Exception as e:
-            self._print_err("Failed to create index, error: {}".format(e))
+            logger.error("Failed to create index, error: {}".format(e))
 
     async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]:
         """Upsert chunks into the datastore.
@@ -319,18 +312,18 @@ async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]:
             for batch in batches:
                 if len(batch[0]) != 0:
                     try:
-                        self._print_info(f"Upserting batch of size {len(batch[0])}")
+                        logger.info(f"Upserting batch of size {len(batch[0])}")
                         self.col.insert(batch)
-                        self._print_info(f"Upserted batch successfully")
+                        logger.info(f"Upserted batch successfully")
                     except Exception as e:
-                        self._print_err(f"Failed to insert batch records, error: {e}")
+                        logger.error(f"Failed to insert batch records, error: {e}")
                         raise e
 
             # This setting performs flushes after insert. Small insert == bad to use
             # self.col.flush()
             return doc_ids
         except Exception as e:
-            self._print_err("Failed to insert records, error: {}".format(e))
+            logger.error("Failed to insert records, error: {}".format(e))
             return []
 
 
@@ -365,7 +358,7 @@ def _get_values(self, chunk: DocumentChunk) -> List[any] | None: # type: ignore
             x = values.get(key) or default
             # If one of our required fields is missing, ignore the entire entry
             if x is Required:
-                self._print_info("Chunk " + values["id"] + " missing " + key + " skipping")
+                logger.info("Chunk " + values["id"] + " missing " + key + " skipping")
                 return None
             # Add the corresponding value if it passes the tests
             ret.append(x)
@@ -436,7 +429,7 @@ async def _single_query(query: QueryWithEmbedding) -> QueryResult:
 
                 return QueryResult(query=query.query, results=results)
             except Exception as e:
-                self._print_err("Failed to query, error: {}".format(e))
+                logger.error("Failed to query, error: {}".format(e))
                 return QueryResult(query=query.query, results=[])
 
         results: List[QueryResult] = await asyncio.gather(
@@ -460,7 +453,7 @@ async def delete(
         # If deleting all, drop and create the new collection
         if delete_all:
             coll_name = self.col.name
-            self._print_info("Delete the entire collection {} and create new one".format(coll_name))
+            logger.info("Delete the entire collection {} and create new one".format(coll_name))
             # Release the collection from memory
             self.col.release()
             # Drop the collection
@@ -490,7 +483,7 @@ async def delete(
             pks = ['"' + pk + '"' for pk in pks]
 
             # Delete by ids batch by batch (avoid too long expression)
-            self._print_info("Apply {:d} deletions to schema {:s}".format(len(pks), self._schema_ver))
+            logger.info("Apply {:d} deletions to schema {:s}".format(len(pks), self._schema_ver))
             while len(pks) > 0:
                 batch_pks = pks[:batch_size]
                 pks = pks[batch_size:]
@@ -499,7 +492,7 @@ async def delete(
                     # Increment our deleted count
                     delete_count += int(res.delete_count)  # type: ignore
                 except Exception as e:
-                    self._print_err("Failed to delete by ids, error: {}".format(e))
+                    logger.error("Failed to delete by ids, error: {}".format(e))
 
         try:
             # Check if empty filter
@@ -524,9 +517,9 @@ async def delete(
                 # Increment our delete count
                 delete_count += int(res.delete_count)  # type: ignore
             except Exception as e:
-                self._print_err("Failed to delete by filter, error: {}".format(e))
+                logger.error("Failed to delete by filter, error: {}".format(e))
 
-        self._print_info("{:d} records deleted".format(delete_count))
+        logger.info("{:d} records deleted".format(delete_count))
 
         # This setting performs flushes after delete. Small delete == bad to use
         # self.col.flush()