
Commit 36354a2

Authored by raghu017 (Raghu Ganapathi) and Raghu Ganapathi
Add loguru for logging (#282)
* Add loguru for logging
* Add logger
* Replace exception with error

---------

Co-authored-by: Raghu Ganapathi <[email protected]>
1 parent 66d91c9 commit 36354a2
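
In broad strokes, the commit swaps ad-hoc print() calls in the datastore providers for loguru's ready-made logger. A minimal sketch of that pattern (the _store dict and fetch() helper below are hypothetical, for illustration only; loguru's default sink writes levelled, timestamped records to stderr with no setup):

from loguru import logger

_store = {"doc-1": {"text": "hello"}}  # hypothetical stand-in for a real datastore

def fetch(doc_id: str) -> dict:
    try:
        logger.info(f"Fetching document {doc_id}")
        return _store[doc_id]
    except Exception as e:
        logger.error(e)  # was: print(f"Error: {e}")
        return {}

fetch("doc-1")    # logged at INFO
fetch("missing")  # the KeyError is logged at ERROR instead of printed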

File tree: 20 files changed, +141 −132 lines changed


datastore/providers/analyticdb_datastore.py (+3 −2)
@@ -2,6 +2,7 @@
 import asyncio
 from typing import Dict, List, Optional, Tuple, Any
 from datetime import datetime
+from loguru import logger
 
 from psycopg2cffi import compat

@@ -252,7 +253,7 @@ def create_results(data):
                        QueryResult(query=query.query, results=results)
                    )
                except Exception as e:
-                   print("error:", e)
+                   logger.error(e)
                    query_results.append(QueryResult(query=query.query, results=[]))
            return query_results
        finally:
@@ -275,7 +276,7 @@ async def execute_delete(query: str, params: Optional[List] = None) -> bool:
                self.conn.commit()
                return True
            except Exception as e:
-               print(f"Error: {e}")
+               logger.error(e)
                return False
            finally:
                self.connection_pool.putconn(conn)
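
The "Replace exception with error" bullet in the commit message lines up with loguru's two closely related calls: logger.exception() records the message plus the active traceback, while logger.error() (used above) records only the message, both at ERROR level. A quick sketch of the difference:

from loguru import logger

try:
    1 / 0
except Exception as e:
    logger.error(e)      # one-line record: "division by zero"
    logger.exception(e)  # same message, with the full traceback attached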

datastore/providers/milvus_datastore.py (+26 −33)
@@ -2,6 +2,7 @@
 import os
 import asyncio
 
+from loguru import logger
 from typing import Dict, List, Optional
 from pymilvus import (
     Collection,
@@ -124,14 +125,6 @@ def __init__(
         self._create_collection(MILVUS_COLLECTION, create_new) # type: ignore
         self._create_index()
 
-    def _print_info(self, msg):
-        # TODO: logger
-        print(msg)
-
-    def _print_err(self, msg):
-        # TODO: logger
-        print(msg)
-
     def _get_schema(self):
         return SCHEMA_V1 if self._schema_ver == "V1" else SCHEMA_V2

@@ -143,7 +136,7 @@ def _create_connection(self):
             addr = connections.get_connection_addr(x[0])
             if x[1] and ('address' in addr) and (addr['address'] == "{}:{}".format(MILVUS_HOST, MILVUS_PORT)):
                 self.alias = x[0]
-                self._print_info("Reuse connection to Milvus server '{}:{}' with alias '{:s}'"
+                logger.info("Reuse connection to Milvus server '{}:{}' with alias '{:s}'"
                             .format(MILVUS_HOST, MILVUS_PORT, self.alias))
                 break

@@ -158,10 +151,10 @@ def _create_connection(self):
                 password=MILVUS_PASSWORD, # type: ignore
                 secure=MILVUS_USE_SECURITY,
             )
-            self._print_info("Create connection to Milvus server '{}:{}' with alias '{:s}'"
+            logger.info("Create connection to Milvus server '{}:{}' with alias '{:s}'"
                         .format(MILVUS_HOST, MILVUS_PORT, self.alias))
         except Exception as e:
-            self._print_err("Failed to create connection to Milvus server '{}:{}', error: {}"
+            logger.error("Failed to create connection to Milvus server '{}:{}', error: {}"
                         .format(MILVUS_HOST, MILVUS_PORT, e))
 
     def _create_collection(self, collection_name, create_new: bool) -> None:
@@ -189,7 +182,7 @@ def _create_collection(self, collection_name, create_new: bool) -> None:
                     consistency_level=self._consistency_level,
                 )
                 self._schema_ver = "V2"
-                self._print_info("Create Milvus collection '{}' with schema {} and consistency level {}"
+                logger.info("Create Milvus collection '{}' with schema {} and consistency level {}"
                             .format(collection_name, self._schema_ver, self._consistency_level))
             else:
                 # If the collection exists, point to it
@@ -201,10 +194,10 @@ def _create_collection(self, collection_name, create_new: bool) -> None:
                     if field.name == "id" and field.is_primary:
                         self._schema_ver = "V2"
                         break
-                self._print_info("Milvus collection '{}' already exists with schema {}"
+                logger.info("Milvus collection '{}' already exists with schema {}"
                             .format(collection_name, self._schema_ver))
         except Exception as e:
-            self._print_err("Failed to create collection '{}', error: {}".format(collection_name, e))
+            logger.error("Failed to create collection '{}', error: {}".format(collection_name, e))
 
     def _create_index(self):
         # TODO: verify index/search params passed by os.environ
@@ -216,7 +209,7 @@ def _create_index(self):
                 if self.index_params is not None:
                     # Convert the string format to JSON format parameters passed by MILVUS_INDEX_PARAMS
                     self.index_params = json.loads(self.index_params)
-                    self._print_info("Create Milvus index: {}".format(self.index_params))
+                    logger.info("Create Milvus index: {}".format(self.index_params))
                     # Create an index on the 'embedding' field with the index params found in init
                     self.col.create_index(EMBEDDING_FIELD, index_params=self.index_params)
                 else:
@@ -227,24 +220,24 @@ def _create_index(self):
                             "index_type": "HNSW",
                             "params": {"M": 8, "efConstruction": 64},
                         }
-                        self._print_info("Attempting creation of Milvus '{}' index".format(i_p["index_type"]))
+                        logger.info("Attempting creation of Milvus '{}' index".format(i_p["index_type"]))
                         self.col.create_index(EMBEDDING_FIELD, index_params=i_p)
                         self.index_params = i_p
-                        self._print_info("Creation of Milvus '{}' index successful".format(i_p["index_type"]))
+                        logger.info("Creation of Milvus '{}' index successful".format(i_p["index_type"]))
                     # If create fails, most likely due to being Zilliz Cloud instance, try to create an AutoIndex
                     except MilvusException:
-                        self._print_info("Attempting creation of Milvus default index")
+                        logger.info("Attempting creation of Milvus default index")
                         i_p = {"metric_type": "IP", "index_type": "AUTOINDEX", "params": {}}
                         self.col.create_index(EMBEDDING_FIELD, index_params=i_p)
                         self.index_params = i_p
-                        self._print_info("Creation of Milvus default index successful")
+                        logger.info("Creation of Milvus default index successful")
             # If an index already exists, grab its params
             else:
                 # How about if the first index is not vector index?
                 for index in self.col.indexes:
                     idx = index.to_dict()
                     if idx["field"] == EMBEDDING_FIELD:
-                        self._print_info("Index already exists: {}".format(idx))
+                        logger.info("Index already exists: {}".format(idx))
                         self.index_params = idx['index_param']
                         break

@@ -272,9 +265,9 @@ def _create_index(self):
             }
             # Set the search params
             self.search_params = default_search_params[self.index_params["index_type"]]
-            self._print_info("Milvus search parameters: {}".format(self.search_params))
+            logger.info("Milvus search parameters: {}".format(self.search_params))
         except Exception as e:
-            self._print_err("Failed to create index, error: {}".format(e))
+            logger.error("Failed to create index, error: {}".format(e))
 
     async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]:
         """Upsert chunks into the datastore.
@@ -319,18 +312,18 @@ async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]:
             for batch in batches:
                 if len(batch[0]) != 0:
                     try:
-                        self._print_info(f"Upserting batch of size {len(batch[0])}")
+                        logger.info(f"Upserting batch of size {len(batch[0])}")
                         self.col.insert(batch)
-                        self._print_info(f"Upserted batch successfully")
+                        logger.info(f"Upserted batch successfully")
                     except Exception as e:
-                        self._print_err(f"Failed to insert batch records, error: {e}")
+                        logger.error(f"Failed to insert batch records, error: {e}")
                         raise e
 
             # This setting perfoms flushes after insert. Small insert == bad to use
             # self.col.flush()
             return doc_ids
         except Exception as e:
-            self._print_err("Failed to insert records, error: {}".format(e))
+            logger.error("Failed to insert records, error: {}".format(e))
             return []

@@ -365,7 +358,7 @@ def _get_values(self, chunk: DocumentChunk) -> List[any] | None: # type: ignore
            x = values.get(key) or default
            # If one of our required fields is missing, ignore the entire entry
            if x is Required:
-               self._print_info("Chunk " + values["id"] + " missing " + key + " skipping")
+               logger.info("Chunk " + values["id"] + " missing " + key + " skipping")
                return None
            # Add the corresponding value if it passes the tests
            ret.append(x)
@@ -436,7 +429,7 @@ async def _single_query(query: QueryWithEmbedding) -> QueryResult:
 
                 return QueryResult(query=query.query, results=results)
             except Exception as e:
-                self._print_err("Failed to query, error: {}".format(e))
+                logger.error("Failed to query, error: {}".format(e))
                 return QueryResult(query=query.query, results=[])
 
         results: List[QueryResult] = await asyncio.gather(
@@ -460,7 +453,7 @@ async def delete(
         # If deleting all, drop and create the new collection
         if delete_all:
             coll_name = self.col.name
-            self._print_info("Delete the entire collection {} and create new one".format(coll_name))
+            logger.info("Delete the entire collection {} and create new one".format(coll_name))
             # Release the collection from memory
             self.col.release()
             # Drop the collection
@@ -490,7 +483,7 @@ async def delete(
                pks = ['"' + pk + '"' for pk in pks]
 
            # Delete by ids batch by batch(avoid too long expression)
-           self._print_info("Apply {:d} deletions to schema {:s}".format(len(pks), self._schema_ver))
+           logger.info("Apply {:d} deletions to schema {:s}".format(len(pks), self._schema_ver))
            while len(pks) > 0:
                batch_pks = pks[:batch_size]
                pks = pks[batch_size:]
@@ -499,7 +492,7 @@ async def delete(
                    # Increment our deleted count
                    delete_count += int(res.delete_count) # type: ignore
        except Exception as e:
-           self._print_err("Failed to delete by ids, error: {}".format(e))
+           logger.error("Failed to delete by ids, error: {}".format(e))
 
        try:
            # Check if empty filter
@@ -524,9 +517,9 @@ async def delete(
                # Increment our delete count
                delete_count += int(res.delete_count) # type: ignore
        except Exception as e:
-           self._print_err("Failed to delete by filter, error: {}".format(e))
+           logger.error("Failed to delete by filter, error: {}".format(e))
 
-       self._print_info("{:d} records deleted".format(delete_count))
+       logger.info("{:d} records deleted".format(delete_count))
 
        # This setting performs flushes after delete. Small delete == bad to use
        # self.col.flush()
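
With the module-level logger in place, the deleted _print_info/_print_err wrappers (and their TODOs) have nothing left to do: each call site now logs at an explicit level directly. If per-provider context were ever wanted, loguru's bind() is one option, sketched below as a possibility rather than something this commit does (the extra fields only appear in output if a sink's format string includes {extra}):

from loguru import logger

log = logger.bind(provider="milvus")  # attaches extra={"provider": "milvus"} to each record
log.info("Create Milvus collection '{}' with schema {}", "my_collection", "V2")
log.error("Failed to create index, error: {}", "timeout")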

datastore/providers/pgvector_datastore.py (+2 −1)
@@ -1,6 +1,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional
 from datetime import datetime
+from loguru import logger
 
 from services.date import to_unix_timestamp
 from datastore.datastore import DataStore
@@ -147,7 +148,7 @@ async def _query(self, queries: List[QueryWithEmbedding]) -> List[QueryResult]:
                    results.append(document_chunk)
                query_results.append(QueryResult(query=query.query, results=results))
            except Exception as e:
-               print("error:", e)
+               logger.error(e)
                query_results.append(QueryResult(query=query.query, results=[]))
        return query_results

datastore/providers/pinecone_datastore.py (+22 −21)
@@ -3,6 +3,7 @@
 import pinecone
 from tenacity import retry, wait_random_exponential, stop_after_attempt
 import asyncio
+from loguru import logger
 
 from datastore.datastore import DataStore
 from models.models import (
@@ -41,7 +42,7 @@ def __init__(self):
 
             # Create a new index with the specified name, dimension, and metadata configuration
             try:
-                print(
+                logger.info(
                     f"Creating index {PINECONE_INDEX} with metadata config {fields_to_index}"
                 )
                 pinecone.create_index(
@@ -50,18 +51,18 @@ def __init__(self):
                     metadata_config={"indexed": fields_to_index},
                 )
                 self.index = pinecone.Index(PINECONE_INDEX)
-                print(f"Index {PINECONE_INDEX} created successfully")
+                logger.info(f"Index {PINECONE_INDEX} created successfully")
             except Exception as e:
-                print(f"Error creating index {PINECONE_INDEX}: {e}")
+                logger.error(f"Error creating index {PINECONE_INDEX}: {e}")
                 raise e
         elif PINECONE_INDEX and PINECONE_INDEX in pinecone.list_indexes():
             # Connect to an existing index with the specified name
             try:
-                print(f"Connecting to existing index {PINECONE_INDEX}")
+                logger.info(f"Connecting to existing index {PINECONE_INDEX}")
                 self.index = pinecone.Index(PINECONE_INDEX)
-                print(f"Connected to index {PINECONE_INDEX} successfully")
+                logger.info(f"Connected to index {PINECONE_INDEX} successfully")
             except Exception as e:
-                print(f"Error connecting to index {PINECONE_INDEX}: {e}")
+                logger.error(f"Error connecting to index {PINECONE_INDEX}: {e}")
                 raise e
 
     @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3))
@@ -78,7 +79,7 @@ async def _upsert(self, chunks: Dict[str, List[DocumentChunk]]) -> List[str]:
         for doc_id, chunk_list in chunks.items():
             # Append the id to the ids list
             doc_ids.append(doc_id)
-            print(f"Upserting document_id: {doc_id}")
+            logger.info(f"Upserting document_id: {doc_id}")
             for chunk in chunk_list:
                 # Create a vector tuple of (id, embedding, metadata)
                 # Convert the metadata object to a dict with unix timestamps for dates
@@ -97,11 +98,11 @@
         # Upsert each batch to Pinecone
         for batch in batches:
             try:
-                print(f"Upserting batch of size {len(batch)}")
+                logger.info(f"Upserting batch of size {len(batch)}")
                 self.index.upsert(vectors=batch)
-                print(f"Upserted batch successfully")
+                logger.info(f"Upserted batch successfully")
             except Exception as e:
-                print(f"Error upserting batch: {e}")
+                logger.error(f"Error upserting batch: {e}")
                 raise e
 
         return doc_ids
@@ -117,7 +118,7 @@ async def _query(
 
         # Define a helper coroutine that performs a single query and returns a QueryResult
         async def _single_query(query: QueryWithEmbedding) -> QueryResult:
-            print(f"Query: {query.query}")
+            logger.debug(f"Query: {query.query}")
 
             # Convert the metadata filter object to a dict with pinecone filter expressions
             pinecone_filter = self._get_pinecone_filter(query.filter)
@@ -132,7 +133,7 @@ async def _single_query(query: QueryWithEmbedding) -> QueryResult:
                     include_metadata=True,
                 )
             except Exception as e:
-                print(f"Error querying index: {e}")
+                logger.error(f"Error querying index: {e}")
                 raise e
 
             query_results: List[DocumentChunkWithScore] = []
@@ -184,35 +185,35 @@ async def delete(
         # Delete all vectors from the index if delete_all is True
         if delete_all:
             try:
-                print(f"Deleting all vectors from index")
+                logger.info(f"Deleting all vectors from index")
                 self.index.delete(delete_all=True)
-                print(f"Deleted all vectors successfully")
+                logger.info(f"Deleted all vectors successfully")
                 return True
             except Exception as e:
-                print(f"Error deleting all vectors: {e}")
+                logger.error(f"Error deleting all vectors: {e}")
                 raise e
 
         # Convert the metadata filter object to a dict with pinecone filter expressions
         pinecone_filter = self._get_pinecone_filter(filter)
         # Delete vectors that match the filter from the index if the filter is not empty
         if pinecone_filter != {}:
             try:
-                print(f"Deleting vectors with filter {pinecone_filter}")
+                logger.info(f"Deleting vectors with filter {pinecone_filter}")
                 self.index.delete(filter=pinecone_filter)
-                print(f"Deleted vectors with filter successfully")
+                logger.info(f"Deleted vectors with filter successfully")
             except Exception as e:
-                print(f"Error deleting vectors with filter: {e}")
+                logger.error(f"Error deleting vectors with filter: {e}")
                 raise e
 
         # Delete vectors that match the document ids from the index if the ids list is not empty
         if ids is not None and len(ids) > 0:
             try:
-                print(f"Deleting vectors with ids {ids}")
+                logger.info(f"Deleting vectors with ids {ids}")
                 pinecone_filter = {"document_id": {"$in": ids}}
                 self.index.delete(filter=pinecone_filter) # type: ignore
-                print(f"Deleted vectors with ids successfully")
+                logger.info(f"Deleted vectors with ids successfully")
             except Exception as e:
-                print(f"Error deleting vectors with ids: {e}")
+                logger.error(f"Error deleting vectors with ids: {e}")
                 raise e
 
         return True
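
One behavioural note on the pinecone provider: the per-query line moved to logger.debug, and loguru's default stderr sink emits DEBUG and above, so those lines remain visible out of the box. A deployment that wants to silence them could swap the sink for a quieter one; a short sketch under that assumption (not something this commit configures):

import sys
from loguru import logger

logger.remove()                       # drop the default DEBUG-level stderr handler
logger.add(sys.stderr, level="INFO")  # keep INFO and ERROR, hide the per-query DEBUG lines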
