12
12
from semantic_kernel .connectors .ai .open_ai import OpenAIEmbeddingPromptExecutionSettings , OpenAITextEmbedding
13
13
from semantic_kernel .connectors .ai .open_ai .services .azure_text_embedding import AzureTextEmbedding
14
14
from semantic_kernel .connectors .memory .azure_ai_search import AzureAISearchCollection
15
+ from semantic_kernel .connectors .memory .azure_cosmos_db .azure_cosmos_db_no_sql_collection import (
16
+ AzureCosmosDBNoSQLCollection ,
17
+ )
15
18
from semantic_kernel .connectors .memory .in_memory import InMemoryVectorCollection
16
19
from semantic_kernel .connectors .memory .postgres .postgres_collection import PostgresCollection
17
20
from semantic_kernel .connectors .memory .qdrant import QdrantCollection
25
28
VectorStoreRecordVectorField ,
26
29
vectorstoremodel ,
27
30
)
28
-
29
-
30
- @vectorstoremodel
31
- @dataclass
32
- class MyDataModelArray :
33
- vector : Annotated [
34
- np .ndarray | None ,
35
- VectorStoreRecordVectorField (
36
- embedding_settings = {"embedding" : OpenAIEmbeddingPromptExecutionSettings (dimensions = 1536 )},
37
- index_kind = "hnsw" ,
38
- dimensions = 1536 ,
39
- distance_function = "cosine_similarity" ,
40
- property_type = "float" ,
41
- serialize_function = np .ndarray .tolist ,
42
- deserialize_function = np .array ,
43
- ),
44
- ] = None
45
- other : str | None = None
46
- id : Annotated [str , VectorStoreRecordKeyField ()] = field (default_factory = lambda : str (uuid4 ()))
47
- content : Annotated [
48
- str , VectorStoreRecordDataField (has_embedding = True , embedding_property_name = "vector" , property_type = "str" )
49
- ] = "content1"
50
-
51
-
52
- @vectorstoremodel
53
- @dataclass
54
- class MyDataModelList :
55
- vector : Annotated [
56
- list [float ] | None ,
57
- VectorStoreRecordVectorField (
58
- embedding_settings = {"embedding" : OpenAIEmbeddingPromptExecutionSettings (dimensions = 1536 )},
59
- index_kind = "hnsw" ,
60
- dimensions = 1536 ,
61
- distance_function = "cosine_similarity" ,
62
- property_type = "float" ,
63
- ),
64
- ] = None
65
- other : str | None = None
66
- id : Annotated [str , VectorStoreRecordKeyField ()] = field (default_factory = lambda : str (uuid4 ()))
67
- content : Annotated [
68
- str , VectorStoreRecordDataField (has_embedding = True , embedding_property_name = "vector" , property_type = "str" )
69
- ] = "content1"
31
+ from semantic_kernel .data .const import DistanceFunction , IndexKind
32
+
33
+
34
def get_data_model_array(
    index_kind: IndexKind, distance_function: DistanceFunction, *, dimensions: int = 1536
) -> type:
    """Build a record data model whose vector field is a numpy array.

    The model is defined inside a function so that the index kind and the
    distance function can be chosen by the caller instead of being fixed
    in the class definition.

    Args:
        index_kind: Index kind set on the vector field.
        distance_function: Distance function set on the vector field.
        dimensions: Embedding size. The same value is passed to the embedding
            execution settings and to the vector field definition so the two
            cannot drift apart. Defaults to 1536.

    Returns:
        The decorated ``DataModelArray`` dataclass type.
    """

    @vectorstoremodel
    @dataclass
    class DataModelArray:
        vector: Annotated[
            np.ndarray | None,
            VectorStoreRecordVectorField(
                # The settings are keyed by "embedding".
                embedding_settings={"embedding": OpenAIEmbeddingPromptExecutionSettings(dimensions=dimensions)},
                index_kind=index_kind,
                dimensions=dimensions,
                distance_function=distance_function,
                property_type="float",
                # Convert between numpy arrays and plain lists.
                serialize_function=np.ndarray.tolist,
                deserialize_function=np.array,
            ),
        ] = None
        other: str | None = None
        # Every record gets a fresh UUID string unless an id is supplied.
        id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
        # Data field flagged as having an embedding, held in the "vector" property.
        content: Annotated[
            str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
        ] = "content1"

    return DataModelArray
57
+
58
+
59
def get_data_model_list(
    index_kind: IndexKind, distance_function: DistanceFunction, *, dimensions: int = 1536
) -> type:
    """Build a record data model whose vector field is a plain list of floats.

    The model is defined inside a function so that the index kind and the
    distance function can be chosen by the caller instead of being fixed
    in the class definition.

    Args:
        index_kind: Index kind set on the vector field.
        distance_function: Distance function set on the vector field.
        dimensions: Embedding size. The same value is passed to the embedding
            execution settings and to the vector field definition so the two
            cannot drift apart. Defaults to 1536.

    Returns:
        The decorated ``DataModelList`` dataclass type.
    """

    @vectorstoremodel
    @dataclass
    class DataModelList:
        # No serialize/deserialize hooks are configured for the list-typed vector.
        vector: Annotated[
            list[float] | None,
            VectorStoreRecordVectorField(
                # The settings are keyed by "embedding".
                embedding_settings={"embedding": OpenAIEmbeddingPromptExecutionSettings(dimensions=dimensions)},
                index_kind=index_kind,
                dimensions=dimensions,
                distance_function=distance_function,
                property_type="float",
            ),
        ] = None
        other: str | None = None
        # Every record gets a fresh UUID string unless an id is supplied.
        id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
        # Data field flagged as having an embedding, held in the "vector" property.
        content: Annotated[
            str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
        ] = "content1"

    return DataModelList
70
80
71
81
72
82
# Name of the collection used by the sample.
collection_name = "test"

# Not every vector database supports every combination of index kind and
# distance function, so adjust the two arguments below to suit the database in use.
DataModel = get_data_model_array(
    index_kind=IndexKind.HNSW,
    distance_function=DistanceFunction.COSINE,
)
74
86
75
87
# A list of VectorStoreRecordCollection that can be used.
76
- # Available stores are:
88
+ # Available collections are:
77
89
# - ai_search: Azure AI Search
78
90
# - postgres: PostgreSQL
79
91
# - redis_json: Redis JSON
@@ -83,63 +95,74 @@ class MyDataModelList:
83
95
# - weaviate: Weaviate
84
96
# Please either configure the weaviate settings via environment variables or provide them through the constructor.
85
97
# Note that embed mode is not supported on Windows: https://github.com/weaviate/weaviate/issues/3315
86
- #
87
- # This is represented as a mapping from the store name to a
88
- # function which returns the store.
89
- # Using a function allows for lazy initialization of the store,
90
- # so that settings for unused stores do not cause validation errors.
91
- stores : dict [str , Callable [[], VectorStoreRecordCollection ]] = {
92
- "ai_search" : lambda : AzureAISearchCollection [MyDataModel ](
93
- data_model_type = MyDataModel ,
98
# - azure_cosmos_nosql: Azure Cosmos DB for NoSQL
#   https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/how-to-create-account?tabs=azure-portal
#   Please see the link above to learn how to set up an Azure Cosmos DB for NoSQL account.
#   https://learn.microsoft.com/en-us/azure/cosmos-db/how-to-develop-emulator?tabs=windows%2Cpython&pivots=api-nosql
#   Please see the link above to learn how to set up the Azure Cosmos DB emulator on your machine.
#   For this sample to work with Azure Cosmos DB for NoSQL, change the index_kind of the data model
#   to IndexKind.QUANTIZED_FLAT.
#
# The available collections are represented as a mapping from the collection name to a
# function which returns the collection.
# Using a function allows for lazy initialization of the collection,
# so that settings for unused collections do not cause validation errors.
108
# One zero-argument factory per supported collection. Nothing is constructed
# until a factory is called, so only the selected collection is instantiated.


def _ai_search_collection() -> VectorStoreRecordCollection:
    """Create the Azure AI Search collection for DataModel."""
    return AzureAISearchCollection[DataModel](data_model_type=DataModel)


def _postgres_collection() -> VectorStoreRecordCollection:
    """Create the PostgreSQL collection (string keys) for DataModel."""
    return PostgresCollection[str, DataModel](data_model_type=DataModel, collection_name=collection_name)


def _redis_json_collection() -> VectorStoreRecordCollection:
    """Create the Redis JSON collection for DataModel."""
    return RedisJsonCollection[DataModel](
        data_model_type=DataModel,
        collection_name=collection_name,
        prefix_collection_name_to_key_names=True,
    )


def _redis_hashset_collection() -> VectorStoreRecordCollection:
    """Create the Redis hashset collection for DataModel."""
    return RedisHashsetCollection[DataModel](
        data_model_type=DataModel,
        collection_name=collection_name,
        prefix_collection_name_to_key_names=True,
    )


def _qdrant_collection() -> VectorStoreRecordCollection:
    """Create the Qdrant collection for DataModel."""
    return QdrantCollection[DataModel](
        data_model_type=DataModel,
        collection_name=collection_name,
        prefer_grpc=True,
        named_vectors=False,
    )


def _in_memory_collection() -> VectorStoreRecordCollection:
    """Create the in-memory collection for DataModel."""
    return InMemoryVectorCollection[DataModel](data_model_type=DataModel, collection_name=collection_name)


def _weaviate_collection() -> VectorStoreRecordCollection:
    """Create the Weaviate collection for DataModel."""
    return WeaviateCollection[DataModel](data_model_type=DataModel, collection_name=collection_name)


def _azure_cosmos_nosql_collection() -> VectorStoreRecordCollection:
    """Create the Azure Cosmos NoSQL collection for DataModel."""
    return AzureCosmosDBNoSQLCollection(
        data_model_type=DataModel,
        database_name="sample_database",
        collection_name=collection_name,
        create_database=True,
    )


# NOTE: this module-level name shadows the stdlib `collections` module within this file.
collections: dict[str, Callable[[], VectorStoreRecordCollection]] = {
    "ai_search": _ai_search_collection,
    "postgres": _postgres_collection,
    "redis_json": _redis_json_collection,
    "redis_hashset": _redis_hashset_collection,
    "qdrant": _qdrant_collection,
    "in_memory": _in_memory_collection,
    "weaviate": _weaviate_collection,
    "azure_cosmos_nosql": _azure_cosmos_nosql_collection,
}
121
144
122
145
123
- async def main (store : str , use_azure_openai : bool , embedding_model : str ):
146
+ async def main (collection : str , use_azure_openai : bool , embedding_model : str ):
124
147
kernel = Kernel ()
125
148
service_id = "embedding"
126
149
if use_azure_openai :
127
150
kernel .add_service (AzureTextEmbedding (service_id = service_id , deployment_name = embedding_model ))
128
151
else :
129
152
kernel .add_service (OpenAITextEmbedding (service_id = service_id , ai_model_id = embedding_model ))
130
- async with stores [ store ]() as record_store :
131
- await record_store .create_collection_if_not_exists ()
153
+ async with collections [ collection ]() as record_collection :
154
+ await record_collection .create_collection_if_not_exists ()
132
155
133
- record1 = MyDataModel (content = "My text" , id = "e6103c03-487f-4d7d-9c23-4723651c17f4" )
134
- record2 = MyDataModel (content = "My other text" , id = "09caec77-f7e1-466a-bcec-f1d51c5b15be" )
156
+ record1 = DataModel (content = "My text" , id = "e6103c03-487f-4d7d-9c23-4723651c17f4" )
157
+ record2 = DataModel (content = "My other text" , id = "09caec77-f7e1-466a-bcec-f1d51c5b15be" )
135
158
136
159
records = await VectorStoreRecordUtils (kernel ).add_vector_to_records (
137
- [record1 , record2 ], data_model_type = MyDataModel
160
+ [record1 , record2 ], data_model_type = DataModel
138
161
)
139
- keys = await record_store .upsert_batch (records )
162
+ keys = await record_collection .upsert_batch (records )
140
163
print (f"upserted { keys = } " )
141
164
142
- results = await record_store .get_batch ([record1 .id , record2 .id ])
165
+ results = await record_collection .get_batch ([record1 .id , record2 .id ])
143
166
if results :
144
167
for result in results :
145
168
print (f"found { result .id = } " )
@@ -156,7 +179,7 @@ async def main(store: str, use_azure_openai: bool, embedding_model: str):
156
179
argparse .ArgumentParser ()
157
180
158
181
parser = argparse .ArgumentParser ()
159
- parser .add_argument ("--store " , default = "in_memory" , choices = stores .keys (), help = "What store to use." )
182
+ parser .add_argument ("--collection " , default = "in_memory" , choices = collections .keys (), help = "What collection to use." )
160
183
# Option of whether to use OpenAI or Azure OpenAI.
161
184
parser .add_argument ("--use-azure-openai" , action = "store_true" , help = "Use Azure OpenAI instead of OpenAI." )
162
185
# Model
@@ -165,4 +188,4 @@ async def main(store: str, use_azure_openai: bool, embedding_model: str):
165
188
)
166
189
args = parser .parse_args ()
167
190
168
- asyncio .run (main (store = args .store , use_azure_openai = args .use_azure_openai , embedding_model = args .model ))
191
+ asyncio .run (main (collection = args .collection , use_azure_openai = args .use_azure_openai , embedding_model = args .model ))
0 commit comments