cocoindex-io · badmonster0 · Apr 14, 2025 · Apr 14, 2025
diff --git a/README.md b/README.md
@@ -80,7 +80,10 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
         "doc_embeddings",
         cocoindex.storages.Postgres(),
         primary_key_fields=["filename", "location"],
-        vector_index=[("embedding", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
+        vector_indexes=[
+            cocoindex.VectorIndexDef(
+                field_name="embedding",
+                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
 ```
 
 It defines an index flow like this:

diff --git a/docs/docs/core/flow_def.mdx b/docs/docs/core/flow_def.mdx
@@ -259,8 +259,10 @@ Export must happen at the top level of a flow, i.e. not within any child scopes
 
 *   `name`: the name to identify the export target.
 *   `target_spec`: the storage spec as the export target.
-*   `primary_key_fields` (optional): the fields to be used as primary key. Types of the fields must be supported as key fields. See [Key Types](data_types#key-types) for more details.
-*   `vector_index` (optional): the fields to create vector index. Each item is a tuple of a field name and a similarity metric. See [Vector Type](data_types#vector-type) for more details about supported similarity metrics.
+*   `primary_key_fields` (`Sequence[str]`): the fields to be used as primary key. Types of the fields must be supported as key fields. See [Key Types](data_types#key-types) for more details.
+*   `vector_indexes` (`Sequence[VectorIndexDef]`, optional): the fields to create vector index. `VectorIndexDef` has the following fields:
+    *   `field_name`: the field to create vector index.
+    *   `metric`: the similarity metric to use. See [Vector Type](data_types#vector-type) for more details about supported similarity metrics.
 *   `setup_by_user` (optional):
      whether the export target is setup by user.
      By default, CocoIndex is managing the target setup (surfaced by the `cocoindex setup` CLI subcommand), e.g. create related tables/collections/etc. with compatible schema, and update them upon change.

diff --git a/docs/docs/getting_started/quickstart.md b/docs/docs/getting_started/quickstart.md
@@ -97,7 +97,10 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
         "doc_embeddings",
         cocoindex.storages.Postgres(),
         primary_key_fields=["filename", "location"],
-        vector_index=[("embedding", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
+        vector_indexes=[
+            cocoindex.VectorIndexDef(
+                field_name="embedding",
+                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
 ```
 
 Notes:

diff --git a/examples/code_embedding/main.py b/examples/code_embedding/main.py
@@ -41,7 +41,10 @@ def code_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
         "code_embeddings",
         cocoindex.storages.Postgres(),
         primary_key_fields=["filename", "location"],
-        vector_index=[("embedding", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
+        vector_indexes=[
+            cocoindex.VectorIndexDef(
+                field_name="embedding",
+                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
 
 
 query_handler = cocoindex.query.SimpleSemanticsQueryHandler(

diff --git a/examples/docs_to_kg/main.py b/examples/docs_to_kg/main.py
@@ -93,15 +93,13 @@ def docs_to_kg_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.D
             ),
             nodes={
                 "Entity": cocoindex.storages.Neo4jRelationshipNodeSpec(
-                    index_options=cocoindex.IndexOptions(
-                        primary_key_fields=["value"],
-                        vector_index_defs=[
-                            cocoindex.VectorIndexDef(
-                                field_name="embedding",
-                                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
-                            ),
-                        ],
-                    ),
+                    primary_key_fields=["value"],
+                    vector_indexes=[
+                        cocoindex.VectorIndexDef(
+                            field_name="embedding",
+                            metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+                        ),
+                    ],
                 ),
             },
         ),

diff --git a/examples/gdrive_text_embedding/main.py b/examples/gdrive_text_embedding/main.py
@@ -37,7 +37,10 @@ def gdrive_text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
         "doc_embeddings",
         cocoindex.storages.Postgres(),
         primary_key_fields=["filename", "location"],
-        vector_index=[("embedding", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
+        vector_indexes=[
+            cocoindex.VectorIndexDef(
+                field_name="embedding",
+                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
 
 query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
     name="SemanticsSearch",

diff --git a/examples/pdf_embedding/main.py b/examples/pdf_embedding/main.py
@@ -63,7 +63,10 @@ def pdf_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoinde
         "doc_embeddings",
         cocoindex.storages.Postgres(),
         primary_key_fields=["id"],
-        vector_index=[("embedding", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
+        vector_indexes=[
+            cocoindex.VectorIndexDef(
+                field_name="embedding",
+                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
 
 query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
     name="SemanticsSearch",

diff --git a/examples/text_embedding/main.py b/examples/text_embedding/main.py
@@ -35,7 +35,10 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
         "doc_embeddings",
         cocoindex.storages.Postgres(),
         primary_key_fields=["filename", "location"],
-        vector_index=[("embedding", cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
+        vector_indexes=[
+            cocoindex.VectorIndexDef(
+                field_name="embedding",
+                metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY)])
 
 query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
     name="SemanticsSearch",

diff --git a/python/cocoindex/flow.py b/python/cocoindex/flow.py
@@ -267,21 +267,27 @@ def collect(self, **kwargs):
             self._engine_data_collector, regular_kwargs, auto_uuid_field)
 
     def export(self, name: str, target_spec: op.StorageSpec, /, *,
-              primary_key_fields: Sequence[str] | None = None,
+              primary_key_fields: Sequence[str],
+              vector_indexes: Sequence[index.VectorIndexDef] = (),
               vector_index: Sequence[tuple[str, index.VectorSimilarityMetric]] = (),
               setup_by_user: bool = False):
         """
         Export the collected data to the specified target.
+
+        `vector_index` is for backward compatibility only. Please use `vector_indexes` instead.
         """
-        index_options: dict[str, Any] = {}
-        if primary_key_fields is not None:
-            index_options["primary_key_fields"] = primary_key_fields
-        index_options["vector_index_defs"] = [
-            {"field_name": field_name, "metric": metric.value}
-            for field_name, metric in vector_index]
+        # For backward compatibility only.
+        if len(vector_indexes) == 0 and len(vector_index) > 0:
+            vector_indexes = [index.VectorIndexDef(field_name=field_name, metric=metric)
+                             for field_name, metric in vector_index]
+
+        index_options = index.IndexOptions(
+            primary_key_fields=primary_key_fields,
+            vector_indexes=vector_indexes,
+        )
         self._flow_builder_state.engine_flow_builder.export(
             name, _spec_kind(target_spec), dump_engine_object(target_spec),
-            index_options, self._engine_data_collector, setup_by_user)
+            dump_engine_object(index_options), self._engine_data_collector, setup_by_user)
 
 
 _flow_name_builder = _NameBuilder()

diff --git a/python/cocoindex/index.py b/python/cocoindex/index.py
@@ -1,6 +1,6 @@
 from enum import Enum
 from dataclasses import dataclass
-
+from typing import Sequence
 class VectorSimilarityMetric(Enum):
     COSINE_SIMILARITY = "CosineSimilarity"
     L2_DISTANCE = "L2Distance"
@@ -19,5 +19,5 @@ class IndexOptions:
     """
     Options for an index.
     """
-    primary_key_fields: list[str] | None = None
-    vector_index_defs: list[VectorIndexDef] | None = None
+    primary_key_fields: Sequence[str]
+    vector_indexes: Sequence[VectorIndexDef] = ()
diff --git a/python/cocoindex/storages.py b/python/cocoindex/storages.py
@@ -35,8 +35,8 @@ class Neo4jRelationshipEndSpec:
 @dataclass
 class Neo4jRelationshipNodeSpec:
     """Spec for a Neo4j node type."""
-    key_field_name: str | None = None
-    index_options: index.IndexOptions | None = None
+    primary_key_fields: list[str]
+    vector_indexes: list[index.VectorIndexDef] | None = None
 class Neo4jRelationship(op.StorageSpec):
     """Graph storage powered by Neo4j."""
 

diff --git a/src/base/spec.rs b/src/base/spec.rs
@@ -232,7 +232,7 @@ pub struct IndexOptions {
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub primary_key_fields: Option<Vec<FieldName>>,
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    pub vector_index_defs: Vec<VectorIndexDef>,
+    pub vector_indexes: Vec<VectorIndexDef>,
 }
 
 /// Store data to a given sink.

diff --git a/src/execution/query.rs b/src/execution/query.rs
@@ -40,7 +40,7 @@ impl SimpleSemanticsQueryHandler {
             .position(|export_op| export_op.name == target_name)
             .unwrap();
         let export_op = &flow.flow_instance.export_ops[export_op_idx];
-        let vector_index_defs = &export_op.spec.index_options.vector_index_defs;
+        let vector_indexes = &export_op.spec.index_options.vector_indexes;
         let execution_plan = flow.get_execution_plan().await?;
         let analyzed_export_op = &execution_plan.export_ops[export_op_idx];
         Ok(Self {
@@ -55,8 +55,8 @@ impl SimpleSemanticsQueryHandler {
             },
             query_transform_flow,
             default_similarity_metric,
-            default_vector_field_name: if vector_index_defs.len() == 1 {
-                Some(vector_index_defs[0].field_name.clone())
+            default_vector_field_name: if vector_indexes.len() == 1 {
+                Some(vector_indexes[0].field_name.clone())
             } else {
                 None
             },

diff --git a/src/ops/storages/neo4j.rs b/src/ops/storages/neo4j.rs
@@ -39,6 +39,7 @@ pub struct RelationshipEndSpec {
 
 #[derive(Debug, Deserialize)]
 pub struct RelationshipNodeSpec {
+    #[serde(flatten)]
     index_options: spec::IndexOptions,
 }
 
@@ -586,7 +587,7 @@ impl NodeLabelSetupState {
             key_constraint_name,
             vector_indexes: spec
                 .index_options
-                .vector_index_defs
+                .vector_indexes
                 .iter()
                 .map(|v| -> Result<_> {
                     Ok((
@@ -639,7 +640,7 @@ impl RelationshipSetupState {
             key_field_names,
             key_constraint_name: format!("r__{}__key", spec.rel_type),
             vector_indexes: index_options
-                .vector_index_defs
+                .vector_indexes
                 .iter()
                 .map(|v| -> Result<_> {
                     Ok((

diff --git a/src/ops/storages/postgres.rs b/src/ops/storages/postgres.rs
@@ -508,7 +508,7 @@ impl SetupState {
                 .map(|f| (f.name.clone(), f.value_type.typ.without_attrs()))
                 .collect(),
             vector_indexes: index_options
-                .vector_index_defs
+                .vector_indexes
                 .iter()
                 .map(|v| (to_vector_index_name(&table_id.table_name, v), v.clone()))
                 .collect(),
@@ -611,11 +611,11 @@ impl SetupStatusCheck {
                             .value_fields_schema
                             .iter()
                             .filter(|(field_name, schema)| {
-                                existing.possible_versions().any(|v| {
+                                !existing.current.as_ref().map_or(false, |v| {
                                     v.value_fields_schema
                                         .get(*field_name)
                                         .map(to_column_type_sql)
-                                        != Some(to_column_type_sql(schema))
+                                        == Some(to_column_type_sql(schema))
                                 })
                             })
                             .map(|(k, v)| (k.clone(), v.clone()))
@@ -639,9 +639,10 @@ impl SetupStatusCheck {
                         .vector_indexes
                         .iter()
                         .filter(|(name, def)| {
-                            existing
-                                .possible_versions()
-                                .any(|v| v.vector_indexes.get(*name) != Some(def))
+                            !existing
+                                .current
+                                .as_ref()
+                                .map_or(false, |v| v.vector_indexes.get(*name) != Some(def))
                         })
                         .map(|(k, v)| (k.clone(), v.clone()))
                         .collect(),
@@ -879,7 +880,7 @@ impl setup::ResourceSetupStatusCheck for SetupStatusCheck {
                 let sql = format!(
                     "CREATE INDEX IF NOT EXISTS {} ON {} {}",
                     index_name,
-                    index_spec.field_name,
+                    self.table_id.table_name,
                     to_index_spec_sql(index_spec)
                 );
                 sqlx::query(&sql).execute(&db_pool).await?;