Skip to content

Commit 993864d

Browse files
♻️ Improve DB query listing_projects performance (🗃️) (#7475)
1 parent ef99643 commit 993864d

File tree

13 files changed

+321
-209
lines changed

13 files changed

+321
-209
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""add indexes
2+
3+
Revision ID: cf8f743fd0b7
4+
Revises: 48604dfdc5f4
5+
Create Date: 2025-04-04 09:46:38.853675+00:00
6+
7+
"""
8+
9+
import sqlalchemy as sa
10+
from alembic import op
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "cf8f743fd0b7"
14+
down_revision = "48604dfdc5f4"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
20+
# ### commands auto generated by Alembic - please adjust! ###
21+
op.create_index(
22+
"idx_project_to_groups_gid", "project_to_groups", ["gid"], unique=False
23+
)
24+
op.create_index(
25+
"idx_projects_last_change_date_desc",
26+
"projects",
27+
["last_change_date"],
28+
unique=False,
29+
postgresql_using="btree",
30+
postgresql_ops={"last_change_date": "DESC"},
31+
)
32+
op.create_index(
33+
"ix_projects_partial_type",
34+
"projects",
35+
["type"],
36+
unique=False,
37+
postgresql_where=sa.text("type = 'TEMPLATE'"),
38+
)
39+
op.create_index(
40+
"idx_project_to_folders_project_uuid",
41+
"projects_to_folders",
42+
["project_uuid"],
43+
unique=False,
44+
)
45+
op.create_index(
46+
"idx_project_to_folders_user_id",
47+
"projects_to_folders",
48+
["user_id"],
49+
unique=False,
50+
)
51+
op.create_index(
52+
"idx_projects_to_products_product_name",
53+
"projects_to_products",
54+
["product_name"],
55+
unique=False,
56+
)
57+
op.create_index(
58+
"idx_workspaces_access_rights_gid",
59+
"workspaces_access_rights",
60+
["gid"],
61+
unique=False,
62+
)
63+
# ### end Alembic commands ###
64+
65+
66+
def downgrade():
67+
# ### commands auto generated by Alembic - please adjust! ###
68+
op.drop_index(
69+
"idx_workspaces_access_rights_gid", table_name="workspaces_access_rights"
70+
)
71+
op.drop_index(
72+
"idx_projects_to_products_product_name", table_name="projects_to_products"
73+
)
74+
op.drop_index("idx_project_to_folders_user_id", table_name="projects_to_folders")
75+
op.drop_index(
76+
"idx_project_to_folders_project_uuid", table_name="projects_to_folders"
77+
)
78+
op.drop_index(
79+
"ix_projects_partial_type",
80+
table_name="projects",
81+
postgresql_where=sa.text("type = 'TEMPLATE'"),
82+
)
83+
op.drop_index(
84+
"idx_projects_last_change_date_desc",
85+
table_name="projects",
86+
postgresql_using="btree",
87+
postgresql_ops={"last_change_date": "DESC"},
88+
)
89+
op.drop_index("idx_project_to_groups_gid", table_name="project_to_groups")
90+
# ### end Alembic commands ###

packages/postgres-database/src/simcore_postgres_database/models/project_to_groups.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,5 @@
6060
column_created_datetime(timezone=True),
6161
column_modified_datetime(timezone=True),
6262
sa.UniqueConstraint("project_uuid", "gid"),
63+
sa.Index("idx_project_to_groups_gid", "gid"),
6364
)

packages/postgres-database/src/simcore_postgres_database/models/projects.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
""" Projects table
1+
"""Projects table"""
22

3-
"""
43
import enum
54

65
import sqlalchemy as sa
@@ -171,6 +170,20 @@ class ProjectType(enum.Enum):
171170
server_default=sa.text("'{}'::jsonb"),
172171
doc="DEPRECATED: Read/write/delete access rights of each group (gid) on this project",
173172
),
173+
### INDEXES ----------------------------
174+
sa.Index(
175+
"idx_projects_last_change_date_desc",
176+
"last_change_date",
177+
postgresql_using="btree",
178+
postgresql_ops={"last_change_date": "DESC"},
179+
),
180+
)
181+
182+
# We define the partial index
183+
sa.Index(
184+
"ix_projects_partial_type",
185+
projects.c.type,
186+
postgresql_where=(projects.c.type == ProjectType.TEMPLATE),
174187
)
175188

176189

packages/postgres-database/src/simcore_postgres_database/models/projects_to_folders.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,6 @@
4242
column_created_datetime(timezone=True),
4343
column_modified_datetime(timezone=True),
4444
sa.UniqueConstraint("project_uuid", "folder_id", "user_id"),
45+
sa.Index("idx_project_to_folders_project_uuid", "project_uuid"),
46+
sa.Index("idx_project_to_folders_user_id", "user_id"),
4547
)

packages/postgres-database/src/simcore_postgres_database/models/projects_to_products.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,5 @@
3434
column_created_datetime(timezone=False),
3535
column_modified_datetime(timezone=False),
3636
sa.UniqueConstraint("project_uuid", "product_name"),
37+
sa.Index("idx_projects_to_products_product_name", "product_name"),
3738
)

packages/postgres-database/src/simcore_postgres_database/models/workspaces_access_rights.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,5 @@
5757
column_created_datetime(timezone=True),
5858
column_modified_datetime(timezone=True),
5959
sa.UniqueConstraint("workspace_id", "gid"),
60+
sa.Index("idx_workspaces_access_rights_gid", "gid"),
6061
)

services/storage/src/simcore_service_storage/modules/db/access_layer.py

Lines changed: 82 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
)
5151
from simcore_postgres_database.storage_models import file_meta_data, user_to_groups
5252
from simcore_postgres_database.utils_repos import pass_or_acquire_connection
53-
from simcore_postgres_database.utils_sql import assemble_array_groups
5453
from sqlalchemy.ext.asyncio import AsyncConnection
5554

5655
from ...exceptions.errors import InvalidFileIdentifierError
@@ -89,112 +88,104 @@ def _aggregate_access_rights(
8988
return AccessRights.none()
9089

9190

92-
access_rights_subquery = (
93-
sa.select(
94-
project_to_groups.c.project_uuid,
95-
sa.func.jsonb_object_agg(
96-
project_to_groups.c.gid,
97-
sa.func.jsonb_build_object(
98-
"read",
99-
project_to_groups.c.read,
100-
"write",
101-
project_to_groups.c.write,
102-
"delete",
103-
project_to_groups.c.delete,
104-
),
91+
def my_private_workspace_access_rights_subquery(user_group_ids: list[GroupID]):
92+
return (
93+
sa.select(
94+
project_to_groups.c.project_uuid,
95+
sa.func.jsonb_object_agg(
96+
project_to_groups.c.gid,
97+
sa.func.jsonb_build_object(
98+
"read",
99+
project_to_groups.c.read,
100+
"write",
101+
project_to_groups.c.write,
102+
"delete",
103+
project_to_groups.c.delete,
104+
),
105+
).label("access_rights"),
106+
)
107+
.where(
108+
(project_to_groups.c.read) # Filters out entries where "read" is False
109+
& (
110+
project_to_groups.c.gid.in_(user_group_ids)
111+
) # Filters gid to be in user_groups
105112
)
106-
.filter(project_to_groups.c.read) # Filters out entries where "read" is False
107-
.label("access_rights"),
108-
).group_by(project_to_groups.c.project_uuid)
109-
).subquery("access_rights_subquery")
110-
111-
112-
workspace_access_rights_subquery = (
113-
sa.select(
114-
workspaces_access_rights.c.workspace_id,
115-
sa.func.jsonb_object_agg(
116-
workspaces_access_rights.c.gid,
117-
sa.func.jsonb_build_object(
118-
"read",
119-
workspaces_access_rights.c.read,
120-
"write",
121-
workspaces_access_rights.c.write,
122-
"delete",
123-
workspaces_access_rights.c.delete,
124-
),
113+
.group_by(project_to_groups.c.project_uuid)
114+
).subquery("my_access_rights_subquery")
115+
116+
117+
def my_shared_workspace_access_rights_subquery(user_group_ids: list[GroupID]):
118+
return (
119+
sa.select(
120+
workspaces_access_rights.c.workspace_id,
121+
sa.func.jsonb_object_agg(
122+
workspaces_access_rights.c.gid,
123+
sa.func.jsonb_build_object(
124+
"read",
125+
workspaces_access_rights.c.read,
126+
"write",
127+
workspaces_access_rights.c.write,
128+
"delete",
129+
workspaces_access_rights.c.delete,
130+
),
131+
).label("access_rights"),
132+
)
133+
.where(
134+
(
135+
workspaces_access_rights.c.read
136+
) # Filters out entries where "read" is False
137+
& (
138+
workspaces_access_rights.c.gid.in_(user_group_ids)
139+
) # Filters gid to be in user_groups
125140
)
126-
.filter(workspaces_access_rights.c.read)
127-
.label("access_rights"),
128-
).group_by(workspaces_access_rights.c.workspace_id)
129-
).subquery("workspace_access_rights_subquery")
141+
.group_by(workspaces_access_rights.c.workspace_id)
142+
).subquery("my_workspace_access_rights_subquery")
130143

131144

132-
async def _list_projects_access_rights(
145+
async def _list_user_projects_access_rights_with_read_access(
133146
connection: AsyncConnection, user_id: UserID
134-
) -> dict[ProjectID, AccessRights]:
147+
) -> list[ProjectID]:
135148
"""
136149
Returns access-rights of user (user_id) over all OWNED or SHARED projects
137150
"""
138151

139152
user_group_ids: list[GroupID] = await _get_user_groups_ids(connection, user_id)
153+
_my_access_rights_subquery = my_private_workspace_access_rights_subquery(
154+
user_group_ids
155+
)
140156

141157
private_workspace_query = (
142158
sa.select(
143159
projects.c.uuid,
144-
access_rights_subquery.c.access_rights,
145-
)
146-
.select_from(projects.join(access_rights_subquery, isouter=True))
147-
.where(
148-
(
149-
(projects.c.prj_owner == user_id)
150-
| sa.text(
151-
f"jsonb_exists_any(access_rights_subquery.access_rights, {assemble_array_groups(user_group_ids)})"
152-
)
153-
)
154-
& (projects.c.workspace_id.is_(None))
155160
)
161+
.select_from(projects.join(_my_access_rights_subquery))
162+
.where(projects.c.workspace_id.is_(None))
163+
)
164+
165+
_my_workspace_access_rights_subquery = my_shared_workspace_access_rights_subquery(
166+
user_group_ids
156167
)
157168

158169
shared_workspace_query = (
159-
sa.select(
160-
projects.c.uuid,
161-
workspace_access_rights_subquery.c.access_rights,
162-
)
170+
sa.select(projects.c.uuid)
163171
.select_from(
164172
projects.join(
165-
workspace_access_rights_subquery,
173+
_my_workspace_access_rights_subquery,
166174
projects.c.workspace_id
167-
== workspace_access_rights_subquery.c.workspace_id,
175+
== _my_workspace_access_rights_subquery.c.workspace_id,
168176
)
169177
)
170-
.where(
171-
(
172-
sa.text(
173-
f"jsonb_exists_any(workspace_access_rights_subquery.access_rights, {assemble_array_groups(user_group_ids)})"
174-
)
175-
)
176-
& (projects.c.workspace_id.is_not(None))
177-
)
178+
.where(projects.c.workspace_id.is_not(None))
178179
)
179180

180181
combined_query = sa.union_all(private_workspace_query, shared_workspace_query)
181182

182-
projects_access_rights = {}
183+
projects_access_rights = []
183184

184185
async for row in await connection.stream(combined_query):
185-
assert isinstance(row.access_rights, dict) # nosec
186186
assert isinstance(row.uuid, str) # nosec
187187

188-
if row.access_rights:
189-
# NOTE: access_rights should be direclty filtered from result in stm instead calling again user_group_ids
190-
projects_access_rights[ProjectID(row.uuid)] = _aggregate_access_rights(
191-
row.access_rights, user_group_ids
192-
)
193-
194-
else:
195-
# backwards compatibility
196-
# - no access_rights defined BUT project is owned
197-
projects_access_rights[ProjectID(row.uuid)] = AccessRights.all()
188+
projects_access_rights.append(ProjectID(row.uuid))
198189

199190
return projects_access_rights
200191

@@ -213,44 +204,40 @@ async def get_project_access_rights(
213204

214205
async with pass_or_acquire_connection(self.db_engine, connection) as conn:
215206
user_group_ids = await _get_user_groups_ids(conn, user_id)
207+
_my_access_rights_subquery = my_private_workspace_access_rights_subquery(
208+
user_group_ids
209+
)
216210

217211
private_workspace_query = (
218212
sa.select(
219213
projects.c.prj_owner,
220-
access_rights_subquery.c.access_rights,
214+
_my_access_rights_subquery.c.access_rights,
221215
)
222-
.select_from(projects.join(access_rights_subquery, isouter=True))
216+
.select_from(projects.join(_my_access_rights_subquery))
223217
.where(
224218
(projects.c.uuid == f"{project_id}")
225-
& (
226-
(projects.c.prj_owner == user_id)
227-
| sa.text(
228-
f"jsonb_exists_any(access_rights_subquery.access_rights, {assemble_array_groups(user_group_ids)})"
229-
)
230-
)
231219
& (projects.c.workspace_id.is_(None))
232220
)
233221
)
234222

223+
_my_workspace_access_rights_subquery = (
224+
my_shared_workspace_access_rights_subquery(user_group_ids)
225+
)
226+
235227
shared_workspace_query = (
236228
sa.select(
237229
projects.c.prj_owner,
238-
workspace_access_rights_subquery.c.access_rights,
230+
_my_workspace_access_rights_subquery.c.access_rights,
239231
)
240232
.select_from(
241233
projects.join(
242-
workspace_access_rights_subquery,
234+
_my_workspace_access_rights_subquery,
243235
projects.c.workspace_id
244-
== workspace_access_rights_subquery.c.workspace_id,
236+
== _my_workspace_access_rights_subquery.c.workspace_id,
245237
)
246238
)
247239
.where(
248240
(projects.c.uuid == f"{project_id}")
249-
& (
250-
sa.text(
251-
f"jsonb_exists_any(workspace_access_rights_subquery.access_rights, {assemble_array_groups(user_group_ids)})"
252-
)
253-
)
254241
& (projects.c.workspace_id.is_not(None))
255242
)
256243
)
@@ -358,5 +345,6 @@ async def get_readable_project_ids(
358345
) -> list[ProjectID]:
359346
"""Returns a list of projects where user has granted read-access"""
360347
async with pass_or_acquire_connection(self.db_engine, connection) as conn:
361-
projects_access_rights = await _list_projects_access_rights(conn, user_id)
362-
return [pid for pid, access in projects_access_rights.items() if access.read]
348+
return await _list_user_projects_access_rights_with_read_access(
349+
conn, user_id
350+
)

0 commit comments

Comments
 (0)