Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit d4ca0e6

Browse files
sungchun12 (Sung Won Chung) and Sung Won Chung authored
Fix motherduck bug (#858) (#861)
* quick debug logs
* fix the override method
* remove submods
* remove prints
* remove submods
* revert change
* Refactor dynamic database clause in DuckDB.py
* draft tests
* Add validation for input path in select_table_schema method
* style fixes by ruff
* Revert "Add validation for input path in select_table_schema method"

  This reverts commit c09f9cf.
* Remove unnecessary code in test_duckdb.py

---------

Co-authored-by: Sung Won Chung <[email protected]>
Co-authored-by: sungchun12 <[email protected]>
1 parent 2396ed1 commit d4ca0e6

File tree

3 files changed

+49
-1
lines changed

3 files changed

+49
-1
lines changed

Diff for: data_diff/databases/base.py

+2
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,7 @@ def query_table_schema(self, path: DbPath) -> Dict[str, RawColumnInfo]:
10461046
accessing the schema using a SQL query.
10471047
"""
10481048
rows = self.query(self.select_table_schema(path), list, log_message=path)
1049+
10491050
if not rows:
10501051
raise RuntimeError(f"{self.name}: Table '{'.'.join(path)}' does not exist, or has no columns")
10511052

@@ -1060,6 +1061,7 @@ def query_table_schema(self, path: DbPath) -> Dict[str, RawColumnInfo]:
10601061
)
10611062
for r in rows
10621063
}
1064+
10631065
assert len(d) == len(rows)
10641066
return d
10651067

Diff for: data_diff/databases/duckdb.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,16 @@ def select_table_schema(self, path: DbPath) -> str:
167167
database, schema, table = self._normalize_table_path(path)
168168

169169
info_schema_path = ["information_schema", "columns"]
170+
170171
if database:
171172
info_schema_path.insert(0, database)
173+
dynamic_database_clause = f"'{database}'"
174+
else:
175+
dynamic_database_clause = "current_catalog()"
172176

173177
return (
174178
f"SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM {'.'.join(info_schema_path)} "
175-
f"WHERE table_name = '{table}' AND table_schema = '{schema}'"
179+
f"WHERE table_name = '{table}' AND table_schema = '{schema}' and table_catalog = {dynamic_database_clause}"
176180
)
177181

178182
def _normalize_table_path(self, path: DbPath) -> DbPath:

Diff for: tests/test_duckdb.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import unittest
2+
from data_diff.databases import duckdb as duckdb_differ
3+
import os
4+
import uuid
5+
6+
# Randomly named on-disk database file so test runs never collide with an existing file.
test_duckdb_filepath = f"{uuid.uuid4()}.duckdb"
7+
8+
9+
class TestDuckDBTableSchemaMethods(unittest.TestCase):
    """Tests for DuckDB table-path normalization and the schema-lookup SQL built from it."""

    def setUp(self):
        # Create a new duckdb file
        self.duckdb_conn = duckdb_differ.DuckDB(filepath=test_duckdb_filepath)

    def tearDown(self):
        # Close the connection before deleting its file: removing a file that is
        # still open leaks the handle and fails outright on some platforms.
        # NOTE(review): relies on DuckDB exposing close() -- confirm against the driver class.
        try:
            self.duckdb_conn.close()
        finally:
            # Delete the file even if closing raised, so later tests start clean.
            os.remove(test_duckdb_filepath)

    def test_normalize_table_path(self):
        """Paths of 1-3 parts normalize to (database, schema, table); longer paths are rejected."""
        self.assertEqual(self.duckdb_conn._normalize_table_path(("test_table",)), (None, "main", "test_table"))
        self.assertEqual(
            self.duckdb_conn._normalize_table_path(("test_schema", "test_table")), (None, "test_schema", "test_table")
        )
        self.assertEqual(
            self.duckdb_conn._normalize_table_path(("test_database", "test_schema", "test_table")),
            ("test_database", "test_schema", "test_table"),
        )

        with self.assertRaises(ValueError):
            self.duckdb_conn._normalize_table_path(("test_database", "test_schema", "test_table", "extra"))

    def test_select_table_schema(self):
        """The generated SQL filters on table_catalog, using current_catalog() when no database is given."""
        cases = [
            (
                ("test_table",),
                "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM information_schema.columns WHERE table_name = 'test_table' AND table_schema = 'main' and table_catalog = current_catalog()",
            ),
            (
                ("custom_schema", "test_table"),
                "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM information_schema.columns WHERE table_name = 'test_table' AND table_schema = 'custom_schema' and table_catalog = current_catalog()",
            ),
            (
                ("custom_db", "custom_schema", "test_table"),
                "SELECT column_name, data_type, datetime_precision, numeric_precision, numeric_scale FROM custom_db.information_schema.columns WHERE table_name = 'test_table' AND table_schema = 'custom_schema' and table_catalog = 'custom_db'",
            ),
        ]
        for db_path, expected_sql in cases:
            # subTest reports every failing path shape instead of stopping at the first one.
            with self.subTest(db_path=db_path):
                self.assertEqual(self.duckdb_conn.select_table_schema(db_path), expected_sql)

0 commit comments

Comments
 (0)