Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 75e3e52

Browse files
committed
Fixes for PR
1 parent 6df9d37 commit 75e3e52

File tree

3 files changed

+37
-25
lines changed

3 files changed

+37
-25
lines changed

data_diff/database.py

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ def close(self):
173173
"Close connection(s) to the database instance. Querying will stop functioning."
174174
...
175175

176-
177176
@abstractmethod
178177
def normalize_timestamp(self, value: str, coltype: ColType) -> str:
179178
"""Creates an SQL expression, that converts 'value' to a normalized timestamp.
@@ -282,7 +281,12 @@ def _convert_db_precision_to_digits(self, p: int) -> int:
282281
return math.floor(math.log(2**p, 10))
283282

284283
def _parse_type(
285-
self, type_repr: str, datetime_precision: int = None, numeric_precision: int = None, numeric_scale: int = None
284+
self,
285+
col_name: str,
286+
type_repr: str,
287+
datetime_precision: int = None,
288+
numeric_precision: int = None,
289+
numeric_scale: int = None,
286290
) -> ColType:
287291
""" """
288292

@@ -302,7 +306,7 @@ def _parse_type(
302306

303307
elif issubclass(cls, Decimal):
304308
if numeric_scale is None:
305-
raise ValueError(f"{self.name}: Unexpected numeric_scale is NULL, for column of type {type_repr}.")
309+
raise ValueError(f"{self.name}: Unexpected numeric_scale is NULL, for column {col_name} of type {type_repr}.")
306310
return cls(precision=numeric_scale)
307311

308312
assert issubclass(cls, Float)
@@ -333,7 +337,7 @@ def query_table_schema(self, path: DbPath, filter_columns: Optional[Sequence[str
333337
rows = [r for r in rows if r[0].lower() in accept]
334338

335339
# Return a dict of form {name: type} after normalization
336-
return {row[0]: self._parse_type(*row[1:]) for row in rows}
340+
return {row[0]: self._parse_type(*row) for row in rows}
337341

338342
# @lru_cache()
339343
# def get_table_schema(self, path: DbPath) -> Dict[str, ColType]:
@@ -344,13 +348,10 @@ def _normalize_table_path(self, path: DbPath) -> DbPath:
344348
if self.default_schema:
345349
return self.default_schema, path[0]
346350
elif len(path) != 2:
347-
raise ValueError(
348-
f"{self.name}: Bad table path for {self}: '{'.'.join(path)}'. Expected form: schema.table"
349-
)
351+
raise ValueError(f"{self.name}: Bad table path for {self}: '{'.'.join(path)}'. Expected form: schema.table")
350352

351353
return path
352354

353-
354355
def parse_table_name(self, name: str) -> DbPath:
355356
return parse_table_name(name)
356357

@@ -446,13 +447,14 @@ def md5_to_int(self, s: str) -> str:
446447
def to_string(self, s: str):
447448
return f"{s}::varchar"
448449

449-
450450
def normalize_timestamp(self, value: str, coltype: ColType) -> str:
451451
if coltype.rounds:
452452
return f"to_char({value}::timestamp({coltype.precision}), 'YYYY-mm-dd HH24:MI:SS.US')"
453453

454454
timestamp6 = f"to_char({value}::timestamp(6), 'YYYY-mm-dd HH24:MI:SS.US')"
455-
return f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
455+
return (
456+
f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
457+
)
456458

457459
def normalize_number(self, value: str, coltype: ColType) -> str:
458460
return self.to_string(f"{value}::decimal(38, {coltype.precision})")
@@ -502,9 +504,7 @@ def normalize_timestamp(self, value: str, coltype: ColType) -> str:
502504
else:
503505
s = f"date_format(cast({value} as timestamp(6)), '%Y-%m-%d %H:%i:%S.%f')"
504506

505-
return (
506-
f"RPAD(RPAD({s}, {TIMESTAMP_PRECISION_POS+coltype.precision}, '.'), {TIMESTAMP_PRECISION_POS+6}, '0')"
507-
)
507+
return f"RPAD(RPAD({s}, {TIMESTAMP_PRECISION_POS+coltype.precision}, '.'), {TIMESTAMP_PRECISION_POS+6}, '0')"
508508

509509
def normalize_number(self, value: str, coltype: ColType) -> str:
510510
return self.to_string(f"cast({value} as decimal(38,{coltype.precision}))")
@@ -517,7 +517,9 @@ def select_table_schema(self, path: DbPath) -> str:
517517
f"WHERE table_name = '{table}' AND table_schema = '{schema}'"
518518
)
519519

520-
def _parse_type(self, type_repr: str, datetime_precision: int = None, numeric_precision: int = None) -> ColType:
520+
def _parse_type(
521+
self, col_name: str, type_repr: str, datetime_precision: int = None, numeric_precision: int = None
522+
) -> ColType:
521523
regexps = {
522524
r"timestamp\((\d)\)": Timestamp,
523525
r"timestamp\((\d)\) with time zone": TimestampTZ,
@@ -607,7 +609,6 @@ def normalize_number(self, value: str, coltype: ColType) -> str:
607609
return self.to_string(f"cast({value} as decimal(38, {coltype.precision}))")
608610

609611

610-
611612
class Oracle(ThreadedDatabase):
612613
ROUNDS_ON_PREC_LOSS = True
613614

@@ -661,7 +662,12 @@ def normalize_number(self, value: str, coltype: ColType) -> str:
661662
return f"to_char({value}, '{format_str}')"
662663

663664
def _parse_type(
664-
self, type_repr: str, datetime_precision: int = None, numeric_precision: int = None, numeric_scale: int = None
665+
self,
666+
col_name: str,
667+
type_repr: str,
668+
datetime_precision: int = None,
669+
numeric_precision: int = None,
670+
numeric_scale: int = None,
665671
) -> ColType:
666672
""" """
667673
regexps = {
@@ -720,15 +726,18 @@ def normalize_timestamp(self, value: str, coltype: ColType) -> str:
720726
us = f"extract(us from {timestamp})"
721727
# epoch = Total time since epoch in microseconds.
722728
epoch = f"{secs}*1000000 + {ms}*1000 + {us}"
723-
timestamp6 = f"to_char({epoch}, -6+{coltype.precision}) * interval '0.000001 seconds', 'YYYY-mm-dd HH24:MI:SS.US')"
729+
timestamp6 = (
730+
f"to_char({epoch}, -6+{coltype.precision}) * interval '0.000001 seconds', 'YYYY-mm-dd HH24:MI:SS.US')"
731+
)
724732
else:
725733
timestamp6 = f"to_char({value}::timestamp(6), 'YYYY-mm-dd HH24:MI:SS.US')"
726-
return f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
734+
return (
735+
f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
736+
)
727737

728738
def normalize_number(self, value: str, coltype: ColType) -> str:
729739
return self.to_string(f"{value}::decimal(38,{coltype.precision})")
730740

731-
732741
def select_table_schema(self, path: DbPath) -> str:
733742
schema, table = self._normalize_table_path(path)
734743

@@ -838,7 +847,9 @@ def normalize_timestamp(self, value: str, coltype: ColType) -> str:
838847
return f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', {value})"
839848

840849
timestamp6 = f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', {value})"
841-
return f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
850+
return (
851+
f"RPAD(LEFT({timestamp6}, {TIMESTAMP_PRECISION_POS+coltype.precision}), {TIMESTAMP_PRECISION_POS+6}, '0')"
852+
)
842853

843854
def normalize_number(self, value: str, coltype: ColType) -> str:
844855
if isinstance(coltype, Integer):

data_diff/diff_tables.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,8 +386,10 @@ def _validate_and_adjust_columns(self, table1, table2):
386386
for c in t._relevant_columns:
387387
ctype = t._schema[c]
388388
if isinstance(ctype, UnknownColType):
389-
logger.warn(f"[{t.database.name}] Column '{c}' of type '{ctype.text}' has no compatibility handling. "
390-
"If encoding/formatting differs between databases, it may result in false positives.")
389+
logger.warn(
390+
f"[{t.database.name}] Column '{c}' of type '{ctype.text}' has no compatibility handling. "
391+
"If encoding/formatting differs between databases, it may result in false positives."
392+
)
391393

392394
def _bisect_and_diff_tables(self, table1, table2, level=0, max_rows=None):
393395
assert table1.is_bounded and table2.is_bounded

tests/test_diff_tables.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,16 @@ def tearDownClass(cls):
3232
cls.preql.close()
3333
cls.connection.close()
3434

35-
3635
# Fallback for test runners that don't support setUpClass/tearDownClass
3736
def setUp(self) -> None:
38-
if not hasattr(self, 'connection'):
37+
if not hasattr(self, "connection"):
3938
self.setUpClass.__func__(self)
4039
self.private_connection = True
4140

4241
return super().setUp()
4342

4443
def tearDown(self) -> None:
45-
if hasattr(self, 'private_connection'):
44+
if hasattr(self, "private_connection"):
4645
self.tearDownClass.__func__(self)
4746

4847
return super().tearDown()

0 commit comments

Comments
 (0)