Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 1c243f3

Browse files
committed
cli: only json from standard diff with --json
1 parent 94d1419 commit 1c243f3

File tree

3 files changed

+19
-9
lines changed

3 files changed

+19
-9
lines changed

data_diff/__main__.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
@click.option("--max-age", default=None, help="Considers only rows younger than specified. See --min-age.")
5252
@click.option("-s", "--stats", is_flag=True, help="Print stats instead of a detailed diff")
5353
@click.option("-d", "--debug", is_flag=True, help="Print debug info")
54-
@click.option("--json", 'json_output', is_flag=True, help="Print JSON output for --stats")
54+
@click.option("--json", 'json_output', is_flag=True, help="Print JSONL output for machine readability")
5555
@click.option("-v", "--verbose", is_flag=True, help="Print extra info")
5656
@click.option("-i", "--interactive", is_flag=True, help="Confirm queries, implies --debug")
5757
@click.option("--keep-column-case", is_flag=True, help="Don't use the schema to fix the case of given column names.")
@@ -160,16 +160,21 @@ def main(
160160
"different_-": minus,
161161
"total": max_table_count,
162162
}
163-
print(json.dumps(json_output, indent=2))
163+
print(json.dumps(json_output))
164164
else:
165165
print(f"Diff-Total: {len(diff)} changed rows out of {max_table_count}")
166166
print(f"Diff-Percent: {percent:.14f}%")
167167
print(f"Diff-Split: +{plus} -{minus}")
168168
else:
169169
for op, key in diff_iter:
170170
color = COLOR_SCHEME[op]
171-
jsonl = json.dumps([op, list(key)])
172-
rich.print(f"[{color}]{jsonl}[/{color}]")
171+
172+
if json_output:
173+
jsonl = json.dumps([op, list(key)])
174+
rich.print(f"[{color}]{jsonl}[/{color}]")
175+
else:
176+
rich.print(f"[{color}]{op} {key!r}[/{color}]")
177+
173178
sys.stdout.flush()
174179

175180
end = time.time()

data_diff/diff_tables.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -382,9 +382,10 @@ def _bisect_and_diff_tables(self, table1, table2, level=0, max_rows=None):
382382
rows1, rows2 = self._threaded_call("get_values", [table1, table2])
383383
diff = list(diff_sets(rows1, rows2))
384384

385-
# This happens when the initial bisection threshold is larger than
386-
# the table itself.
387-
if level == 0 and not self.stats.get("table1_count", False):
385+
# The initial bisection_threshold is larger than the row count. Normally we
386+
# always checksum and count segments, even if we get the values. At the first
387+
# level, however, no segment has been counted yet, so count the fetched rows.
388+
if level == 0:
388389
self.stats["table1_count"] = self.stats.get("table1_count", 0) + len(rows1)
389390
self.stats["table2_count"] = self.stats.get("table2_count", 0) + len(rows2)
390391

@@ -427,8 +428,8 @@ def _diff_tables(self, table1, table2, level=0, segment_index=None, segment_coun
427428
return
428429

429430
if level == 1:
430-
self.stats["table1_count"] = self.stats.get("table_count1", 0) + count1
431-
self.stats["table2_count"] = self.stats.get("table_count2", 0) + count2
431+
self.stats["table1_count"] = self.stats.get("table1_count", 0) + count1
432+
self.stats["table2_count"] = self.stats.get("table2_count", 0) + count2
432433

433434
if checksum1 != checksum2:
434435
yield from self._bisect_and_diff_tables(table1, table2, level=level, max_rows=max(count1, count2))

tests/test_diff_tables.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ def test_diff_small_tables(self):
155155
diff = list(self.differ.diff_tables(self.table, self.table2))
156156
expected = [("-", ("2", time + ".000000"))]
157157
self.assertEqual(expected, diff)
158+
self.assertEqual(2, self.differ.stats["table1_count"])
159+
self.assertEqual(1, self.differ.stats["table2_count"])
158160

159161
def test_diff_table_above_bisection_threshold(self):
160162
time = "2022-01-01 00:00:00"
@@ -176,6 +178,8 @@ def test_diff_table_above_bisection_threshold(self):
176178
diff = list(self.differ.diff_tables(self.table, self.table2))
177179
expected = [("-", ("5", time + ".000000"))]
178180
self.assertEqual(expected, diff)
181+
self.assertEqual(5, self.differ.stats["table1_count"])
182+
self.assertEqual(4, self.differ.stats["table2_count"])
179183

180184
def test_return_empty_array_when_same(self):
181185
time = "2022-01-01 00:00:00"

0 commit comments

Comments
 (0)