Skip to content

Commit db23573

Browse files
committed
refactor: use sets to collect data
Coverage.py predates sets as a built-in data structure, so the file data collection has long been dicts with None as the values. Sets are available to us now (since Python 2.4 in 2004, which coverage.py dropped support for in 2014!), we use sets.
1 parent f6d3e88 commit db23573

File tree

6 files changed

+60
-62
lines changed

6 files changed

+60
-62
lines changed

coverage/ctracer/datastack.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* possible.
1313
*/
1414
typedef struct DataStackEntry {
15-
/* The current file_data dictionary. Owned. */
15+
/* The current file_data set. Owned. */
1616
PyObject * file_data;
1717

1818
/* The disposition object for this frame. A borrowed instance of CFileDisposition. */

coverage/ctracer/tracer.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ CTracer_record_pair(CTracer *self, int l1, int l2)
182182
goto error;
183183
}
184184

185-
if (PyDict_SetItem(self->pcur_entry->file_data, t, Py_None) < 0) {
185+
if (PySet_Add(self->pcur_entry->file_data, t) < 0) {
186186
goto error;
187187
}
188188

@@ -504,7 +504,7 @@ CTracer_handle_call(CTracer *self, PyFrameObject *frame)
504504
if (PyErr_Occurred()) {
505505
goto error;
506506
}
507-
file_data = PyDict_New();
507+
file_data = PySet_New(NULL);
508508
if (file_data == NULL) {
509509
goto error;
510510
}
@@ -674,7 +674,7 @@ CTracer_handle_line(CTracer *self, PyFrameObject *frame)
674674
goto error;
675675
}
676676

677-
ret2 = PyDict_SetItem(self->pcur_entry->file_data, this_line, Py_None);
677+
ret2 = PySet_Add(self->pcur_entry->file_data, this_line);
678678
Py_DECREF(this_line);
679679
if (ret2 < 0) {
680680
goto error;

coverage/ctracer/tracer.h

+4-5
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,14 @@ typedef struct CTracer {
3939
PyObject * context;
4040

4141
/*
42-
The data stack is a stack of dictionaries. Each dictionary collects
42+
The data stack is a stack of sets. Each set collects
4343
data for a single source file. The data stack parallels the call stack:
4444
each call pushes the new frame's file data onto the data stack, and each
4545
return pops file data off.
4646
47-
The file data is a dictionary whose form depends on the tracing options.
48-
If tracing arcs, the keys are line number pairs. If not tracing arcs,
49-
the keys are line numbers. In both cases, the value is irrelevant
50-
(None).
47+
The file data is a set whose form depends on the tracing options.
48+
If tracing arcs, the values are line number pairs. If not tracing arcs,
49+
the values are line numbers.
5150
*/
5251

5352
DataStack data_stack; /* Used if we aren't doing concurrency. */

coverage/pytracer.py

+15-15
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def __init__(self):
4848
# The threading module to use, if any.
4949
self.threading = None
5050

51-
self.cur_file_dict = None
51+
self.cur_file_data = None
5252
self.last_line = 0 # int, but uninitialized.
5353
self.cur_file_name = None
5454
self.context = None
@@ -113,18 +113,18 @@ def _trace(self, frame, event, arg_unused):
113113
self.log(">", f.f_code.co_filename, f.f_lineno, f.f_code.co_name, f.f_trace)
114114
f = f.f_back
115115
sys.settrace(None)
116-
self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
116+
self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
117117
self.data_stack.pop()
118118
)
119119
return None
120120

121121
if self.last_exc_back:
122122
if frame == self.last_exc_back:
123123
# Someone forgot a return event.
124-
if self.trace_arcs and self.cur_file_dict:
124+
if self.trace_arcs and self.cur_file_data:
125125
pair = (self.last_line, -self.last_exc_firstlineno)
126-
self.cur_file_dict[pair] = None
127-
self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
126+
self.cur_file_data.add(pair)
127+
self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
128128
self.data_stack.pop()
129129
)
130130
self.last_exc_back = None
@@ -150,7 +150,7 @@ def _trace(self, frame, event, arg_unused):
150150
self._activity = True
151151
self.data_stack.append(
152152
(
153-
self.cur_file_dict,
153+
self.cur_file_data,
154154
self.cur_file_name,
155155
self.last_line,
156156
self.started_context,
@@ -163,12 +163,12 @@ def _trace(self, frame, event, arg_unused):
163163
disp = self.should_trace(filename, frame)
164164
self.should_trace_cache[filename] = disp
165165

166-
self.cur_file_dict = None
166+
self.cur_file_data = None
167167
if disp.trace:
168168
tracename = disp.source_filename
169169
if tracename not in self.data:
170-
self.data[tracename] = {}
171-
self.cur_file_dict = self.data[tracename]
170+
self.data[tracename] = set()
171+
self.cur_file_data = self.data[tracename]
172172
# The call event is really a "start frame" event, and happens for
173173
# function calls and re-entering generators. The f_lasti field is
174174
# -1 for calls, and a real offset for generators. Use <0 as the
@@ -179,25 +179,25 @@ def _trace(self, frame, event, arg_unused):
179179
self.last_line = frame.f_lineno
180180
elif event == 'line':
181181
# Record an executed line.
182-
if self.cur_file_dict is not None:
182+
if self.cur_file_data is not None:
183183
lineno = frame.f_lineno
184184

185185
if self.trace_arcs:
186-
self.cur_file_dict[(self.last_line, lineno)] = None
186+
self.cur_file_data.add((self.last_line, lineno))
187187
else:
188-
self.cur_file_dict[lineno] = None
188+
self.cur_file_data.add(lineno)
189189
self.last_line = lineno
190190
elif event == 'return':
191-
if self.trace_arcs and self.cur_file_dict:
191+
if self.trace_arcs and self.cur_file_data:
192192
# Record an arc leaving the function, but beware that a
193193
# "return" event might just mean yielding from a generator.
194194
# Jython seems to have an empty co_code, so just assume return.
195195
code = frame.f_code.co_code
196196
if (not code) or code[frame.f_lasti] != YIELD_VALUE:
197197
first = frame.f_code.co_firstlineno
198-
self.cur_file_dict[(self.last_line, -first)] = None
198+
self.cur_file_data.add((self.last_line, -first))
199199
# Leaving this function, pop the filename stack.
200-
self.cur_file_dict, self.cur_file_name, self.last_line, self.started_context = (
200+
self.cur_file_data, self.cur_file_name, self.last_line, self.started_context = (
201201
self.data_stack.pop()
202202
)
203203
# Leaving a context?

coverage/sqldata.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -450,9 +450,9 @@ def data_filename(self):
450450
def add_lines(self, line_data):
451451
"""Add measured line data.
452452
453-
`line_data` is a dictionary mapping file names to dictionaries::
453+
`line_data` is a dictionary mapping file names to iterables of ints::
454454
455-
{ filename: { lineno: None, ... }, ...}
455+
{ filename: { line1, line2, ... }, ...}
456456
457457
"""
458458
if self._debug.should('dataop'):
@@ -483,9 +483,10 @@ def add_lines(self, line_data):
483483
def add_arcs(self, arc_data):
484484
"""Add measured arc data.
485485
486-
`arc_data` is a dictionary mapping file names to dictionaries::
486+
`arc_data` is a dictionary mapping file names to iterables of pairs of
487+
ints::
487488
488-
{ filename: { (l1,l2): None, ... }, ...}
489+
{ filename: { (l1,l2), (l1,l2), ... }, ...}
489490
490491
"""
491492
if self._debug.should('dataop'):

tests/test_data.py

+32-34
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,24 @@
2424

2525

2626
LINES_1 = {
27-
'a.py': {1: None, 2: None},
28-
'b.py': {3: None},
27+
'a.py': {1, 2},
28+
'b.py': {3},
2929
}
3030
SUMMARY_1 = {'a.py': 2, 'b.py': 1}
3131
MEASURED_FILES_1 = ['a.py', 'b.py']
3232
A_PY_LINES_1 = [1, 2]
3333
B_PY_LINES_1 = [3]
3434

3535
LINES_2 = {
36-
'a.py': {1: None, 5: None},
37-
'c.py': {17: None},
36+
'a.py': {1, 5},
37+
'c.py': {17},
3838
}
3939
SUMMARY_1_2 = {'a.py': 3, 'b.py': 1, 'c.py': 1}
4040
MEASURED_FILES_1_2 = ['a.py', 'b.py', 'c.py']
4141

4242
ARCS_3 = {
43-
'x.py': {
44-
(-1, 1): None,
45-
(1, 2): None,
46-
(2, 3): None,
47-
(3, -1): None,
48-
},
49-
'y.py': {
50-
(-1, 17): None,
51-
(17, 23): None,
52-
(23, -1): None,
53-
},
43+
'x.py': {(-1, 1), (1, 2), (2, 3), (3, -1)},
44+
'y.py': {(-1, 17), (17, 23), (23, -1)},
5445
}
5546
X_PY_ARCS_3 = [(-1, 1), (1, 2), (2, 3), (3, -1)]
5647
Y_PY_ARCS_3 = [(-1, 17), (17, 23), (23, -1)]
@@ -60,15 +51,8 @@
6051
Y_PY_LINES_3 = [17, 23]
6152

6253
ARCS_4 = {
63-
'x.py': {
64-
(-1, 2): None,
65-
(2, 5): None,
66-
(5, -1): None,
67-
},
68-
'z.py': {
69-
(-1, 1000): None,
70-
(1000, -1): None,
71-
},
54+
'x.py': {(-1, 2), (2, 5), (5, -1)},
55+
'z.py': {(-1, 1000), (1000, -1)},
7256
}
7357
SUMMARY_3_4 = {'x.py': 4, 'y.py': 2, 'z.py': 1}
7458
MEASURED_FILES_3_4 = ['x.py', 'y.py', 'z.py']
@@ -103,6 +87,16 @@ def assert_arcs3_data(self, covdata):
10387
assert covdata.has_arcs()
10488

10589

90+
def dicts_from_sets(file_data):
91+
"""Convert a dict of sets into a dict of dicts.
92+
93+
Before 6.0, file data was a dict with None as the values. In 6.0, file
94+
data is a set. SqlData all along only cared that it was an iterable.
95+
This function helps us test that the old dict format still works.
96+
"""
97+
return {k: dict.fromkeys(v) for k, v in file_data.items()}
98+
99+
106100
class CoverageDataTest(DataTestHelpers, CoverageTest):
107101
"""Test cases for CoverageData."""
108102

@@ -130,14 +124,16 @@ def test_empty_arc_data_is_false(self):
130124
covdata.add_arcs({})
131125
assert not covdata
132126

133-
def test_adding_lines(self):
127+
@pytest.mark.parametrize("lines", [LINES_1, dicts_from_sets(LINES_1)])
128+
def test_adding_lines(self, lines):
134129
covdata = CoverageData()
135-
covdata.add_lines(LINES_1)
130+
covdata.add_lines(lines)
136131
self.assert_lines1_data(covdata)
137132

138-
def test_adding_arcs(self):
133+
@pytest.mark.parametrize("arcs", [ARCS_3, dicts_from_sets(ARCS_3)])
134+
def test_adding_arcs(self, arcs):
139135
covdata = CoverageData()
140-
covdata.add_arcs(ARCS_3)
136+
covdata.add_arcs(arcs)
141137
self.assert_arcs3_data(covdata)
142138

143139
def test_ok_to_add_lines_twice(self):
@@ -212,20 +208,22 @@ def test_contexts_by_lineno_with_lines(self):
212208
covdata.add_lines(LINES_1)
213209
assert covdata.contexts_by_lineno('a.py') == {1: ['test_a'], 2: ['test_a']}
214210

215-
def test_no_duplicate_lines(self):
211+
@pytest.mark.parametrize("lines", [LINES_1, dicts_from_sets(LINES_1)])
212+
def test_no_duplicate_lines(self, lines):
216213
covdata = CoverageData()
217214
covdata.set_context("context1")
218-
covdata.add_lines(LINES_1)
215+
covdata.add_lines(lines)
219216
covdata.set_context("context2")
220-
covdata.add_lines(LINES_1)
217+
covdata.add_lines(lines)
221218
assert covdata.lines('a.py') == A_PY_LINES_1
222219

223-
def test_no_duplicate_arcs(self):
220+
@pytest.mark.parametrize("arcs", [ARCS_3, dicts_from_sets(ARCS_3)])
221+
def test_no_duplicate_arcs(self, arcs):
224222
covdata = CoverageData()
225223
covdata.set_context("context1")
226-
covdata.add_arcs(ARCS_3)
224+
covdata.add_arcs(arcs)
227225
covdata.set_context("context2")
228-
covdata.add_arcs(ARCS_3)
226+
covdata.add_arcs(arcs)
229227
assert covdata.arcs('x.py') == X_PY_ARCS_3
230228

231229
def test_no_arcs_vs_unmeasured_file(self):

0 commit comments

Comments
 (0)