Skip to content

Commit 94a07f9

Browse files
tylerjereddy, WarrenWeckesser, j-bowhay
committed
BUG: cluster: cophenet intercept invalid linkage matrix count (scipy#22187)
* BUG: catch invalid linkage count * Fixes scipygh-22183. * The input data from scipygh-22183 causes an out of bounds memory access in a 1-D `memoryview` in the Cython `cophenetic_distances` function. Prevent this by enforcing a check for an allowable upper bound on the cluster membership (4th column) of the linkage matrix `Z` received by `is_valid_linkage`. Co-authored-by: Warren Weckesser <[email protected]> * TST: PR 22187 revisions * Adjust `test_gh_22183()` such that the linkage matrix is invalid only because of an excessive observation count, rather than also being invalid because the large observation count is not integral. * Adjust `test_gh_22183()` such that it properly flushes through array API backends. * DOC: backticks --------- Co-authored-by: Warren Weckesser <[email protected]> Co-authored-by: Jake Bowhay <[email protected]>
1 parent 55de424 commit 94a07f9

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

scipy/cluster/_hierarchy.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,10 @@ def cophenetic_distances(const double[:, :] Z, double[:] d, int n):
365365
# back to the root of current subtree
366366
dist = Z[root, 2]
367367
right_start = left_start[k] + n_lc
368+
# NOTE: an invalid linkage matrix (gh-22183)
369+
# can cause an out of bounds memory access
370+
# of `j` on `members` memoryview below, if not
371+
# caught ahead of time by `is_valid_linkage`
368372
for i in range(left_start[k], right_start):
369373
for j in range(right_start, right_start + n_rc):
370374
d[condensed_index(n, members[i], members[j])] = dist

scipy/cluster/hierarchy.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2157,6 +2157,10 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None):
21572157
I.e., a cluster cannot join another cluster unless the cluster being joined
21582158
has been generated.
21592159
2160+
The fourth column of `Z` represents the number of original observations
2161+
in a cluster, so a valid ``Z[i, 3]`` value may not exceed the number of
2162+
original observations.
2163+
21602164
Parameters
21612165
----------
21622166
Z : array_like
@@ -2244,6 +2248,9 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None):
22442248
raise ValueError(f'Linkage {name_str}contains negative distances.')
22452249
if xp.any(Z[:, 3] < 0):
22462250
raise ValueError(f'Linkage {name_str}contains negative counts.')
2251+
if xp.any(Z[:, 3] > (Z.shape[0] + 1)):
2252+
raise ValueError('Linkage matrix contains excessive observations'
2253+
' in a cluster')
22472254
if _check_hierarchy_uses_cluster_before_formed(Z):
22482255
raise ValueError(f'Linkage {name_str}uses non-singleton cluster before'
22492256
' it is formed.')

scipy/cluster/tests/test_hierarchy.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,25 @@ def test_linkage_cophenet_tdist_Z_Y(self, xp):
200200
xp_assert_close(c, expectedc, atol=1e-10)
201201
xp_assert_close(M, expectedM, atol=1e-10)
202202

203+
def test_gh_22183(self, xp):
204+
# check for lack of segfault
205+
# (out of bounds memory access)
206+
# and correct interception of
207+
# invalid linkage matrix
208+
arr=[[0.0, 1.0, 1.0, 2.0],
209+
[2.0, 12.0, 1.0, 3.0],
210+
[3.0, 4.0, 1.0, 2.0],
211+
[5.0, 14.0, 1.0, 3.0],
212+
[6.0, 7.0, 1.0, 2.0],
213+
[8.0, 16.0, 1.0, 3.0],
214+
[9.0, 10.0, 1.0, 2.0],
215+
[11.0, 18.0, 1.0, 3.0],
216+
[13.0, 15.0, 2.0, 6.0],
217+
[17.0, 20.0, 2.0, 32.0],
218+
[19.0, 21.0, 2.0, 12.0]]
219+
with pytest.raises(ValueError, match="excessive observations"):
220+
cophenet(xp.asarray(arr))
221+
203222

204223
class TestMLabLinkageConversion:
205224

0 commit comments

Comments
 (0)