@@ -259,7 +259,7 @@ endmacro()
259
259
# in `SRC_CUDA_ARCHS` that is less or equal to the version in `TGT_CUDA_ARCHS`.
260
260
# We have special handling for 9.0a, if 9.0a is in `SRC_CUDA_ARCHS` and 9.0 is
261
261
# in `TGT_CUDA_ARCHS` then we should remove 9.0a from `SRC_CUDA_ARCHS` and add
262
- # 9.0a to the result.
262
+ # 9.0a to the result (and remove 9.0 from TGT_CUDA_ARCHS) .
263
263
# The result is stored in `OUT_CUDA_ARCHS`.
264
264
#
265
265
# Example:
@@ -270,34 +270,47 @@ endmacro()
270
270
#
271
271
function (cuda_archs_loose_intersection OUT_CUDA_ARCHS SRC_CUDA_ARCHS TGT_CUDA_ARCHS)
272
272
list (REMOVE_DUPLICATES SRC_CUDA_ARCHS)
273
+ set (TGT_CUDA_ARCHS_ ${TGT_CUDA_ARCHS} )
273
274
274
275
# if 9.0a is in SRC_CUDA_ARCHS and 9.0 is in CUDA_ARCHS then we should
275
276
# remove 9.0a from SRC_CUDA_ARCHS and add 9.0a to _CUDA_ARCHS
276
277
set (_CUDA_ARCHS)
277
278
if ("9.0a" IN_LIST SRC_CUDA_ARCHS)
278
279
list (REMOVE_ITEM SRC_CUDA_ARCHS "9.0a" )
279
- if ("9.0" IN_LIST TGT_CUDA_ARCHS)
280
+ if ("9.0" IN_LIST TGT_CUDA_ARCHS_)
281
+ list (REMOVE_ITEM TGT_CUDA_ARCHS_ "9.0" )
280
282
set (_CUDA_ARCHS "9.0a" )
281
283
endif ()
282
284
endif ()
283
285
284
286
list (SORT SRC_CUDA_ARCHS COMPARE NATURAL ORDER ASCENDING)
285
287
286
- # for each ARCH in CUDA_ARCHS find the highest arch in SRC_CUDA_ARCHS that is
287
- # less or eqault to ARCH
288
- foreach (_ARCH ${CUDA_ARCHS} )
289
- set (_TMP_ARCH)
290
- foreach (_SRC_ARCH ${SRC_CUDA_ARCHS} )
291
- if (_SRC_ARCH VERSION_LESS_EQUAL _ARCH)
292
- set (_TMP_ARCH ${_SRC_ARCH} )
293
- else ()
294
- break ()
288
+ # for each ARCH in TGT_CUDA_ARCHS find the highest arch in SRC_CUDA_ARCHS that
289
+ # is less or equal to ARCH (but has the same major version since SASS binary
290
+ # compatibility is only forward compatible within the same major version).
291
+ foreach (_ARCH ${TGT_CUDA_ARCHS_} )
292
+ set (_TMP_ARCH)
293
+ # Extract the major version of the target arch
294
+ string (REGEX REPLACE "^([0-9]+)\\ ..*$" "\\ 1" TGT_ARCH_MAJOR "${_ARCH} " )
295
+ foreach (_SRC_ARCH ${SRC_CUDA_ARCHS} )
296
+ # Extract the major version of the source arch
297
+ string (REGEX REPLACE "^([0-9]+)\\ ..*$" "\\ 1" SRC_ARCH_MAJOR "${_SRC_ARCH} " )
298
+ # Check major-version match AND version-less-or-equal
299
+ if (_SRC_ARCH VERSION_LESS_EQUAL _ARCH)
300
+ if (SRC_ARCH_MAJOR STREQUAL TGT_ARCH_MAJOR)
301
+ set (_TMP_ARCH "${_SRC_ARCH} " )
302
+ endif ()
303
+ else ()
304
+ # If we hit a version greater than the target, we can break
305
+ break ()
306
+ endif ()
307
+ endforeach ()
308
+
309
+ # If we found a matching _TMP_ARCH, append it to _CUDA_ARCHS
310
+ if (_TMP_ARCH)
311
+ list (APPEND _CUDA_ARCHS "${_TMP_ARCH} " )
295
312
endif ()
296
313
endforeach ()
297
- if (_TMP_ARCH)
298
- list (APPEND _CUDA_ARCHS ${_TMP_ARCH} )
299
- endif ()
300
- endforeach ()
301
314
302
315
list (REMOVE_DUPLICATES _CUDA_ARCHS)
303
316
set (${OUT_CUDA_ARCHS} ${_CUDA_ARCHS} PARENT_SCOPE)
0 commit comments