@@ -1112,12 +1112,12 @@ CM_NODEBUG CM_INLINE auto cm_atomic(SurfaceIndex Idx,
1112
1112
" unsupported cache hint" );
1113
1113
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1114
1114
constexpr bool _Transposed = false ;
1115
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1115
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1116
1116
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1117
1117
auto _TmpRes =
1118
1118
__cm_intrinsic_impl_lsc_atomic_bti<Op, _DS, VS, _Transposed, L1H, L3H,
1119
- _MessTy , N>(Pred, Idx, Offset);
1120
- return lsc_format_ret<T, _MessTy , _RetTy>(_TmpRes);
1119
+ _IntRetTy , N>(Pred, Idx, Offset);
1120
+ return lsc_format_ret<T, _IntRetTy , _RetTy>(_TmpRes);
1121
1121
}
1122
1122
1123
1123
template <AtomicOp Op, typename T, VectorSize VS = VectorSize::N1,
@@ -1138,12 +1138,15 @@ cm_atomic(SurfaceIndex Idx, vector<unsigned, N> Offset,
1138
1138
" unsupported cache hint" );
1139
1139
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1140
1140
constexpr bool _Transposed = false ;
1141
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1141
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1142
+ using _SrcTy = decltype (lsc_data_type_ext<T, N, VS>());
1143
+ using _CastTy = typename lsc_bitcast_type<T>::type;
1142
1144
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1143
- auto _TmpRes =
1144
- __cm_intrinsic_impl_lsc_atomic_bti<Op, _DS, VS, _Transposed, L1H, L3H,
1145
- _MessTy, N>(Pred, Idx, Offset, Src0);
1146
- return lsc_format_ret<T, _MessTy, _RetTy>(_TmpRes);
1145
+ _SrcTy _TmpSrc0 = Src0.format <_CastTy>();
1146
+ auto _TmpRes = __cm_intrinsic_impl_lsc_atomic_bti<Op, _DS, VS, _Transposed,
1147
+ L1H, L3H, _IntRetTy, N>(
1148
+ Pred, Idx, Offset, _TmpSrc0);
1149
+ return lsc_format_ret<T, _IntRetTy, _RetTy>(_TmpRes);
1147
1150
}
1148
1151
1149
1152
template <AtomicOp Op, typename T, VectorSize VS = VectorSize::N1,
@@ -1164,13 +1167,17 @@ cm_atomic(SurfaceIndex Idx, vector<unsigned, N> Offset,
1164
1167
CM_STATIC_ERROR ((lsc_check_cache_hint<LSCAction::Atomic, L1H, L3H>()),
1165
1168
" unsupported cache hint" );
1166
1169
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1167
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1170
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1171
+ using _SrcTy = decltype (lsc_data_type_ext<T, N, VS>());
1168
1172
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1173
+ using _CastTy = typename lsc_bitcast_type<T>::type;
1174
+ _SrcTy _TmpSrc0 = Src0.format <_CastTy>();
1175
+ _SrcTy _TmpSrc1 = Src1.format <_CastTy>();
1169
1176
constexpr bool _Transposed = false ;
1170
1177
auto _TmpRes = __cm_intrinsic_impl_lsc_atomic_bti<Op, _DS, VS, _Transposed,
1171
- L1H, L3H, _MessTy , N>(
1172
- Pred, Idx, Offset, Src0, Src1 );
1173
- return lsc_format_ret<T, _MessTy , _RetTy>(_TmpRes);
1178
+ L1H, L3H, _IntRetTy , N>(
1179
+ Pred, Idx, Offset, _TmpSrc0, _TmpSrc1 );
1180
+ return lsc_format_ret<T, _IntRetTy , _RetTy>(_TmpRes);
1174
1181
}
1175
1182
1176
1183
// flat-address atomic
@@ -1192,12 +1199,12 @@ CM_NODEBUG CM_INLINE auto cm_ptr_atomic(T *Ptr, vector<unsigned, N> Offset,
1192
1199
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1193
1200
constexpr bool _Transposed = false ;
1194
1201
uint64_t _Addr = (uint64_t )Ptr;
1195
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1202
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1196
1203
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1197
1204
auto _TmpRes =
1198
1205
__cm_intrinsic_impl_lsc_atomic_flat<Op, _DS, VS, _Transposed, L1H, L3H,
1199
- _MessTy , N>(Pred, _Addr, Offset);
1200
- return lsc_format_ret<T, _MessTy , _RetTy>(_TmpRes);
1206
+ _IntRetTy , N>(Pred, _Addr, Offset);
1207
+ return lsc_format_ret<T, _IntRetTy , _RetTy>(_TmpRes);
1201
1208
}
1202
1209
1203
1210
template <AtomicOp Op, typename T, VectorSize VS = VectorSize::N1,
@@ -1219,12 +1226,15 @@ cm_ptr_atomic(T *Ptr, vector<unsigned, N> Offset,
1219
1226
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1220
1227
constexpr bool _Transposed = false ;
1221
1228
uint64_t _Addr = (uint64_t )Ptr;
1222
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1229
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1230
+ using _SrcTy = decltype (lsc_data_type_ext<T, N, VS>());
1223
1231
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1232
+ using _CastTy = typename lsc_bitcast_type<T>::type;
1233
+ _SrcTy _TmpSrc0 = Src0.format <_CastTy>();
1224
1234
auto _TmpRes = __cm_intrinsic_impl_lsc_atomic_flat<Op, _DS, VS, _Transposed,
1225
- L1H, L3H, _MessTy , N>(
1226
- Pred, _Addr, Offset, Src0 );
1227
- return lsc_format_ret<T, _MessTy , _RetTy>(_TmpRes);
1235
+ L1H, L3H, _IntRetTy , N>(
1236
+ Pred, _Addr, Offset, _TmpSrc0 );
1237
+ return lsc_format_ret<T, _IntRetTy , _RetTy>(_TmpRes);
1228
1238
}
1229
1239
1230
1240
template <AtomicOp Op, typename T, VectorSize VS = VectorSize::N1,
@@ -1245,14 +1255,18 @@ cm_ptr_atomic(T *Ptr, vector<unsigned, N> Offset,
1245
1255
CM_STATIC_ERROR ((lsc_check_cache_hint<LSCAction::Atomic, L1H, L3H>()),
1246
1256
" unsupported cache hint" );
1247
1257
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1248
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1258
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1259
+ using _SrcTy = decltype (lsc_data_type_ext<T, N, VS>());
1249
1260
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1261
+ using _CastTy = typename lsc_bitcast_type<T>::type;
1262
+ _SrcTy _TmpSrc0 = Src0.format <_CastTy>();
1263
+ _SrcTy _TmpSrc1 = Src1.format <_CastTy>();
1250
1264
constexpr bool _Transposed = false ;
1251
1265
uint64_t _Addr = (uint64_t )Ptr;
1252
1266
auto _TmpRes = __cm_intrinsic_impl_lsc_atomic_flat<Op, _DS, VS, _Transposed,
1253
- L1H, L3H, _MessTy , N>(
1254
- Pred, _Addr, Offset, Src0, Src1 );
1255
- return lsc_format_ret<T, _MessTy , _RetTy>(_TmpRes);
1267
+ L1H, L3H, _IntRetTy , N>(
1268
+ Pred, _Addr, Offset, _TmpSrc0, _TmpSrc1 );
1269
+ return lsc_format_ret<T, _IntRetTy , _RetTy>(_TmpRes);
1256
1270
}
1257
1271
1258
1272
// bindless-address atomic
@@ -1298,12 +1312,12 @@ CM_NODEBUG CM_INLINE auto cm_atomic_slm(vector<unsigned, N> Offset,
1298
1312
" unsupported cache hint" );
1299
1313
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1300
1314
constexpr bool _Transposed = false ;
1301
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1315
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1302
1316
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1303
1317
auto _TmpRes =
1304
1318
__cm_intrinsic_impl_lsc_atomic_slm<Op, _DS, VS, _Transposed, L1H, L3H,
1305
- _MessTy , N>(Pred, Offset);
1306
- return lsc_format_ret<T, _MessTy , _RetTy>(_TmpRes);
1319
+ _IntRetTy , N>(Pred, Offset);
1320
+ return lsc_format_ret<T, _IntRetTy , _RetTy>(_TmpRes);
1307
1321
}
1308
1322
1309
1323
template <AtomicOp Op, typename T, VectorSize VS = VectorSize::N1,
@@ -1324,12 +1338,15 @@ cm_atomic_slm(vector<unsigned, N> Offset,
1324
1338
" unsupported cache hint" );
1325
1339
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1326
1340
constexpr bool _Transposed = false ;
1327
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1341
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1342
+ using _SrcTy = decltype (lsc_data_type_ext<T, N, VS>());
1328
1343
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1344
+ using _CastTy = typename lsc_bitcast_type<T>::type;
1345
+ _SrcTy _TmpSrc0 = Src0.format <_CastTy>();
1329
1346
auto _TmpRes =
1330
1347
__cm_intrinsic_impl_lsc_atomic_slm<Op, _DS, VS, _Transposed, L1H, L3H,
1331
- _MessTy , N>(Pred, Offset, Src0 );
1332
- return lsc_format_ret<T, _MessTy , _RetTy>(_TmpRes);
1348
+ _IntRetTy , N>(Pred, Offset, _TmpSrc0 );
1349
+ return lsc_format_ret<T, _IntRetTy , _RetTy>(_TmpRes);
1333
1350
}
1334
1351
1335
1352
template <AtomicOp Op, typename T, VectorSize VS = VectorSize::N1,
@@ -1350,13 +1367,17 @@ cm_atomic_slm(vector<unsigned, N> Offset,
1350
1367
CM_STATIC_ERROR ((lsc_check_cache_hint<LSCAction::Atomic, L1H, L3H>()),
1351
1368
" unsupported cache hint" );
1352
1369
constexpr DataSize _DS = lsc_expand_ds (lsc_data_size<T, DS>());
1353
- using _MessTy = decltype (lsc_data_type_ext<T, N, VS>());
1370
+ using _IntRetTy = decltype (lsc_data_type_ext<T, N, VS>());
1371
+ using _SrcTy = decltype (lsc_data_type_ext<T, N, VS>());
1354
1372
using _RetTy = decltype (lsc_data_type<T, N, VS>());
1373
+ using _CastTy = typename lsc_bitcast_type<T>::type;
1374
+ _SrcTy _TmpSrc0 = Src0.format <_CastTy>();
1375
+ _SrcTy _TmpSrc1 = Src1.format <_CastTy>();
1355
1376
constexpr bool _Transposed = false ;
1356
- auto _TmpRes =
1357
- __cm_intrinsic_impl_lsc_atomic_slm<Op, _DS, VS, _Transposed, L1H, L3H,
1358
- _MessTy, N>( Pred, Offset, Src0, Src1 );
1359
- return lsc_format_ret<T, _MessTy , _RetTy>(_TmpRes);
1377
+ auto _TmpRes = __cm_intrinsic_impl_lsc_atomic_slm<Op, _DS, VS, _Transposed,
1378
+ L1H, L3H, _IntRetTy, N>(
1379
+ Pred, Offset, _TmpSrc0, _TmpSrc1 );
1380
+ return lsc_format_ret<T, _IntRetTy , _RetTy>(_TmpRes);
1360
1381
}
1361
1382
1362
1383
// /
0 commit comments