File tree 1 file changed +21
-3
lines changed
1 file changed +21
-3
lines changed Original file line number Diff line number Diff line change @@ -404,7 +404,13 @@ static __device__ __forceinline__ T dequantize_1_q4_0(const void * __restrict__
404
404
const int q0 = x[ib].qs [iqs];
405
405
const int q = ((q0 >> (4 *shift)) & 0x0F ) - 8 ;
406
406
407
- return d*((T) q);
407
+ #if FP16_AVAILABLE
408
+ if (std::is_same<T, half>::value) {
409
+ return ((half) d)*((half) q);
410
+ }
411
+ #endif // FP16_AVAILABLE
412
+
413
+ return ((float ) d)*((float ) q);
408
414
}
409
415
410
416
template <typename T>
@@ -444,7 +450,13 @@ static __device__ __forceinline__ T dequantize_1_q5_0(const void * __restrict__
444
450
const int qh = ((qh0 >> idq) << 4 ) & 0x10 ;
445
451
const int q = (ql | qh) - 16 ;
446
452
447
- return d*((T) q);
453
+ #if FP16_AVAILABLE
454
+ if (std::is_same<T, half>::value) {
455
+ return ((half) d)*((half) q);
456
+ }
457
+ #endif // FP16_AVAILABLE
458
+
459
+ return ((float ) d)*((float ) q);
448
460
}
449
461
450
462
template <typename T>
@@ -482,7 +494,13 @@ static __device__ __forceinline__ T dequantize_1_q8_0(const void * __restrict__
482
494
const T d = x[ib].d ;
483
495
const int q = x[ib].qs [iqs];
484
496
485
- return d*((T) q);
497
+ #if FP16_AVAILABLE
498
+ if (std::is_same<T, half>::value) {
499
+ return ((half) d)*((half) q);
500
+ }
501
+ #endif // FP16_AVAILABLE
502
+
503
+ return ((float ) d)*((float ) q);
486
504
}
487
505
488
506
template <typename T>
You can’t perform that action at this time.
0 commit comments