Skip to content

Commit c4f3742

Browse files
authored
Replace INFINITY by std::numeric_limits<float>::infinity() (#22868)
Replace INFINITY by `std::numeric_limits<float>::infinity()` to avoid build errors with Visual Studio 2022 v17.12 Preview 5.

### Motivation and Context

#22728
1 parent 02a0be3 commit c4f3742

File tree

14 files changed

+70
-49
lines changed

14 files changed

+70
-49
lines changed

onnxruntime/contrib_ops/cuda/bert/flash_attention/flash_fwd_kernel.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ inline __device__ void compute_attn_1rowblock(const Params& params, const int bi
9898
for (int m = 0; m < size<1>(tOgO); ++m) {
9999
const int row = get<0>(tOcO(0, m, 0));
100100
if (row < binfo.actual_seqlen_q - m_block * kBlockM && get<1>(tOcO(0, m, 0)) == 0) {
101-
gLSE(row) = INFINITY;
101+
gLSE(row) = std::numeric_limits<ElementAccum>::infinity();
102102
}
103103
}
104104
return;
@@ -499,7 +499,7 @@ inline __device__ void compute_attn_1rowblock_splitkv(const Params& params, cons
499499
for (int m = 0; m < size<1>(tOgOaccum); ++m) {
500500
const int row = get<0>(tOcO(0, m, 0));
501501
if (row < binfo.actual_seqlen_q - m_block * kBlockM && get<1>(tOcO(0, m, 0)) == 0) {
502-
gLSEaccum(row) = Split ? -INFINITY : INFINITY;
502+
gLSEaccum(row) = Split ? -std::numeric_limits<ElementAccum>::infinity() : std::numeric_limits<ElementAccum>::infinity();
503503
}
504504
}
505505
return;
@@ -1061,7 +1061,7 @@ inline __device__ void combine_attn_seqk_parallel(const Params& params) {
10611061
for (int l = 0; l < kNLsePerThread; ++l) {
10621062
const int row = l * kRowsPerLoadLSE + tidx / kBlockM;
10631063
const int col = tidx % kBlockM;
1064-
ElementAccum lse = (row < params.num_splits && col < params.b * params.h * params.seqlen_q - bidx * kBlockM) ? gLSEaccum(row, col) : -INFINITY;
1064+
ElementAccum lse = (row < params.num_splits && col < params.b * params.h * params.seqlen_q - bidx * kBlockM) ? gLSEaccum(row, col) : -std::numeric_limits<ElementAccum>::infinity();
10651065
if (row < kMaxSplits) {
10661066
sLSE[row][col] = lse;
10671067
}
@@ -1082,7 +1082,7 @@ inline __device__ void combine_attn_seqk_parallel(const Params& params) {
10821082
for (int l = 0; l < kNLsePerThread; ++l) {
10831083
const int row = l * kRowsPerLoadTranspose + tidx % kRowsPerLoadTranspose;
10841084
const int col = tidx / kRowsPerLoadTranspose;
1085-
lse_accum(l) = (row < kMaxSplits && col < kBlockM) ? sLSE[row][col] : -INFINITY;
1085+
lse_accum(l) = (row < kMaxSplits && col < kBlockM) ? sLSE[row][col] : -std::numeric_limits<ElementAccum>::infinity();
10861086
// if (bidx == 0 && tidx < 32) { printf("tidx = %d, row = %d, col = %d, lse = %f\n", tidx, row, col, lse_accum(l)); }
10871087
}
10881088

@@ -1094,7 +1094,7 @@ inline __device__ void combine_attn_seqk_parallel(const Params& params) {
10941094
}
10951095
MaxOp<float> max_op;
10961096
lse_max = Allreduce<kRowsPerLoadTranspose>::run(lse_max, max_op);
1097-
lse_max = lse_max == -INFINITY ? 0.0f : lse_max; // In case all local LSEs are -inf
1097+
lse_max = lse_max == -std::numeric_limits<ElementAccum>::infinity() ? 0.0f : lse_max; // In case all local LSEs are -inf
10981098
float lse_sum = expf(lse_accum(0) - lse_max);
10991099
#pragma unroll
11001100
for (int l = 1; l < kNLsePerThread; ++l) {
@@ -1104,7 +1104,7 @@ inline __device__ void combine_attn_seqk_parallel(const Params& params) {
11041104
lse_sum = Allreduce<kRowsPerLoadTranspose>::run(lse_sum, sum_op);
11051105
// For the case where all local lse == -INFINITY, we want to set lse_logsum to INFINITY. Otherwise
11061106
// lse_logsum is log(0.0) = -INFINITY and we get NaN when we do lse_accum(l) - lse_logsum.
1107-
ElementAccum lse_logsum = (lse_sum == 0.f || lse_sum != lse_sum) ? INFINITY : logf(lse_sum) + lse_max;
1107+
ElementAccum lse_logsum = (lse_sum == 0.f || lse_sum != lse_sum) ? std::numeric_limits<ElementAccum>::infinity() : logf(lse_sum) + lse_max;
11081108
// if (bidx == 0 && tidx < 32) { printf("tidx = %d, lse = %f, lse_max = %f, lse_logsum = %f\n", tidx, lse_accum(0), lse_max, lse_logsum); }
11091109
if (tidx % kRowsPerLoadTranspose == 0 && tidx / kRowsPerLoadTranspose < kBlockM) {
11101110
gLSE(tidx / kRowsPerLoadTranspose) = lse_logsum;

onnxruntime/contrib_ops/cuda/bert/flash_attention/mask.h

+8-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#pragma once
66

7+
#include <limits>
78
#include <cute/tensor.hpp>
89

910
namespace onnxruntime {
@@ -28,7 +29,7 @@ __forceinline__ __device__ void apply_mask(Tensor<Engine, Layout>& tensor, const
2829
// Without the "make_coord" we get wrong results
2930
#pragma unroll
3031
for (int mi = 0; mi < size<0>(tensor); ++mi) {
31-
tensor(mi, make_coord(j, nj)) = -INFINITY;
32+
tensor(mi, make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
3233
}
3334
}
3435
}
@@ -59,7 +60,7 @@ __forceinline__ __device__ void apply_mask_local(Tensor<Engine, Layout>& tensor,
5960
for (int j = 0; j < size<1, 0>(tensor); ++j) {
6061
const int col_idx = col_idx_base + j;
6162
if (col_idx >= col_idx_limit_right || (HasWSLeft && col_idx < col_idx_limit_left)) {
62-
tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
63+
tensor(make_coord(i, mi), make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
6364
}
6465
}
6566
}
@@ -96,7 +97,7 @@ __forceinline__ __device__ void apply_mask_causal_w_idx(
9697
#pragma unroll
9798
for (int ni = 0; ni < size<1, 1>(tensor); ++ni) {
9899
if (col_idx_offset_ + get<1>(idx_rowcol(0, ni)) >= col_idx_limit) {
99-
tensor(mi, ni) = -INFINITY;
100+
tensor(mi, ni) = -std::numeric_limits<float>::infinity();
100101
}
101102
}
102103
// if (cute::thread0()) {
@@ -151,7 +152,7 @@ struct Mask {
151152
}
152153
if constexpr (!Is_even_MN) {
153154
if (col_idx >= max_seqlen_k) {
154-
tensor(mi, make_coord(j, nj)) = -INFINITY;
155+
tensor(mi, make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
155156
}
156157
}
157158
}
@@ -181,18 +182,18 @@ struct Mask {
181182
}
182183
if constexpr (Causal_mask) {
183184
if (col_idx >= col_idx_limit_right) {
184-
tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
185+
tensor(make_coord(i, mi), make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
185186
}
186187
}
187188
if constexpr (Is_local) {
188189
if (col_idx >= col_idx_limit_right || col_idx < col_idx_limit_left) {
189-
tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
190+
tensor(make_coord(i, mi), make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
190191
}
191192
}
192193
if constexpr (!Causal_mask && !Is_local && !Is_even_MN) {
193194
// Causal and Local already handles MN masking
194195
if (col_idx >= max_seqlen_k) {
195-
tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
196+
tensor(make_coord(i, mi), make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
196197
}
197198
}
198199
}

onnxruntime/contrib_ops/cuda/bert/flash_attention/softmax.h

+9-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#pragma once
55

66
#include <cmath>
7+
#include <limits>
78

89
#include <cute/tensor.hpp>
910

@@ -71,7 +72,9 @@ __forceinline__ __device__ void scale_apply_exp2(Tensor<Engine0, Layout0>& tenso
7172
// If max is -inf, then all elements must have been -inf (possibly due to masking).
7273
// We don't want (-inf - (-inf)) since that would give NaN.
7374
// If we don't have float around M_LOG2E the multiplication is done in fp64.
74-
const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * (Scale_max ? scale : float(M_LOG2E));
75+
const float max_scaled = max(mi) == -std::numeric_limits<float>::infinity()
76+
? 0.f
77+
: max(mi) * (Scale_max ? scale : float(M_LOG2E));
7578
#pragma unroll
7679
for (int ni = 0; ni < size<1>(tensor); ++ni) {
7780
// Instead of computing exp(x - max), we compute exp2(x * log_2(e) -
@@ -99,7 +102,7 @@ __forceinline__ __device__ void max_scale_exp2_sum(Tensor<Engine0, Layout0>& ten
99102
max(mi) = Allreduce<4>::run(max(mi), max_op);
100103
// If max is -inf, then all elements must have been -inf (possibly due to masking).
101104
// We don't want (-inf - (-inf)) since that would give NaN.
102-
const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * scale;
105+
const float max_scaled = max(mi) == -std::numeric_limits<float>::infinity() ? 0.f : max(mi) * scale;
103106
sum(mi) = 0;
104107
#pragma unroll
105108
for (int ni = 0; ni < size<1>(tensor); ++ni) {
@@ -143,7 +146,7 @@ struct Softmax {
143146
for (int mi = 0; mi < size(row_max); ++mi) {
144147
float scores_max_cur = !Check_inf
145148
? row_max(mi)
146-
: (row_max(mi) == -INFINITY ? 0.0f : row_max(mi));
149+
: (row_max(mi) == -std::numeric_limits<float>::infinity() ? 0.0f : row_max(mi));
147150
float scores_scale = exp2f((scores_max_prev(mi) - scores_max_cur) * softmax_scale_log2);
148151
row_sum(mi) *= scores_scale;
149152
#pragma unroll
@@ -169,7 +172,9 @@ struct Softmax {
169172
for (int mi = 0; mi < size<0>(acc_o_rowcol); ++mi) {
170173
float sum = smooth_softmax ? row_sum(mi) + expf(-row_max(mi) * softmax_scale) : row_sum(mi);
171174
float inv_sum = (sum == 0.f || sum != sum) ? 1.f : 1.f / sum;
172-
lse(mi) = (sum == 0.f || sum != sum) ? (Split ? -INFINITY : INFINITY) : row_max(mi) * softmax_scale + __logf(sum);
175+
lse(mi) = (sum == 0.f || sum != sum)
176+
? (Split ? -std::numeric_limits<float>::infinity() : std::numeric_limits<float>::infinity())
177+
: row_max(mi) * softmax_scale + __logf(sum);
173178
float scale = inv_sum;
174179
#pragma unroll
175180
for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) {

onnxruntime/contrib_ops/cuda/bert/lean_attention/lean_fwd_kernel.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,7 @@ inline __device__ void lean_compute_attn_impl_ver3(const Params& params, const i
825825
const int row = l * kRowsPerLoadLSE + tidx / kBlockM;
826826
const int col = tidx % kBlockM;
827827
// We skip the first row = 0, as we already populated it in shared memory.
828-
ElementAccum lse = (row > 0 && row < total_splits && col < params.b * params.h * (index_t)params.seqlen_q - row_offset_lseaccum) ? gLSEaccumRead(row, col) : -INFINITY;
828+
ElementAccum lse = (row > 0 && row < total_splits && col < params.b * params.h * (index_t)params.seqlen_q - row_offset_lseaccum) ? gLSEaccumRead(row, col) : -std::numeric_limits<ElementAccum>::infinity();
829829
if (row > 0 && row < kMaxSplits) {
830830
sLSE(row, col) = lse;
831831

@@ -857,7 +857,7 @@ inline __device__ void lean_compute_attn_impl_ver3(const Params& params, const i
857857
for (int l = 0; l < kNLsePerThread; ++l) {
858858
const int row = l * kRowsPerLoadTranspose + tidx % kRowsPerLoadTranspose;
859859
const int col = tidx / kRowsPerLoadTranspose;
860-
lse_accum(l) = (row < kMaxSplits && col < kBlockM) ? sLSE(row, col) : -INFINITY;
860+
lse_accum(l) = (row < kMaxSplits && col < kBlockM) ? sLSE(row, col) : -std::numeric_limits<ElementAccum>::infinity();
861861

862862
#if defined(DEBUG_LEAN_ATTENTION)
863863
if (threadIdx.x == 0 && blockIdx.z == tracing_block) {
@@ -874,7 +874,7 @@ inline __device__ void lean_compute_attn_impl_ver3(const Params& params, const i
874874
}
875875
MaxOp<float> max_op;
876876
lse_max = Allreduce<kRowsPerLoadTranspose>::run(lse_max, max_op);
877-
lse_max = lse_max == -INFINITY ? 0.0f : lse_max; // In case all local LSEs are -inf
877+
lse_max = lse_max == -std::numeric_limits<ElementAccum>::infinity() ? 0.0f : lse_max; // In case all local LSEs are -inf
878878
float lse_sum = expf(lse_accum(0) - lse_max);
879879
#pragma unroll
880880
for (int l = 1; l < kNLsePerThread; ++l) {
@@ -884,7 +884,9 @@ inline __device__ void lean_compute_attn_impl_ver3(const Params& params, const i
884884
lse_sum = Allreduce<kRowsPerLoadTranspose>::run(lse_sum, sum_op);
885885
// For the case where all local lse == -INFINITY, we want to set lse_logsum to INFINITY. Otherwise
886886
// lse_logsum is log(0.0) = -INFINITY and we get NaN when we do lse_accum(l) - lse_logsum.
887-
ElementAccum lse_logsum = (lse_sum == 0.f || lse_sum != lse_sum) ? INFINITY : logf(lse_sum) + lse_max;
887+
ElementAccum lse_logsum = (lse_sum == 0.f || lse_sum != lse_sum)
888+
? std::numeric_limits<ElementAccum>::infinity()
889+
: logf(lse_sum) + lse_max;
888890
// if (tidx % kRowsPerLoadTranspose == 0 && tidx / kRowsPerLoadTranspose < kBlockM) { gLSE(tidx / kRowsPerLoadTranspose) = lse_logsum; }
889891
// Store the scales exp(lse - lse_logsum) in shared memory.
890892
#pragma unroll

onnxruntime/contrib_ops/cuda/bert/lean_attention/mask.h

+8-8
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
******************************************************************************/
44

55
#pragma once
6-
6+
#include <limits>
77
#include <cute/tensor.hpp>
88

99
namespace onnxruntime {
@@ -28,7 +28,7 @@ __forceinline__ __device__ void apply_mask(Tensor<Engine, Layout>& tensor, const
2828
// Without the "make_coord" we get wrong results
2929
#pragma unroll
3030
for (int mi = 0; mi < size<0>(tensor); ++mi) {
31-
tensor(mi, make_coord(j, nj)) = -INFINITY;
31+
tensor(mi, make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
3232
}
3333
}
3434
}
@@ -59,7 +59,7 @@ __forceinline__ __device__ void apply_mask_local(Tensor<Engine, Layout>& tensor,
5959
for (int j = 0; j < size<1, 0>(tensor); ++j) {
6060
const int col_idx = col_idx_base + j;
6161
if (col_idx >= col_idx_limit_right || (HasWSLeft && col_idx < col_idx_limit_left)) {
62-
tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
62+
tensor(make_coord(i, mi), make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
6363
}
6464
}
6565
}
@@ -96,7 +96,7 @@ __forceinline__ __device__ void apply_mask_causal_w_idx(
9696
#pragma unroll
9797
for (int ni = 0; ni < size<1, 1>(tensor); ++ni) {
9898
if (col_idx_offset_ + get<1>(idx_rowcol(0, ni)) >= col_idx_limit) {
99-
tensor(mi, ni) = -INFINITY;
99+
tensor(mi, ni) = -std::numeric_limits<float>::infinity();
100100
}
101101
}
102102
// if (cute::thread0()) {
@@ -152,7 +152,7 @@ struct Mask {
152152
}
153153
if constexpr (!Is_even_MN) {
154154
if (col_idx >= max_seqlen_k) {
155-
tensor(mi, make_coord(j, nj)) = -INFINITY;
155+
tensor(mi, make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
156156
}
157157
}
158158
}
@@ -182,18 +182,18 @@ struct Mask {
182182
}
183183
if constexpr (Causal_mask) {
184184
if (col_idx >= col_idx_limit_right) {
185-
tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
185+
tensor(make_coord(i, mi), make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
186186
}
187187
}
188188
if constexpr (Is_local) {
189189
if (col_idx >= col_idx_limit_right || col_idx < col_idx_limit_left) {
190-
tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
190+
tensor(make_coord(i, mi), make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
191191
}
192192
}
193193
if constexpr (!Causal_mask && !Is_local && !Is_even_MN) {
194194
// Causal and Local already handles MN masking
195195
if (col_idx >= max_seqlen_k) {
196-
tensor(make_coord(i, mi), make_coord(j, nj)) = -INFINITY;
196+
tensor(make_coord(i, mi), make_coord(j, nj)) = -std::numeric_limits<float>::infinity();
197197
}
198198
}
199199
}

onnxruntime/contrib_ops/cuda/bert/lean_attention/softmax.h

+9-5
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
******************************************************************************/
44

55
#pragma once
6-
6+
#include <limits>
77
#include <cmath>
88

99
#include <cute/tensor.hpp>
@@ -72,7 +72,9 @@ __forceinline__ __device__ void scale_apply_exp2(Tensor<Engine0, Layout0>& tenso
7272
// If max is -inf, then all elements must have been -inf (possibly due to masking).
7373
// We don't want (-inf - (-inf)) since that would give NaN.
7474
// If we don't have float around M_LOG2E the multiplication is done in fp64.
75-
const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * (Scale_max ? scale : float(M_LOG2E));
75+
const float max_scaled = max(mi) == -std::numeric_limits<float>::infinity()
76+
? 0.f
77+
: max(mi) * (Scale_max ? scale : float(M_LOG2E));
7678
#pragma unroll
7779
for (int ni = 0; ni < size<1>(tensor); ++ni) {
7880
// Instead of computing exp(x - max), we compute exp2(x * log_2(e) -
@@ -107,7 +109,7 @@ __forceinline__ __device__ void max_scale_exp2_sum(Tensor<Engine0, Layout0>& ten
107109
max(mi) = Allreduce<4>::run(max(mi), max_op);
108110
// If max is -inf, then all elements must have been -inf (possibly due to masking).
109111
// We don't want (-inf - (-inf)) since that would give NaN.
110-
const float max_scaled = max(mi) == -INFINITY ? 0.f : max(mi) * scale;
112+
const float max_scaled = max(mi) == -std::numeric_limits<float>::infinity() ? 0.f : max(mi) * scale;
111113
sum(mi) = 0;
112114
#pragma unroll
113115
for (int ni = 0; ni < size<1>(tensor); ++ni) {
@@ -151,7 +153,7 @@ struct Softmax {
151153
for (int mi = 0; mi < size(row_max); ++mi) {
152154
float scores_max_cur = !Check_inf
153155
? row_max(mi)
154-
: (row_max(mi) == -INFINITY ? 0.0f : row_max(mi));
156+
: (row_max(mi) == -std::numeric_limits<float>::infinity() ? 0.0f : row_max(mi));
155157
float scores_scale = exp2f((scores_max_prev(mi) - scores_max_cur) * softmax_scale_log2);
156158
row_sum(mi) *= scores_scale;
157159
#pragma unroll
@@ -181,7 +183,9 @@ struct Softmax {
181183
// printf("sum: %f, inv_sum: %f\n", sum, inv_sum);
182184
// printf("mi %d row_max %f softmax_scale %f\n", mi, row_max(mi), softmax_scale);
183185
// }
184-
lse(mi) = (sum == 0.f || sum != sum) ? (Split ? -INFINITY : INFINITY) : row_max(mi) * softmax_scale + __logf(sum);
186+
lse(mi) = (sum == 0.f || sum != sum)
187+
? (Split ? -std::numeric_limits<float>::infinity() : std::numeric_limits<float>::infinity())
188+
: row_max(mi) * softmax_scale + __logf(sum);
185189
float scale = !Is_dropout ? inv_sum : inv_sum * rp_dropout;
186190
#pragma unroll
187191
for (int ni = 0; ni < size<1>(acc_o_rowcol); ++ni) {

onnxruntime/contrib_ops/cuda/bert/ngram_repeat_block_impl.cu

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Licensed under the MIT License.
66
/*
77
Kernel implementation for blocking repeated n-grams.
88
*/
9-
9+
#include <limits>
1010
#include "core/providers/cuda/cu_inc/common.cuh"
1111
#include "contrib_ops/cuda/bert/ngram_repeat_block_impl.h"
1212

@@ -48,7 +48,7 @@ __global__ void banRepeatedTokens(const int64_t* __restrict__ tokens,
4848
}
4949
if (is_banned == true) {
5050
auto token_to_be_banned = tokens_shm[col + no_repeat_ngram_size - 1];
51-
lprobs[lprob_start + token_to_be_banned] = -INFINITY;
51+
lprobs[lprob_start + token_to_be_banned] = -std::numeric_limits<float>::infinity();
5252
}
5353
}
5454

onnxruntime/core/optimizer/attention_fusion_helper.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
3+
#include <limits>
34
#include "onnx/defs/shape_inference.h"
45
#include "onnx/defs/tensor_proto_util.h"
56
#include "core/framework/tensorprotoutils.h"
@@ -767,7 +768,8 @@ bool MatchInputMaskSubgraph(const Graph& graph, const Node& layer_norm, const No
767768
}
768769

769770
// check where has X=-Infinity
770-
if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(where.InputDefs()[1]), -INFINITY, true)) {
771+
if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(where.InputDefs()[1]),
772+
-std::numeric_limits<float>::infinity(), true)) {
771773
DEBUG_LOG("where const not matched.");
772774
return false;
773775
}

onnxruntime/core/providers/xnnpack/detail/utils.cc

+3-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <unordered_map>
66
#include <unordered_set>
77
#include <vector>
8+
#include <limits>
89

910
#include "core/common/common.h"
1011
#include "core/common/safeint.h"
@@ -239,8 +240,8 @@ std::unique_ptr<IndexedSubGraph::MetaDef> FuseActivation(const NodeUnit& node_un
239240
def.attributes = node_unit.GetNode().GetAttributes();
240241

241242
// use infinity as the default as that's what xnnpack uses if min/max are not set
242-
float min = -INFINITY;
243-
float max = INFINITY;
243+
float min = -std::numeric_limits<float>::infinity();
244+
float max = std::numeric_limits<float>::infinity();
244245

245246
const auto& activation_type = activation.OpType();
246247
if (activation_type == "Clip") {

0 commit comments

Comments
 (0)