@@ -144,22 +144,26 @@ Tensor& any_out(
144
144
ET_SWITCH_REALHBBF16_TYPES (in_type, ctx, name, CTYPE_IN, [&] {
145
145
ET_SWITCH_TWO_TYPES (Bool, Byte , out_type, ctx, name, CTYPE_OUT, [&] {
146
146
CTYPE_OUT* out_data = out.mutable_data_ptr <CTYPE_OUT>();
147
- for (const auto out_ix : c10::irange (out.numel ())) {
148
- CTYPE_OUT any = false ;
149
- if (in.numel () > 0 ) {
150
- std::tuple<CTYPE_OUT, long > acc =
151
- map_reduce_over_dim<CTYPE_IN, CTYPE_OUT>(
152
- [](CTYPE_IN v) { return static_cast <bool >(v); },
153
- [](bool outv, long , bool acc, long ) {
154
- return std::tuple<bool , long >{acc || outv, 0 };
155
- },
156
- in,
157
- dim,
158
- out_ix);
159
- any = std::get<0 >(acc);
160
- }
161
- out_data[out_ix] = any;
162
- }
147
+ const bool success = parallel_for_each_reduce_over_dim_output_index (
148
+ in, dim, out, [&](const auto begin, const auto end) {
149
+ for (const auto out_ix : c10::irange (begin, end)) {
150
+ CTYPE_OUT any = false ;
151
+ if (in.numel () > 0 ) {
152
+ std::tuple<CTYPE_OUT, long > acc =
153
+ map_reduce_over_dim<CTYPE_IN, CTYPE_OUT>(
154
+ [](CTYPE_IN v) { return static_cast <bool >(v); },
155
+ [](bool outv, long , bool acc, long ) {
156
+ return std::tuple<bool , long >{acc || outv, 0 };
157
+ },
158
+ in,
159
+ dim,
160
+ out_ix);
161
+ any = std::get<0 >(acc);
162
+ }
163
+ out_data[out_ix] = any;
164
+ }
165
+ });
166
+ ET_KERNEL_CHECK_MSG (ctx, success, Internal, , " parallel_for failed" );
163
167
});
164
168
});
165
169
0 commit comments