Skip to content

Commit 389e2e5

Browse files
authored
[SYCL] Add force range rounding option and introduce new compiler flag (#12715)
Adds a new range rounding preference, `force`: when the compiler flag is set to this value, only the range rounded parallel_for kernel is generated. This can make binaries smaller, as SYCL range kernels are no longer duplicated across range rounded and unrounded versions. Also adds the flag -fsycl-range-rounding, which accepts the values on, force or disable. This flag is intended to supersede the -fsycl-disable-range-rounding flag. Existing tests have been extended to check the functionality of the new flag, and the range rounding sycl-e2e test has been refactored. A brief description of the flag's behaviour has also been added in `doc`.
1 parent 7fb214a commit 389e2e5

File tree

14 files changed

+304
-160
lines changed

14 files changed

+304
-160
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,9 @@ LANGOPT(
298298
"SYCL compiler assumes value fits within MAX_INT for member function of "
299299
"get/operator[], get_id/operator[] and get_global_id/get_global_linear_id "
300300
"in SYCL class id, iterm and nd_iterm")
301-
LANGOPT(SYCLDisableRangeRounding, 1, 0, "Disable parallel for range rounding")
301+
ENUM_LANGOPT(SYCLRangeRounding, SYCLRangeRoundingPreference, 2,
302+
SYCLRangeRoundingPreference::On,
303+
"Preference for SYCL parallel_for range rounding")
302304
LANGOPT(SYCLEnableIntHeaderDiags, 1, 0, "Enable diagnostics that require the "
303305
"SYCL integration header")
304306
LANGOPT(SYCLAllowVirtualFunctions, 1, 0,

clang/include/clang/Basic/LangOptions.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,12 @@ class LangOptionsBase {
151151
undefined
152152
};
153153

154+
enum class SYCLRangeRoundingPreference {
155+
On,
156+
Disable,
157+
Force,
158+
};
159+
154160
enum HLSLLangStd {
155161
HLSL_Unset = 0,
156162
HLSL_2015 = 2015,

clang/include/clang/Driver/Options.td

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3997,6 +3997,21 @@ def fsycl_host_compiler_options_EQ : Joined<["-"], "fsycl-host-compiler-options=
39973997
Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"When performing the host compilation with "
39983998
"-fsycl-host-compiler specified, use the given options during that compile. "
39993999
"Options are expected to be a quoted list of space separated options.">;
4000+
def fsycl_range_rounding_EQ : Joined<["-"], "fsycl-range-rounding=">,
4001+
Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>,
4002+
Values<"on,disable,force">,
4003+
NormalizedValuesScope<"LangOptions::SYCLRangeRoundingPreference">,
4004+
NormalizedValues<["On", "Disable", "Force"]>,
4005+
MarshallingInfoEnum<LangOpts<"SYCLRangeRounding">, "On">,
4006+
HelpText<"Options for range rounding of SYCL range kernels: "
4007+
"disable (do not generate range rounded kernels) "
4008+
"force (only generate range rounded kernels) "
4009+
"on (generate range rounded kernels as well as unrounded kernels). Default is 'on'">;
4010+
def fsycl_disable_range_rounding : Flag<["-"], "fsycl-disable-range-rounding">,
4011+
Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>,
4012+
Alias<fsycl_range_rounding_EQ>, AliasArgs<["disable"]>,
4013+
HelpText<"Deprecated: please use -fsycl-range-rounding=disable instead.">,
4014+
Flags<[Deprecated]>;
40004015
def fno_sycl_use_footer : Flag<["-"], "fno-sycl-use-footer">, Visibility<[ClangOption, CLOption, DXCOption]>,
40014016
HelpText<"Disable usage of the integration footer during SYCL enabled "
40024017
"compilations.">;
@@ -8256,9 +8271,6 @@ defm sycl_allow_func_ptr: BoolFOption<"sycl-allow-func-ptr",
82568271
def fenable_sycl_dae : Flag<["-"], "fenable-sycl-dae">,
82578272
HelpText<"Enable Dead Argument Elimination in SPIR kernels">,
82588273
MarshallingInfoFlag<LangOpts<"EnableDAEInSpirKernels">>;
8259-
def fsycl_disable_range_rounding : Flag<["-"], "fsycl-disable-range-rounding">,
8260-
HelpText<"Disable parallel for range rounding.">,
8261-
MarshallingInfoFlag<LangOpts<"SYCLDisableRangeRounding">>;
82628274
def fsycl_enable_int_header_diags: Flag<["-"], "fsycl-enable-int-header-diags">,
82638275
HelpText<"Enable diagnostics that require the SYCL integration header.">,
82648276
MarshallingInfoFlag<LangOpts<"SYCLEnableIntHeaderDiags">>;

clang/lib/Driver/Driver.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,6 +1158,10 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11581158
checkSingleArgValidity(DeviceCodeSplit,
11591159
{"per_kernel", "per_source", "auto", "off"});
11601160

1161+
Arg *RangeRoundingPreference =
1162+
C.getInputArgs().getLastArg(options::OPT_fsycl_range_rounding_EQ);
1163+
checkSingleArgValidity(RangeRoundingPreference, {"disable", "force", "on"});
1164+
11611165
Arg *SYCLForceTarget =
11621166
getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ);
11631167
if (SYCLForceTarget) {

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5427,6 +5427,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
54275427
options::OPT_fno_sycl_esimd_force_stateless_mem, true))
54285428
CmdArgs.push_back("-fno-sycl-esimd-force-stateless-mem");
54295429

5430+
if (Arg *A = Args.getLastArg(options::OPT_fsycl_range_rounding_EQ))
5431+
A->render(Args, CmdArgs);
5432+
54305433
// Add the Unique ID prefix
54315434
StringRef UniqueID = D.getSYCLUniqueID(Input.getBaseInput());
54325435
if (!UniqueID.empty())
@@ -5451,10 +5454,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
54515454
bool DisableRangeRounding = false;
54525455
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
54535456
if (A->getOption().matches(options::OPT_O0))
5454-
DisableRangeRounding = true;
5457+
// If the user has set some range rounding preference then let that
5458+
// override not range rounding at -O0
5459+
if (!Args.getLastArg(options::OPT_fsycl_range_rounding_EQ))
5460+
DisableRangeRounding = true;
54555461
}
54565462
if (DisableRangeRounding || HasFPGA)
5457-
CmdArgs.push_back("-fsycl-disable-range-rounding");
5463+
CmdArgs.push_back("-fsycl-range-rounding=disable");
54585464

54595465
if (HasFPGA) {
54605466
// Pass -fintelfpga to both the host and device SYCL compilations if set.

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,8 +579,16 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
579579
// Set __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ macro for
580580
// both host and device compilations if -fsycl-disable-range-rounding
581581
// flag is used.
582-
if (LangOpts.SYCLDisableRangeRounding)
582+
switch (LangOpts.getSYCLRangeRounding()) {
583+
case LangOptions::SYCLRangeRoundingPreference::Disable:
583584
Builder.defineMacro("__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__");
585+
break;
586+
case LangOptions::SYCLRangeRoundingPreference::Force:
587+
Builder.defineMacro("__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__");
588+
break;
589+
default:
590+
break;
591+
}
584592
}
585593

586594
if (LangOpts.DeclareSPIRVBuiltins) {

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5172,10 +5172,19 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
51725172
O << "#endif //" << Macro.first << "\n\n";
51735173
}
51745174

5175-
if (S.getLangOpts().SYCLDisableRangeRounding) {
5175+
switch (S.getLangOpts().getSYCLRangeRounding()) {
5176+
case LangOptions::SYCLRangeRoundingPreference::Disable:
51765177
O << "#ifndef __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ \n";
51775178
O << "#define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1\n";
51785179
O << "#endif //__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__\n\n";
5180+
break;
5181+
case LangOptions::SYCLRangeRoundingPreference::Force:
5182+
O << "#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ \n";
5183+
O << "#define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1\n";
5184+
O << "#endif //__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__\n\n";
5185+
break;
5186+
default:
5187+
break;
51795188
}
51805189

51815190
if (SpecConsts.size() > 0) {

clang/test/CodeGenSYCL/integration_header_ppmacros.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-SYCL2020
33
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -sycl-std=2017 -fsycl-int-header=%t.h %s
44
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-SYCL2017
5-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-disable-range-rounding -fsycl-int-header=%t.h %s
5+
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-range-rounding=disable -fsycl-int-header=%t.h %s
66
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-RANGE
7+
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-range-rounding=force -fsycl-int-header=%t.h %s
8+
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-FORCE-RANGE
79
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-int-header=%t.h %s
810
// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-NO-RANGE
911

@@ -33,4 +35,10 @@ int main() {
3335
// CHECK-RANGE: #ifndef __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__
3436
// CHECK-RANGE-NEXT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
3537
// CHECK-RANGE-NEXT: #endif //__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__
38+
39+
// CHECK-FORCE-RANGE: #ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
40+
// CHECK-FORCE-RANGE-NEXT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1
41+
// CHECK-FORCE-RANGE-NEXT: #endif //__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
42+
3643
// CHECK-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
44+
// CHECK-NO-RANGE-NOT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1

clang/test/Driver/sycl-offload-intelfpga.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@
2626
// CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-fintelfpga"
2727
// CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fintelfpga"{{.*}} "-fsycl-is-host"
2828

29-
/// FPGA target implies -fsycl-disable-range-rounding
29+
/// FPGA target implies -fsycl-range-rounding=disable
3030
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fintelfpga %s 2>&1 \
3131
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
3232
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown %s 2>&1 \
3333
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
34-
// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding"
35-
// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-disable-range-rounding"{{.*}} "-fsycl-is-host"
34+
// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable"
35+
// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-range-rounding=disable"{{.*}} "-fsycl-is-host"
3636

3737
/// FPGA target implies -emit-only-kernels-as-entry-points in sycl-post-link
3838
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fintelfpga %s 2>&1 \
@@ -41,12 +41,12 @@
4141
// RUN: | FileCheck -check-prefix=CHK-NON-KERNEL-ENTRY-POINTS %s
4242
// CHK-NON-KERNEL-ENTRY-POINTS: sycl-post-link{{.*}} "-emit-only-kernels-as-entry-points"
4343

44-
/// -fsycl-disable-range-rounding is applied to all compilations if fpga is used
44+
/// -fsycl-range-rounding=disable is applied to all compilations if fpga is used
4545
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown,spir64_gen-unknown-unknown %s 2>&1 \
4646
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING-MULTI %s
47-
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding"
48-
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-fsycl-disable-range-rounding"{{.*}} "-fsycl-is-host"
49-
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding"
47+
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable"
48+
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-fsycl-range-rounding=disable"{{.*}} "-fsycl-is-host"
49+
// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable"
5050

5151
/// -fintelfpga with -reuse-exe=
5252
// RUN: touch %t.cpp

clang/test/Driver/sycl-offload.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -508,13 +508,33 @@
508508
// RUN: | FileCheck -check-prefix=CHK-TOOLS-OPTS2 %s
509509
// CHK-TOOLS-OPTS2: clang-offload-wrapper{{.*}} "-link-opts=-DFOO1 -DFOO2"
510510

511-
/// -fsycl-disable-range-rounding settings
511+
/// -fsycl-range-rounding settings
512+
///
513+
/// // Check that driver flag is passed to cc1
514+
// RUN: %clang -### -fsycl -fsycl-range-rounding=disable %s 2>&1 \
515+
// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-DISABLE %s
516+
// RUN: %clang -### -fsycl -fsycl-range-rounding=force %s 2>&1 \
517+
// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-FORCE %s
518+
// RUN: %clang -### -fsycl -fsycl-range-rounding=on %s 2>&1 \
519+
// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-ON %s
520+
// CHK-DRIVER-RANGE-ROUNDING-DISABLE: "-cc1{{.*}}-fsycl-range-rounding=disable"
521+
// CHK-DRIVER-RANGE-ROUNDING-FORCE: "-cc1{{.*}}-fsycl-range-rounding=force"
522+
// CHK-DRIVER-RANGE-ROUNDING-ON: "-cc1{{.*}}-fsycl-range-rounding=on"
523+
///
524+
///
512525
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
513526
// RUN: -fsycl-targets=spir64 -O0 %s 2>&1 \
514527
// RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s
515528
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 -Od %s 2>&1 \
516529
// RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s
517-
// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-disable-range-rounding"
530+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
531+
// RUN: -O0 -fsycl-range-rounding=force %s 2>&1 \
532+
// RUN: | FileCheck -check-prefix=CHK-OVERRIDE-RANGE-ROUNDING %s
533+
// RUN: %clang_cl -### -fsycl -Od %s 2>&1 -fsycl-range-rounding=force %s 2>&1 \
534+
// RUN: | FileCheck -check-prefix=CHK-OVERRIDE-RANGE-ROUNDING %s
535+
// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-range-rounding=disable"
536+
// CHK-OVERRIDE-RANGE-ROUNDING: "-fsycl-range-rounding=force"
537+
// CHK-OVERRIDE-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=disable"
518538

519539
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \
520540
// RUN: -fsycl-targets=spir64 -O2 %s 2>&1 \
@@ -527,6 +547,8 @@
527547
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 %s 2>&1 \
528548
// RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s
529549
// CHK-RANGE-ROUNDING-NOT: "-fsycl-disable-range-rounding"
550+
// CHK-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=disable"
551+
// CHK-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=force"
530552

531553
/// ###########################################################################
532554

clang/test/Preprocessor/predefined-macros.c

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -284,32 +284,60 @@
284284
// CHECK-RDC: #define __CLANG_RDC__ 1
285285

286286
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \
287-
// RUN: -triple spir64-unknown-unknown -fsycl-disable-range-rounding -o - \
288-
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE
287+
// RUN: -triple spir64-unknown-unknown -fsycl-range-rounding=disable -o - \
288+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE
289289

290290
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \
291291
// RUN: -triple spir64_fpga-unknown-unknown -o - \
292-
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE
292+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE
293293

294-
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-disable-range-rounding \
294+
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-range-rounding=disable \
295295
// RUN: -triple spir64_fpga-unknown-unknown -o - \
296-
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE
296+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE
297297

298298
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -o - \
299-
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE
299+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE
300300

301301
// RUN: %clang_cc1 %s -E -dM -o - \
302-
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE
302+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE
303303

304304
// RUN: %clang_cc1 %s -E -dM -fsycl-is-host \
305-
// RUN: -triple x86_64-unknown-linux-gnu -fsycl-disable-range-rounding -o - \
306-
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE
305+
// RUN: -triple x86_64-unknown-linux-gnu -fsycl-range-rounding=disable -o - \
306+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE
307307

308308
// RUN: %clang_cc1 %s -E -dM -fsycl-is-host -o - \
309-
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE
309+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE
310310

311-
// CHECK-RANGE: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
312-
// CHECK-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
311+
// CHECK-DISABLE-RANGE: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
312+
// CHECK-DISABLE-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1
313+
314+
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \
315+
// RUN: -triple spir64-unknown-unknown -fsycl-range-rounding=force -o - \
316+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE
317+
318+
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \
319+
// RUN: -triple spir64_fpga-unknown-unknown -o - \
320+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE
321+
322+
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-range-rounding=force \
323+
// RUN: -triple spir64_fpga-unknown-unknown -o - \
324+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE
325+
326+
// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -o - \
327+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE
328+
329+
// RUN: %clang_cc1 %s -E -dM -o - \
330+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE
331+
332+
// RUN: %clang_cc1 %s -E -dM -fsycl-is-host \
333+
// RUN: -triple x86_64-unknown-linux-gnu -fsycl-range-rounding=force -o - \
334+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE
335+
336+
// RUN: %clang_cc1 %s -E -dM -fsycl-is-host -o - \
337+
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE
338+
339+
// CHECK-FORCE-RANGE: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1
340+
// CHECK-FORCE-NO-RANGE-NOT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1
313341

314342
// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple x86_64-unknown-linux-gnu \
315343
// RUN: -fgpu-default-stream=per-thread \
@@ -334,4 +362,4 @@
334362
// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device | FileCheck -match-full-lines \
335363
// RUN: %s --check-prefix=CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG
336364
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG: #define __HIPSTDPAR__ 1
337-
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1
365+
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1

sycl/doc/design/ParallelForRangeRounding.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,8 @@ rounding will only be used if the SYCL runtime X dimension exceeds some minimum
4242
value, which can be configured using the
4343
`SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS` environment variable.
4444

45-
Generation of range rounded kernels can be disabled by using the compiler flag
46-
`-fsycl-disable-range-rounding`.
45+
In order to reduce binary size, the user can tell the compiler to generate only
46+
the range rounded kernel by using the flag `-fsycl-range-rounding=force`. The
47+
user can also tell the SYCL implementation to produce only the unrounded kernel
48+
by using the flag `-fsycl-range-rounding=disable`. By default both kernels will
49+
be generated, which is equivalent to `-fsycl-range-rounding=on`.

sycl/include/sycl/handler.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1181,7 +1181,6 @@ class __SYCL_EXPORT handler {
11811181
// non-32-bit global range, we wrap the old kernel in a new kernel
11821182
// that has each work item perform multiple invocations of the old
11831183
// kernel in a 32-bit global range.
1184-
auto Dev = detail::getSyclObjImpl(detail::getDeviceFromHandler(*this));
11851184
id<Dims> MaxNWGs = [&] {
11861185
auto [MaxWGs, HasMaxWGs] = getMaxWorkGroups_v2();
11871186
if (!HasMaxWGs) {
@@ -1224,6 +1223,11 @@ class __SYCL_EXPORT handler {
12241223
// will yield a rounded-up value for the total range.
12251224
Adjust(0, ((RoundedRange[0] + GoodFactor - 1) / GoodFactor) * GoodFactor);
12261225
}
1226+
#ifdef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
1227+
// If we are forcing range rounding kernels to be used, we always want the
1228+
// rounded range kernel to be generated, even if rounding isn't needed
1229+
DidAdjust = true;
1230+
#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
12271231

12281232
for (int i = 0; i < Dims; ++i)
12291233
if (RoundedRange[i] > MaxRange[i])
@@ -1330,6 +1334,9 @@ class __SYCL_EXPORT handler {
13301334
{
13311335
(void)UserRange;
13321336
(void)Props;
1337+
#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
1338+
// If parallel_for range rounding is forced then only range rounded
1339+
// kernel is generated
13331340
kernel_parallel_for_wrapper<NameT, TransformedArgType, KernelType,
13341341
PropertiesT>(KernelFunc);
13351342
#ifndef __SYCL_DEVICE_ONLY__
@@ -1340,6 +1347,9 @@ class __SYCL_EXPORT handler {
13401347
std::move(KernelFunc));
13411348
setType(detail::CG::Kernel);
13421349
#endif
1350+
#else
1351+
(void)KernelFunc;
1352+
#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__
13431353
}
13441354
}
13451355

0 commit comments

Comments
 (0)