@@ -168,10 +168,11 @@ KernelTranslator::loadSPIRVKernel(llvm::LLVMContext &LLVMCtx,
168
168
return SPIRVLLVMTranslator::loadSPIRVKernel (LLVMCtx, Kernel);
169
169
}
170
170
171
- llvm::Error KernelTranslator::translateKernel (SYCLKernelInfo &Kernel,
172
- llvm::Module &Mod,
173
- JITContext &JITCtx,
174
- BinaryFormat Format) {
171
+ llvm::Error
172
+ KernelTranslator::translateKernel (SYCLKernelInfo &Kernel, llvm::Module &Mod,
173
+ JITContext &JITCtx, BinaryFormat Format,
174
+ const std::string &TargetCPU,
175
+ const std::string &TargetFeatures) {
175
176
176
177
KernelBinary *KernelBin = nullptr ;
177
178
switch (Format) {
@@ -186,7 +187,7 @@ llvm::Error KernelTranslator::translateKernel(SYCLKernelInfo &Kernel,
186
187
}
187
188
case BinaryFormat::PTX: {
188
189
llvm::Expected<KernelBinary *> BinaryOrError =
189
- translateToPTX (Kernel, Mod, JITCtx);
190
+ translateToPTX (Kernel, Mod, JITCtx, TargetCPU, TargetFeatures );
190
191
if (auto Error = BinaryOrError.takeError ()) {
191
192
return Error;
192
193
}
@@ -195,7 +196,7 @@ llvm::Error KernelTranslator::translateKernel(SYCLKernelInfo &Kernel,
195
196
}
196
197
case BinaryFormat::AMDGCN: {
197
198
llvm::Expected<KernelBinary *> BinaryOrError =
198
- translateToAMDGCN (Kernel, Mod, JITCtx);
199
+ translateToAMDGCN (Kernel, Mod, JITCtx, TargetCPU, TargetFeatures );
199
200
if (auto Error = BinaryOrError.takeError ())
200
201
return Error;
201
202
KernelBin = *BinaryOrError;
@@ -226,9 +227,9 @@ KernelTranslator::translateToSPIRV(llvm::Module &Mod, JITContext &JITCtx) {
226
227
return SPIRVLLVMTranslator::translateLLVMtoSPIRV (Mod, JITCtx);
227
228
}
228
229
229
- llvm::Expected<KernelBinary *>
230
- KernelTranslator::translateToPTX ( SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
231
- JITContext &JITCtx ) {
230
+ llvm::Expected<KernelBinary *> KernelTranslator::translateToPTX (
231
+ SYCLKernelInfo &KernelInfo, llvm::Module &Mod, JITContext &JITCtx ,
232
+ const std::string &TargetCPU, const std::string &TargetFeatures ) {
232
233
#ifndef FUSION_JIT_SUPPORT_PTX
233
234
(void )KernelInfo;
234
235
(void )Mod;
@@ -257,23 +258,32 @@ KernelTranslator::translateToPTX(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
257
258
ErrorMessage.c_str ());
258
259
}
259
260
260
- llvm::StringRef TargetCPU{" sm_50" };
261
- llvm::StringRef TargetFeatures{" +sm_50,+ptx76" };
262
- if (auto *KernelFunc = Mod.getFunction (KernelInfo.Name .c_str ())) {
263
- if (KernelFunc->hasFnAttribute (TARGET_CPU_ATTRIBUTE)) {
264
- TargetCPU =
265
- KernelFunc->getFnAttribute (TARGET_CPU_ATTRIBUTE).getValueAsString ();
261
+ // Give priority to user specified values (through environment variables:
262
+ // SYCL_JIT_TARGET_CPU and SYCL_JIT_TARGET_FEATURES).
263
+ llvm::StringRef CPU{TargetCPU};
264
+ llvm::StringRef Features{TargetFeatures};
265
+
266
+ auto *KernelFunc = Mod.getFunction (KernelInfo.Name .c_str ());
267
+ // If they were not set, use default and consult the module for alternatives
268
+ // (if present).
269
+ if (CPU.empty ()) {
270
+ CPU = " sm_50" ;
271
+ if (KernelFunc && KernelFunc->hasFnAttribute (TARGET_CPU_ATTRIBUTE)) {
272
+ CPU = KernelFunc->getFnAttribute (TARGET_CPU_ATTRIBUTE).getValueAsString ();
266
273
}
267
- if (KernelFunc->hasFnAttribute (TARGET_FEATURE_ATTRIBUTE)) {
268
- TargetFeatures = KernelFunc->getFnAttribute (TARGET_FEATURE_ATTRIBUTE)
269
- .getValueAsString ();
274
+ }
275
+ if (Features.empty ()) {
276
+ Features = " +sm_50,+ptx76" ;
277
+ if (KernelFunc && KernelFunc->hasFnAttribute (TARGET_FEATURE_ATTRIBUTE)) {
278
+ Features = KernelFunc->getFnAttribute (TARGET_FEATURE_ATTRIBUTE)
279
+ .getValueAsString ();
270
280
}
271
281
}
272
282
273
283
// FIXME: Check whether we can provide more accurate target information here
274
284
auto *TargetMachine = Target->createTargetMachine (
275
- TargetTriple, TargetCPU, TargetFeatures , {}, llvm::Reloc::PIC_,
276
- std::nullopt, llvm::CodeGenOptLevel::Default);
285
+ TargetTriple, CPU, Features , {}, llvm::Reloc::PIC_, std::nullopt ,
286
+ llvm::CodeGenOptLevel::Default);
277
287
278
288
llvm::legacy::PassManager PM;
279
289
@@ -298,9 +308,9 @@ KernelTranslator::translateToPTX(SYCLKernelInfo &KernelInfo, llvm::Module &Mod,
298
308
#endif // FUSION_JIT_SUPPORT_PTX
299
309
}
300
310
301
- llvm::Expected<KernelBinary *>
302
- KernelTranslator::translateToAMDGCN ( SYCLKernelInfo &KernelInfo,
303
- llvm::Module &Mod, JITContext &JITCtx ) {
311
+ llvm::Expected<KernelBinary *> KernelTranslator::translateToAMDGCN (
312
+ SYCLKernelInfo &KernelInfo, llvm::Module &Mod, JITContext &JITCtx ,
313
+ const std::string &TargetCPU, const std::string &TargetFeatures ) {
304
314
#ifndef FUSION_JIT_SUPPORT_AMDGCN
305
315
(void )KernelInfo;
306
316
(void )Mod;
@@ -329,25 +339,29 @@ KernelTranslator::translateToAMDGCN(SYCLKernelInfo &KernelInfo,
329
339
" Failed to load and translate AMDGCN LLVM IR module with error %s" ,
330
340
ErrorMessage.c_str ());
331
341
332
- // Set to the lowest tested target according to the GetStartedGuide, section
333
- // "Build DPC++ toolchain with support for HIP AMD"
334
- llvm::StringRef TargetCPU{" gfx906" };
335
- llvm::StringRef TargetFeatures{" " };
336
- if (auto *KernelFunc = Mod.getFunction (KernelInfo.Name .c_str ())) {
337
- if (KernelFunc->hasFnAttribute (TARGET_CPU_ATTRIBUTE)) {
338
- TargetCPU =
339
- KernelFunc->getFnAttribute (TARGET_CPU_ATTRIBUTE).getValueAsString ();
342
+ llvm::StringRef CPU{TargetCPU};
343
+ llvm::StringRef Features{TargetFeatures};
344
+
345
+ auto *KernelFunc = Mod.getFunction (KernelInfo.Name .c_str ());
346
+ if (CPU.empty ()) {
347
+ // Set to the lowest tested target according to the GetStartedGuide, section
348
+ // "Build DPC++ toolchain with support for HIP AMD"
349
+ CPU = " gfx906" ;
350
+ if (KernelFunc && KernelFunc->hasFnAttribute (TARGET_CPU_ATTRIBUTE)) {
351
+ CPU = KernelFunc->getFnAttribute (TARGET_CPU_ATTRIBUTE).getValueAsString ();
340
352
}
341
- if (KernelFunc->hasFnAttribute (TARGET_FEATURE_ATTRIBUTE)) {
342
- TargetFeatures = KernelFunc->getFnAttribute (TARGET_FEATURE_ATTRIBUTE)
343
- .getValueAsString ();
353
+ }
354
+ if (Features.empty ()) {
355
+ if (KernelFunc && KernelFunc->hasFnAttribute (TARGET_FEATURE_ATTRIBUTE)) {
356
+ Features = KernelFunc->getFnAttribute (TARGET_FEATURE_ATTRIBUTE)
357
+ .getValueAsString ();
344
358
}
345
359
}
346
360
347
361
// FIXME: Check whether we can provide more accurate target information here
348
362
auto *TargetMachine = Target->createTargetMachine (
349
- TargetTriple, TargetCPU, TargetFeatures , {}, llvm::Reloc::PIC_,
350
- std::nullopt, llvm::CodeGenOptLevel::Default);
363
+ TargetTriple, CPU, Features , {}, llvm::Reloc::PIC_, std::nullopt ,
364
+ llvm::CodeGenOptLevel::Default);
351
365
352
366
std::string AMDObj;
353
367
{
0 commit comments