|
15 | 15 |
|
16 | 16 | #include "SpecConstants.h"
|
17 | 17 | #include "llvm/ADT/SetVector.h"
|
| 18 | +#include "llvm/ADT/Triple.h" |
18 | 19 | #include "llvm/Bitcode/BitcodeWriterPass.h"
|
19 | 20 | #include "llvm/IR/IRPrintingPasses.h"
|
20 | 21 | #include "llvm/IR/InstIterator.h"
|
@@ -47,6 +48,18 @@ cl::OptionCategory PostLinkCat{"sycl-post-link options"};
|
47 | 48 | static constexpr char COL_CODE[] = "Code";
|
48 | 49 | static constexpr char COL_SYM[] = "Symbols";
|
49 | 50 | static constexpr char COL_PROPS[] = "Properties";
|
| 51 | +static constexpr char DEVICELIB_FUNC_PREFIX[] = "__devicelib_"; |
| 52 | + |
// DeviceLibExt is shared between the sycl-post-link tool and the SYCL runtime.
// Any change made here must be mirrored in the DeviceLibExt definition in
// sycl/source/detail/program_manager/program_manager.hpp.
// The numeric values are spelled out because getDeviceLibBits() converts each
// enumerator into a bit position of the device library "require mask";
// reordering or inserting an enumerator would otherwise silently change which
// bit a fallback library maps to.
enum class DeviceLibExt : std::uint32_t {
  cl_intel_devicelib_assert = 0,
  cl_intel_devicelib_math = 1,
  cl_intel_devicelib_math_fp64 = 2,
  cl_intel_devicelib_complex = 3,
  cl_intel_devicelib_complex_fp64 = 4
};
50 | 63 |
|
51 | 64 | // InputFilename - The filename to read from.
|
52 | 65 | static cl::opt<std::string> InputFilename{
|
@@ -104,6 +117,143 @@ static cl::opt<SpecConstMode> SpecConstLower{
|
104 | 117 | "set spec constants to C++ defaults")),
|
105 | 118 | cl::cat(PostLinkCat)};
|
106 | 119 |
|
// Flags selecting which property sets saveDeviceImageProperty() writes for
// each device image. Every flag defaults to false so that a default-constructed
// instance is never read uninitialized; aggregate initialization with all four
// values still works.
struct ImagePropSaveInfo {
  // Emit the SYCL_DEVICELIB_REQ_MASK property set.
  bool NeedDeviceLibReqMask = false;
  // DoSpecConst and SetSpecConstAtRT must both be set for the
  // SYCL_SPECIALIZATION_CONSTANTS property set to be emitted.
  bool DoSpecConst = false;
  bool SetSpecConstAtRT = false;
  // When set, spec constant metadata is collected from the module; otherwise
  // an empty spec constant map is written.
  bool SpecConstsMet = false;
};
| 126 | +// Please update DeviceLibFuncMap if any item is added to or removed from |
| 127 | +// fallback device libraries in libdevice. |
| 128 | +static std::unordered_map<std::string, DeviceLibExt> DeviceLibFuncMap = { |
| 129 | + {"__devicelib_acosf", DeviceLibExt::cl_intel_devicelib_math}, |
| 130 | + {"__devicelib_acoshf", DeviceLibExt::cl_intel_devicelib_math}, |
| 131 | + {"__devicelib_asinf", DeviceLibExt::cl_intel_devicelib_math}, |
| 132 | + {"__devicelib_asinhf", DeviceLibExt::cl_intel_devicelib_math}, |
| 133 | + {"__devicelib_atan2f", DeviceLibExt::cl_intel_devicelib_math}, |
| 134 | + {"__devicelib_atanf", DeviceLibExt::cl_intel_devicelib_math}, |
| 135 | + {"__devicelib_atanhf", DeviceLibExt::cl_intel_devicelib_math}, |
| 136 | + {"__devicelib_cbrtf", DeviceLibExt::cl_intel_devicelib_math}, |
| 137 | + {"__devicelib_cosf", DeviceLibExt::cl_intel_devicelib_math}, |
| 138 | + {"__devicelib_coshf", DeviceLibExt::cl_intel_devicelib_math}, |
| 139 | + {"__devicelib_erfcf", DeviceLibExt::cl_intel_devicelib_math}, |
| 140 | + {"__devicelib_erff", DeviceLibExt::cl_intel_devicelib_math}, |
| 141 | + {"__devicelib_exp2f", DeviceLibExt::cl_intel_devicelib_math}, |
| 142 | + {"__devicelib_expf", DeviceLibExt::cl_intel_devicelib_math}, |
| 143 | + {"__devicelib_expm1f", DeviceLibExt::cl_intel_devicelib_math}, |
| 144 | + {"__devicelib_fdimf", DeviceLibExt::cl_intel_devicelib_math}, |
| 145 | + {"__devicelib_fmaf", DeviceLibExt::cl_intel_devicelib_math}, |
| 146 | + {"__devicelib_fmodf", DeviceLibExt::cl_intel_devicelib_math}, |
| 147 | + {"__devicelib_frexpf", DeviceLibExt::cl_intel_devicelib_math}, |
| 148 | + {"__devicelib_hypotf", DeviceLibExt::cl_intel_devicelib_math}, |
| 149 | + {"__devicelib_ilogbf", DeviceLibExt::cl_intel_devicelib_math}, |
| 150 | + {"__devicelib_ldexpf", DeviceLibExt::cl_intel_devicelib_math}, |
| 151 | + {"__devicelib_lgammaf", DeviceLibExt::cl_intel_devicelib_math}, |
| 152 | + {"__devicelib_log10f", DeviceLibExt::cl_intel_devicelib_math}, |
| 153 | + {"__devicelib_log1pf", DeviceLibExt::cl_intel_devicelib_math}, |
| 154 | + {"__devicelib_log2f", DeviceLibExt::cl_intel_devicelib_math}, |
| 155 | + {"__devicelib_logbf", DeviceLibExt::cl_intel_devicelib_math}, |
| 156 | + {"__devicelib_logf", DeviceLibExt::cl_intel_devicelib_math}, |
| 157 | + {"__devicelib_modff", DeviceLibExt::cl_intel_devicelib_math}, |
| 158 | + {"__devicelib_nextafterf", DeviceLibExt::cl_intel_devicelib_math}, |
| 159 | + {"__devicelib_powf", DeviceLibExt::cl_intel_devicelib_math}, |
| 160 | + {"__devicelib_remainderf", DeviceLibExt::cl_intel_devicelib_math}, |
| 161 | + {"__devicelib_remquof", DeviceLibExt::cl_intel_devicelib_math}, |
| 162 | + {"__devicelib_sinf", DeviceLibExt::cl_intel_devicelib_math}, |
| 163 | + {"__devicelib_sinhf", DeviceLibExt::cl_intel_devicelib_math}, |
| 164 | + {"__devicelib_sqrtf", DeviceLibExt::cl_intel_devicelib_math}, |
| 165 | + {"__devicelib_tanf", DeviceLibExt::cl_intel_devicelib_math}, |
| 166 | + {"__devicelib_tanhf", DeviceLibExt::cl_intel_devicelib_math}, |
| 167 | + {"__devicelib_tgammaf", DeviceLibExt::cl_intel_devicelib_math}, |
| 168 | + {"__devicelib_acos", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 169 | + {"__devicelib_acosh", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 170 | + {"__devicelib_asin", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 171 | + {"__devicelib_asinh", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 172 | + {"__devicelib_atan", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 173 | + {"__devicelib_atan2", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 174 | + {"__devicelib_atanh", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 175 | + {"__devicelib_cbrt", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 176 | + {"__devicelib_cos", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 177 | + {"__devicelib_cosh", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 178 | + {"__devicelib_erf", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 179 | + {"__devicelib_erfc", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 180 | + {"__devicelib_exp", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 181 | + {"__devicelib_exp2", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 182 | + {"__devicelib_expm1", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 183 | + {"__devicelib_fdim", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 184 | + {"__devicelib_fma", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 185 | + {"__devicelib_fmod", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 186 | + {"__devicelib_frexp", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 187 | + {"__devicelib_hypot", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 188 | + {"__devicelib_ilogb", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 189 | + {"__devicelib_ldexp", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 190 | + {"__devicelib_lgamma", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 191 | + {"__devicelib_log", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 192 | + {"__devicelib_log10", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 193 | + {"__devicelib_log1p", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 194 | + {"__devicelib_log2", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 195 | + {"__devicelib_logb", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 196 | + {"__devicelib_modf", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 197 | + {"__devicelib_nextafter", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 198 | + {"__devicelib_pow", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 199 | + {"__devicelib_remainder", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 200 | + {"__devicelib_remquo", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 201 | + {"__devicelib_sin", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 202 | + {"__devicelib_sinh", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 203 | + {"__devicelib_sqrt", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 204 | + {"__devicelib_tan", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 205 | + {"__devicelib_tanh", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 206 | + {"__devicelib_tgamma", DeviceLibExt::cl_intel_devicelib_math_fp64}, |
| 207 | + {"__devicelib___divsc3", DeviceLibExt::cl_intel_devicelib_complex}, |
| 208 | + {"__devicelib___mulsc3", DeviceLibExt::cl_intel_devicelib_complex}, |
| 209 | + {"__devicelib_cabsf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 210 | + {"__devicelib_cacosf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 211 | + {"__devicelib_cacoshf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 212 | + {"__devicelib_cargf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 213 | + {"__devicelib_casinf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 214 | + {"__devicelib_casinhf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 215 | + {"__devicelib_catanf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 216 | + {"__devicelib_catanhf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 217 | + {"__devicelib_ccosf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 218 | + {"__devicelib_ccoshf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 219 | + {"__devicelib_cexpf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 220 | + {"__devicelib_cimagf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 221 | + {"__devicelib_clogf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 222 | + {"__devicelib_cpolarf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 223 | + {"__devicelib_cpowf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 224 | + {"__devicelib_cprojf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 225 | + {"__devicelib_crealf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 226 | + {"__devicelib_csinf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 227 | + {"__devicelib_csinhf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 228 | + {"__devicelib_csqrtf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 229 | + {"__devicelib_ctanf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 230 | + {"__devicelib_ctanhf", DeviceLibExt::cl_intel_devicelib_complex}, |
| 231 | + {"__devicelib___divdc3", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 232 | + {"__devicelib___muldc3", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 233 | + {"__devicelib_cabs", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 234 | + {"__devicelib_cacos", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 235 | + {"__devicelib_cacosh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 236 | + {"__devicelib_carg", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 237 | + {"__devicelib_casin", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 238 | + {"__devicelib_casinh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 239 | + {"__devicelib_catan", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 240 | + {"__devicelib_catanh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 241 | + {"__devicelib_ccos", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 242 | + {"__devicelib_ccosh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 243 | + {"__devicelib_cexp", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 244 | + {"__devicelib_cimag", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 245 | + {"__devicelib_clog", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 246 | + {"__devicelib_cpolar", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 247 | + {"__devicelib_cpow", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 248 | + {"__devicelib_cproj", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 249 | + {"__devicelib_creal", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 250 | + {"__devicelib_csin", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 251 | + {"__devicelib_csinh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 252 | + {"__devicelib_csqrt", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 253 | + {"__devicelib_ctan", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 254 | + {"__devicelib_ctanh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, |
| 255 | +}; |
| 256 | + |
107 | 257 | static void error(const Twine &Msg) {
|
108 | 258 | errs() << "sycl-post-link: " << Msg << '\n';
|
109 | 259 | exit(1);
|
@@ -295,20 +445,76 @@ saveResultModules(std::vector<std::unique_ptr<Module>> &ResModules) {
|
295 | 445 | return Res;
|
296 | 446 | }
|
297 | 447 |
|
298 |
| -static string_vector |
299 |
| -saveSpecConstantIDMaps(const std::vector<SpecIDMapTy> &Maps) { |
300 |
| - string_vector Res; |
| 448 | +// Each fallback device library corresponds to one bit in "require mask" which |
| 449 | +// is an unsigned int32. getDeviceLibBit checks which fallback device library |
| 450 | +// is required for FuncName and returns the corresponding bit. The corresponding |
| 451 | +// mask for each fallback device library is: |
| 452 | +// fallback-cassert: 0x1 |
| 453 | +// fallback-cmath: 0x2 |
| 454 | +// fallback-cmath-fp64: 0x4 |
| 455 | +// fallback-complex: 0x8 |
| 456 | +// fallback-complex-fp64: 0x10 |
| 457 | +static uint32_t getDeviceLibBits(const std::string &FuncName) { |
| 458 | + auto DeviceLibFuncIter = DeviceLibFuncMap.find(FuncName); |
| 459 | + return ((DeviceLibFuncIter == DeviceLibFuncMap.end()) |
| 460 | + ? 0 |
| 461 | + : 0x1 << (static_cast<uint32_t>(DeviceLibFuncIter->second) - |
| 462 | + static_cast<uint32_t>( |
| 463 | + DeviceLibExt::cl_intel_devicelib_assert))); |
| 464 | +} |
301 | 465 |
|
302 |
| - for (size_t I = 0; I < Maps.size(); ++I) { |
303 |
| - std::string SCFile = makeResultFileName(".prop", I); |
| 466 | +// For each device image module, we go through all functions which meets |
| 467 | +// 1. The function name has prefix "__devicelib_" |
| 468 | +// 2. The function is declaration which means it doesn't have function body |
| 469 | +// And we don't expect non-spirv functions with "__devicelib_" prefix. |
| 470 | +static uint32_t getModuleReqMask(const Module &M) { |
| 471 | + // Device libraries will be enabled only for spir-v module. |
| 472 | + if (!llvm::Triple(M.getTargetTriple()).isSPIR()) |
| 473 | + return 0; |
| 474 | + // 0x1 means sycl runtime will link and load libsycl-fallback-assert.spv as |
| 475 | + // default. In fact, default link assert spv is not necessary but dramatic |
| 476 | + // perf regression is observed if we don't link any device library. The perf |
| 477 | + // regression is caused by a clang issue. |
| 478 | + uint32_t ReqMask = 0x1; |
| 479 | + for (const Function &SF : M) { |
| 480 | + if (SF.getName().startswith(DEVICELIB_FUNC_PREFIX) && SF.isDeclaration()) { |
| 481 | + assert(SF.getCallingConv() == CallingConv::SPIR_FUNC); |
| 482 | + uint32_t DeviceLibBits = getDeviceLibBits(SF.getName().str()); |
| 483 | + ReqMask |= DeviceLibBits; |
| 484 | + } |
| 485 | + } |
| 486 | + return ReqMask; |
| 487 | +} |
| 488 | + |
| 489 | +static string_vector saveDeviceImageProperty( |
| 490 | + const std::vector<std::unique_ptr<Module>> &ResultModules, |
| 491 | + const ImagePropSaveInfo &ImgPSInfo) { |
| 492 | + string_vector Res; |
| 493 | + for (size_t I = 0; I < ResultModules.size(); ++I) { |
304 | 494 | llvm::util::PropertySetRegistry PropSet;
|
305 |
| - PropSet.add(llvm::util::PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS, |
306 |
| - Maps[I]); |
| 495 | + if (ImgPSInfo.NeedDeviceLibReqMask) { |
| 496 | + uint32_t MRMask = getModuleReqMask(*ResultModules[I]); |
| 497 | + std::map<StringRef, uint32_t> RMEntry = {{"DeviceLibReqMask", MRMask}}; |
| 498 | + PropSet.add(llvm::util::PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK, |
| 499 | + RMEntry); |
| 500 | + } |
| 501 | + if (ImgPSInfo.DoSpecConst && ImgPSInfo.SetSpecConstAtRT) { |
| 502 | + // extract spec constant maps per each module |
| 503 | + SpecIDMapTy TmpSpecIDMap; |
| 504 | + if (ImgPSInfo.SpecConstsMet) |
| 505 | + SpecConstantsPass::collectSpecConstantMetadata(*ResultModules[I].get(), |
| 506 | + TmpSpecIDMap); |
| 507 | + PropSet.add( |
| 508 | + llvm::util::PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS, |
| 509 | + TmpSpecIDMap); |
| 510 | + } |
307 | 511 | std::error_code EC;
|
| 512 | + std::string SCFile = makeResultFileName(".prop", I); |
308 | 513 | raw_fd_ostream SCOut(SCFile, EC);
|
309 | 514 | PropSet.write(SCOut);
|
310 | 515 | Res.emplace_back(std::move(SCFile));
|
311 | 516 | }
|
| 517 | + |
312 | 518 | return Res;
|
313 | 519 | }
|
314 | 520 |
|
@@ -412,7 +618,6 @@ int main(int argc, char **argv) {
|
412 | 618 | }
|
413 | 619 |
|
414 | 620 | std::vector<std::unique_ptr<Module>> ResultModules;
|
415 |
| - std::vector<SpecIDMapTy> ResultSpecIDMaps; |
416 | 621 | string_vector ResultSymbolsLists;
|
417 | 622 |
|
418 | 623 | util::SimpleTable Table;
|
@@ -456,15 +661,11 @@ int main(int argc, char **argv) {
|
456 | 661 | Error Err = Table.addColumn(COL_CODE, Files);
|
457 | 662 | CHECK_AND_EXIT(Err);
|
458 | 663 | }
|
459 |
| - if (DoSpecConst && SetSpecConstAtRT) { |
460 |
| - // extract spec constant maps per each module |
461 |
| - for (auto &MUptr : ResultModules) { |
462 |
| - ResultSpecIDMaps.emplace_back(SpecIDMapTy()); |
463 |
| - if (SpecConstsMet) |
464 |
| - SpecConstantsPass::collectSpecConstantMetadata(*MUptr.get(), |
465 |
| - ResultSpecIDMaps.back()); |
466 |
| - } |
467 |
| - string_vector Files = saveSpecConstantIDMaps(ResultSpecIDMaps); |
| 664 | + |
| 665 | + { |
| 666 | + ImagePropSaveInfo ImgPSInfo = {true, DoSpecConst, SetSpecConstAtRT, |
| 667 | + SpecConstsMet}; |
| 668 | + string_vector Files = saveDeviceImageProperty(ResultModules, ImgPSInfo); |
468 | 669 | Error Err = Table.addColumn(COL_PROPS, Files);
|
469 | 670 | CHECK_AND_EXIT(Err);
|
470 | 671 | }
|
|
0 commit comments