diff --git a/buildbot/configure.py b/buildbot/configure.py index 32cf6ebdd93ab..e086e1b09c421 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -178,6 +178,8 @@ def do_configure(args): "-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects), "-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform), "-DLLVM_BUILD_TOOLS=ON", + "-DLLVM_ENABLE_ZSTD=ON", + "-DLLVM_USE_STATIC_ZSTD=ON", "-DSYCL_ENABLE_WERROR={}".format(sycl_werror), "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bbaea4eb8fd79..5b954da1ce4e0 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10131,6 +10131,19 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, SmallString<128> TargetTripleOpt = TT.getArchName(); bool WrapFPGADevice = false; bool FPGAEarly = false; + + // Validate and propogate CLI options related to device image compression. + // -offload-compress + if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) { + WrapperArgs.push_back( + C.getArgs().MakeArgString(Twine("-offload-compress"))); + // -offload-compression-level=<> + if (Arg *A = C.getInputArgs().getLastArg( + options::OPT_offload_compression_level_EQ)) + WrapperArgs.push_back(C.getArgs().MakeArgString( + Twine("-offload-compression-level=") + A->getValue())); + } + if (Arg *A = C.getInputArgs().getLastArg(options::OPT_fsycl_link_EQ)) { WrapFPGADevice = true; FPGAEarly = (A->getValue() == StringRef("early")); diff --git a/clang/test/Driver/clang-offload-wrapper-zstd.c b/clang/test/Driver/clang-offload-wrapper-zstd.c new file mode 100644 index 0000000000000..bc5fadfc4cf42 --- /dev/null +++ b/clang/test/Driver/clang-offload-wrapper-zstd.c @@ -0,0 +1,40 @@ +// REQUIRES: zstd && (system-windows || system-linux) + +// clang-offload-wrapper compression test: checks that the wrapper can compress the device images. +// Checks the '--offload-compress', '--offload-compression-level', and '--offload-compression-threshold' +// CLI options. + +// --- Prepare test data by creating the debice binary image. +// RUN: echo -e -n 'device binary image1\n' > %t.bin +// RUN: echo -e -n '[Category1]\nint_prop1=1|10\n[Category2]\nint_prop2=1|20\n' > %t.props +// RUN: echo -e -n 'kernel1\nkernel2\n' > %t.sym +// RUN: echo -e -n 'Manifest file - arbitrary data generated by the toolchain\n' > %t.mnf +// RUN: echo '[Code|Properties|Symbols|Manifest]' > %t.img1 +// RUN: echo %t.bin"|"%t.props"|"%t.sym"|"%t.mnf >> %t.img1 + +/////////////////////////////////////////////////////// +// Compress the test image using clang-offload-wrapper. +/////////////////////////////////////////////////////// + +// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ +// RUN: --offload-compress --offload-compression-level=9 --offload-compression-threshold=0 \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS + +// CHECK-COMPRESS: [Compression] Original image size: +// CHECK-COMPRESS: [Compression] Compressed image size: +// CHECK-COMPRESS: [Compression] Compression level used: 9 + +/////////////////////////////////////////////////////////// +// Check that there is no compression when the threshold is set to a value higher than the image size +// or '--offload-compress' is not set. +/////////////////////////////////////////////////////////// + +// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ +// RUN: --offload-compress --offload-compression-level=3 --offload-compression-threshold=1000 \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS + +// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ +// RUN: --offload-compression-level=3 --offload-compression-threshold=0 \ +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS + +// CHECK-NO-COMPRESS-NOT: [Compression] Original image size: diff --git a/clang/test/Driver/sycl-offload-wrapper-compression.cpp b/clang/test/Driver/sycl-offload-wrapper-compression.cpp new file mode 100644 index 0000000000000..1ef9282ee3598 --- /dev/null +++ b/clang/test/Driver/sycl-offload-wrapper-compression.cpp @@ -0,0 +1,14 @@ +/// +/// Check if '--offload-compress' and '--offload-compression-level' CLI +/// options are passed to the clang-offload-wrapper. +/// + +// RUN: %clangxx -### -fsycl --offload-compress --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS +// CHECK-COMPRESS: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}}"-offload-compression-level=3"{{.*}} + +// Make sure that the compression options are not passed when --offload-compress is not set. +// RUN: %clangxx -### -fsycl %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS +// RUN: %clangxx -### -fsycl --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS + +// CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}} +// CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compression-level=3"{{.*}} diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt index 9cb5ec66c644e..3195f18fe23cd 100644 --- a/clang/tools/clang-offload-wrapper/CMakeLists.txt +++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_tool(clang-offload-wrapper set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS clangBasic + LLVMSupport ) add_dependencies(clang clang-offload-wrapper) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index b11d51c0cc047..66853d5daefa2 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -67,6 +67,9 @@ #include #include +// For device image compression. +#include + #define OPENMP_OFFLOAD_IMAGE_VERSION "1.0" using namespace llvm; @@ -139,6 +142,25 @@ static cl::list Inputs(cl::Positional, cl::OneOrMore, cl::desc(""), cl::cat(ClangOffloadWrapperCategory)); +// CLI options for device image compression. +static cl::opt OffloadCompressDevImgs( + "offload-compress", cl::init(false), cl::Optional, + cl::desc("Enable device image compression using ZSTD."), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt + OffloadCompressLevel("offload-compression-level", cl::init(10), + cl::Optional, + cl::desc("ZSTD Compression level. Default: 10"), + cl::cat(ClangOffloadWrapperCategory)); + +static cl::opt + OffloadCompressThreshold("offload-compression-threshold", cl::init(512), + cl::Optional, + cl::desc("Threshold (in bytes) over which to " + "compress images. Default: 512"), + cl::cat(ClangOffloadWrapperCategory)); + // Binary image formats supported by this tool. The support basically means // mapping string representation given at the command line to a value from this // enum. No format checking is performed. @@ -146,8 +168,9 @@ enum BinaryImageFormat { none, // image kind is not determined native, // image kind is native // portable image kinds go next - spirv, // SPIR-V - llvmbc // LLVM bitcode + spirv, // SPIR-V + llvmbc, // LLVM bitcode + compressed_none // compressed image with unknown format }; /// Sets offload kind. @@ -265,6 +288,8 @@ static StringRef formatToString(BinaryImageFormat Fmt) { return "llvmbc"; case BinaryImageFormat::native: return "native"; + case BinaryImageFormat::compressed_none: + return "compressed_none"; } llvm_unreachable("bad format"); @@ -1083,10 +1108,66 @@ class BinaryWrapper { return FBinOrErr.takeError(); Fbin = *FBinOrErr; } else { - Fbin = addDeviceImageToModule( - ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), - Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, - Img.Tgt); + + // If '--offload-compress' option is specified and zstd is not + // available, throw an error. + if (OffloadCompressDevImgs && !llvm::compression::zstd::isAvailable()) { + return createStringError( + inconvertibleErrorCode(), + "'--offload-compress' option is specified but zstd " + "is not available. The device image will not be " + "compressed."); + } + + // Don't compress if the user explicitly specifies the binary image + // format or if the image is smaller than OffloadCompressThreshold + // bytes. + if (Kind != OffloadKind::SYCL || !OffloadCompressDevImgs || + Img.Fmt != BinaryImageFormat::none || + !llvm::compression::zstd::isAvailable() || + static_cast(Bin->getBufferSize()) < OffloadCompressThreshold) { + Fbin = addDeviceImageToModule( + ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), + Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, + Img.Tgt); + } else { + + // Compress the image using zstd. + SmallVector CompressedBuffer; +#if LLVM_ENABLE_EXCEPTIONS + try { +#endif + llvm::compression::zstd::compress( + ArrayRef( + (const unsigned char *)(Bin->getBufferStart()), + Bin->getBufferSize()), + CompressedBuffer, OffloadCompressLevel); +#if LLVM_ENABLE_EXCEPTIONS + } catch (const std::exception &ex) { + return createStringError(inconvertibleErrorCode(), + std::string("Failed to compress the device image: \n") + + std::string(ex.what())); + } +#endif + if (Verbose) + errs() << "[Compression] Original image size: " + << Bin->getBufferSize() << "\n" + << "[Compression] Compressed image size: " + << CompressedBuffer.size() << "\n" + << "[Compression] Compression level used: " + << OffloadCompressLevel << "\n"; + + // Add the compressed image to the module. + Fbin = addDeviceImageToModule( + ArrayRef((const char *)CompressedBuffer.data(), + CompressedBuffer.size()), + Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, + Img.Tgt); + + // Change image format to compressed_none. + Ffmt = ConstantInt::get(Type::getInt8Ty(C), + BinaryImageFormat::compressed_none); + } } if (Kind == OffloadKind::SYCL) { diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 6a9e128825184..3f184edc12def 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -195,6 +195,19 @@ and not recommended to use in production environment. which may or may not perform additional inlining. Default value is 225. +**`--offload-compress`** + + Enables device image compression for SYCL offloading. Device images + are compressed using `zstd` compression algorithm and only if their size + exceeds 512 bytes. + Default value is false. + +**`--offload-compression-level=`** + + `zstd` compression level used to compress device images when `--offload- + compress` is enabled. + The default value is 10. + ## Target toolchain options **`-Xsycl-target-backend= "options"`** diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 5e7cdead48f25..b5db3f421e19d 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -69,6 +69,13 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) endif() + if (NOT LLVM_ENABLE_ZSTD) + target_compile_definitions(${LIB_OBJ_NAME} PRIVATE SYCL_RT_ZSTD_NOT_AVAIABLE) + else() + target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY}) + target_include_directories(${LIB_OBJ_NAME} PRIVATE ${zstd_INCLUDE_DIR}) + endif() + target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) # ur_win_proxy_loader diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp index 9d2777e863ee1..827ee61ef8110 100644 --- a/sycl/source/detail/compiler.hpp +++ b/sycl/source/detail/compiler.hpp @@ -115,7 +115,8 @@ enum sycl_device_binary_type : uint8_t { SYCL_DEVICE_BINARY_TYPE_NONE = 0, // undetermined SYCL_DEVICE_BINARY_TYPE_NATIVE = 1, // specific to a device SYCL_DEVICE_BINARY_TYPE_SPIRV = 2, - SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3 + SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3, + SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE = 4 }; // Device binary descriptor version supported by this library. diff --git a/sycl/source/detail/compression.hpp b/sycl/source/detail/compression.hpp new file mode 100644 index 0000000000000..1878010cd5bad --- /dev/null +++ b/sycl/source/detail/compression.hpp @@ -0,0 +1,153 @@ +//==---------- compression.hpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE + +#include + +#include +#include +#include + +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// Singleton class to handle ZSTD compression and decompression. +class ZSTDCompressor { +private: + ZSTDCompressor() {} + + ZSTDCompressor(const ZSTDCompressor &) = delete; + ZSTDCompressor &operator=(const ZSTDCompressor &) = delete; + ~ZSTDCompressor() {} + + // Get the singleton instance of the ZSTDCompressor class. + static ZSTDCompressor &GetSingletonInstance() { + static ZSTDCompressor instance; + return instance; + } + + // Public APIs +public: + // Blob (de)compression do not assume format/structure of the input buffer. + // This function can be used in future for compression in on-disk cache. + static std::unique_ptr CompressBlob(const char *src, size_t srcSize, + size_t &dstSize, int level) { + auto &instance = GetSingletonInstance(); + + // Lazy initialize compression context. + if (!instance.m_ZSTD_compression_ctx) { + + // Call ZSTD_createCCtx() and ZSTD_freeCCtx() to create and free the + // context. + instance.m_ZSTD_compression_ctx = + std::unique_ptr(ZSTD_createCCtx(), + ZSTD_freeCCtx); + if (!instance.m_ZSTD_compression_ctx) { + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Failed to create ZSTD compression context"); + } + } + + // Get maximum size of the compressed buffer and allocate it. + auto dstBufferSize = ZSTD_compressBound(srcSize); + auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); + + if (!dstBuffer) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Failed to allocate memory for compressed data"); + + // Compress the input buffer. + dstSize = + ZSTD_compressCCtx(instance.m_ZSTD_compression_ctx.get(), + static_cast(dstBuffer.get()), dstBufferSize, + static_cast(src), srcSize, level); + + // Store the error code if compression failed. + if (ZSTD_isError(dstSize)) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + ZSTD_getErrorName(dstSize)); + + // Pass ownership of the buffer to the caller. + return dstBuffer; + } + + static size_t GetDecompressedSize(const char *src, size_t srcSize) { + size_t dstBufferSize = ZSTD_getFrameContentSize(src, srcSize); + + if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || + dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Error determining size of uncompressed data."); + } + return dstBufferSize; + } + + static std::unique_ptr DecompressBlob(const char *src, size_t srcSize, + size_t &dstSize) { + auto &instance = GetSingletonInstance(); + + // Lazy initialize decompression context. + if (!instance.m_ZSTD_decompression_ctx) { + + // Call ZSTD_createDCtx() and ZSTD_freeDCtx() to create and free the + // context. + instance.m_ZSTD_decompression_ctx = + std::unique_ptr(ZSTD_createDCtx(), + ZSTD_freeDCtx); + if (!instance.m_ZSTD_decompression_ctx) { + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Failed to create ZSTD decompression context"); + } + } + + // Size of decompressed image can be larger than what we can allocate + // on heap. In that case, we need to use streaming decompression. + auto dstBufferSize = GetDecompressedSize(src, srcSize); + + // Allocate buffer for decompressed data. + auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); + + if (!dstBuffer) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Failed to allocate memory for decompressed data"); + + dstSize = + ZSTD_decompressDCtx(instance.m_ZSTD_decompression_ctx.get(), + static_cast(dstBuffer.get()), dstBufferSize, + static_cast(src), srcSize); + + // In case of decompression error, return the error message and set dstSize + // to 0. + if (ZSTD_isError(dstSize)) { + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + ZSTD_getErrorName(dstSize)); + } + + // Pass ownership of the buffer to the caller. + return dstBuffer; + } + + // Data fields +private: + // ZSTD contexts. Reusing ZSTD context speeds up subsequent (de)compression. + std::unique_ptr m_ZSTD_compression_ctx{ + nullptr, nullptr}; + std::unique_ptr m_ZSTD_decompression_ctx{ + nullptr, nullptr}; +}; +} // namespace detail +} // namespace _V1 +} // namespace sycl + +#endif // SYCL_RT_ZSTD_NOT_AVAIABLE diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index beb9bae0dd0f1..2be48d4a38fce 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -9,6 +9,9 @@ #include #include +// For device image compression. +#include + #include #include #include @@ -167,6 +170,8 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { // it when invoking the offload wrapper job Format = static_cast(Bin->Format); + // For compressed images, we delay determining the format until the image is + // decompressed. if (Format == SYCL_DEVICE_BINARY_TYPE_NONE) // try to determine the format; may remain "NONE" Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); @@ -226,6 +231,48 @@ DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { Bin = nullptr; } +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE +CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( + sycl_device_binary CompressedBin) + : RTDeviceBinaryImage() { + + // 'CompressedBin' is part of the executable image loaded into memory + // which can't be modified easily. So, we need to make a copy of it. + Bin = new sycl_device_binary_struct(*CompressedBin); + + // Get the decompressed size of the binary image. + m_ImageSize = ZSTDCompressor::GetDecompressedSize( + reinterpret_cast(Bin->BinaryStart), + static_cast(Bin->BinaryEnd - Bin->BinaryStart)); + + init(Bin); +} + +void CompressedRTDeviceBinaryImage::Decompress() { + + size_t CompressedDataSize = + static_cast(Bin->BinaryEnd - Bin->BinaryStart); + + size_t DecompressedSize = 0; + m_DecompressedData = ZSTDCompressor::DecompressBlob( + reinterpret_cast(Bin->BinaryStart), CompressedDataSize, + DecompressedSize); + + Bin->BinaryStart = + reinterpret_cast(m_DecompressedData.get()); + Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize; + + Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); + Format = static_cast(Bin->Format); +} + +CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { + // De-allocate device binary struct. + delete Bin; + Bin = nullptr; +} +#endif // SYCL_RT_ZSTD_NOT_AVAIABLE + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 49047a04ae77c..203427b89ca45 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #pragma once +#include "ur_utils.hpp" #include #include #include #include #include -#include "ur_utils.hpp" #include @@ -158,7 +158,10 @@ class RTDeviceBinaryImage { virtual void print() const; virtual void dump(std::ostream &Out) const; - size_t getSize() const { + // getSize will be overridden in the case of compressed binary images. + // In that case, we return the size of uncompressed data, instead of + // BinaryEnd - BinaryStart. + virtual size_t getSize() const { assert(Bin && "binary image data not set"); return static_cast(Bin->BinaryEnd - Bin->BinaryStart); } @@ -276,6 +279,35 @@ class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { std::unique_ptr Data; }; +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE +// Compressed device binary image. Decompression happens when the image is +// actually used to build a program. +// Also, frees the decompressed data in destructor. +class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { +public: + CompressedRTDeviceBinaryImage(sycl_device_binary Bin); + ~CompressedRTDeviceBinaryImage() override; + + void Decompress(); + + // We return the size of decompressed data, not the size of compressed data. + size_t getSize() const override { + assert(Bin && "binary image data not set"); + return m_ImageSize; + } + + bool IsCompressed() const { return m_DecompressedData.get() == nullptr; } + void print() const override { + RTDeviceBinaryImage::print(); + std::cerr << " COMPRESSED\n"; + } + +private: + std::unique_ptr m_DecompressedData; + size_t m_ImageSize; +}; +#endif // SYCL_RT_ZSTD_NOT_AVAIABLE + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index d6f063e5fada6..3a675577e410a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -173,6 +173,8 @@ static bool isDeviceBinaryTypeSupported(const context &C, return "SPIR-V"; case SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE: return "LLVM IR"; + case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE: + return "compressed none"; } assert(false && "Unknown device image format"); return "unknown"; @@ -731,6 +733,14 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, } } +static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE + if (auto CompImg = dynamic_cast(Img)) + if (CompImg->IsCompressed()) + CompImg->Decompress(); +#endif +} + // When caching is enabled, the returned UrProgram will already have // its ref count incremented. ur_program_handle_t ProgramManager::getBuiltURProgram( @@ -783,6 +793,10 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( collectDeviceImageDepsForImportedSymbols(Img, Device); DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); + // Decompress all DeviceImagesToLink + for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) + CheckAndDecompressImage(BinImg); + std::vector AllImages; AllImages.reserve(ImageDeps.size() + 1); AllImages.push_back(&Img); @@ -1386,6 +1400,10 @@ ProgramManager::getDeviceImage(const std::string &KernelName, Device); } } + + // Decompress the image if it is compressed. + CheckAndDecompressImage(Img); + if (Img) { CheckJITCompilationForImage(Img, JITCompilationIsRequired); @@ -1527,6 +1545,13 @@ getDeviceLibPrograms(const ContextImplPtr Context, return Programs; } +// Check if device image is compressed. +static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { + + auto currFormat = static_cast(Bin->Format); + return currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE; +} + ProgramManager::ProgramPtr ProgramManager::build( ProgramPtr Program, const ContextImplPtr Context, const std::string &CompileOptions, const std::string &LinkOptions, @@ -1656,7 +1681,19 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { if (EntriesB == EntriesE) continue; - auto Img = std::make_unique(RawImg); + std::unique_ptr Img; + if (isDeviceImageCompressed(RawImg)) +#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE + Img = std::make_unique(RawImg); +#else + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Recieved a compressed device image, but " + "SYCL RT was built without ZSTD support." + "Aborting. "); +#endif + else + Img = std::make_unique(RawImg); + static uint32_t SequenceID = 0; // Fill the kernel argument mask map @@ -1693,6 +1730,10 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { [&](auto &CurrentImg) { return CurrentImg.first->getFormat() == Img->getFormat(); }); + + // Check if image is compressed, and decompress it before dumping. + CheckAndDecompressImage(Img.get()); + dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0); } @@ -2170,6 +2211,9 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( auto &[KernelImagesState, KernelImages] = *StateImagesPair; + // Check if device image is compressed and decompress it if needed + CheckAndDecompressImage(BinImage); + if (KernelImages.empty()) { KernelImagesState = ImgState; KernelImages.push_back(BinImage); diff --git a/sycl/test-e2e/Compression/Inputs/single_kernel.cpp b/sycl/test-e2e/Compression/Inputs/single_kernel.cpp new file mode 100644 index 0000000000000..eac8a63438f85 --- /dev/null +++ b/sycl/test-e2e/Compression/Inputs/single_kernel.cpp @@ -0,0 +1,17 @@ +#include + +int main() { + + sycl::queue q0; + int val = -1; + { + sycl::buffer buffer1(&val, sycl::range(1)); + + q0.submit([&](sycl::handler &cgh) { + auto acc = sycl::accessor(buffer1, cgh); + cgh.single_task([=] { acc[0] = acc[0] + 1; }); + }).wait(); + } + + return !(val == 0); +} diff --git a/sycl/test-e2e/Compression/compression.cpp b/sycl/test-e2e/Compression/compression.cpp new file mode 100644 index 0000000000000..1d8da7abc9d49 --- /dev/null +++ b/sycl/test-e2e/Compression/compression.cpp @@ -0,0 +1,7 @@ +// End-to-End test for testing device image compression. +// REQUIRES: zstd +// RUN: %{build} -O0 -g %S/Inputs/single_kernel.cpp -o %t_not_compress.out +// RUN: %{build} -O0 -g --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out +// RUN: %{run} %t_not_compress.out +// RUN: %{run} %t_compress.out +// RUN: not diff %t_not_compress.out %t_compress.out diff --git a/sycl/test-e2e/Compression/compression_aot.cpp b/sycl/test-e2e/Compression/compression_aot.cpp new file mode 100644 index 0000000000000..5b44b6a41e9ce --- /dev/null +++ b/sycl/test-e2e/Compression/compression_aot.cpp @@ -0,0 +1,5 @@ +// End-to-End test for testing device image compression in AOT. +// REQUIRES: zstd, opencl-aot, cpu + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -O0 --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out +// RUN: %{run} %t_compress.out diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp new file mode 100644 index 0000000000000..72eb3f0904790 --- /dev/null +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -0,0 +1,56 @@ +// End-to-End test for testing device image compression when we have two +// translation units, one compressed and one not compressed. +// REQUIRES: zstd, linux + +// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared -fPIC -o %T/kernel1.so +// RUN: %{build} -DENABLE_KERNEL2 -shared -fPIC -o %T/kernel2.so + +// RUN: %{build} %T/kernel1.so %T/kernel2.so -o %t_compress.out +// RUN: %{run} %t_compress.out +#if defined(ENABLE_KERNEL1) || defined(ENABLE_KERNEL2) +#include +#include +using namespace sycl; +#endif + +#ifdef ENABLE_KERNEL1 +void kernel1() { + int data = -1; + { + buffer b(&data, range(1)); + queue q; + q.submit([&](sycl::handler &cgh) { + auto acc = accessor(b, cgh); + cgh.single_task([=] { acc[0] = abs(acc[0]); }); + }); + } + assert(data == 1); +} +#endif + +#ifdef ENABLE_KERNEL2 +void kernel2() { + int data = -2; + { + buffer b(&data, range(1)); + queue q; + q.submit([&](sycl::handler &cgh) { + auto acc = accessor(b, cgh); + cgh.single_task([=] { acc[0] = abs(acc[0]); }); + }); + } + assert(data == 2); +} +#endif + +#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2) +void kernel1(); +void kernel2(); + +int main() { + kernel1(); + kernel2(); + + return 0; +} +#endif diff --git a/sycl/test-e2e/Compression/compression_separate_compile.cpp b/sycl/test-e2e/Compression/compression_separate_compile.cpp new file mode 100644 index 0000000000000..9e47bbebdc875 --- /dev/null +++ b/sycl/test-e2e/Compression/compression_separate_compile.cpp @@ -0,0 +1,70 @@ +// End-to-End test for testing device image compression when we +// seperatly compile and link device images. + +// REQUIRES: zstd, opencl-aot, cpu, linux + +////////////////////// Compile device images +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL1' -DENABLE_KERNEL1 -c %s -o %t_kernel1_aot.o +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL2' -DENABLE_KERNEL2 -c %s -o %t_kernel2_aot.o + +////////////////////// Link device images +// RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v + +////////////////////// Compile the host program +// RUN: %clangxx -fsycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o + +////////////////////// Link the host program and compressed device images +// RUN: %clangxx -fsycl %t_main.o %t_kernel1_aot.o %t_kernel2_aot.o %t_compressed_image.o -o %t_compress.out + +// RUN: %{run} %t_compress.out + +#include + +using namespace sycl; + +// Kernel 1 +#ifdef ENABLE_KERNEL1 +class test_kernel1; +void run_kernel1(int *a, queue q) { + q.single_task([=]() { *a *= 3; }).wait(); +} +#endif + +// Kernel 2 +#ifdef ENABLE_KERNEL2 +class test_kernel2; +void run_kernel2(int *a, queue q) { + q.single_task([=]() { *a += 42; }).wait(); +} +#endif + +// Main application. +#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2) +#include +#include + +#include + +class kernel_init; +void run_kernel1(int *a, queue q); +void run_kernel2(int *a, queue q); +int main() { + int retCode = 0; + queue q; + + if (!q.get_device().get_info()) + return 0; + + int *p = malloc_shared(1, q); + *p = 42; + + run_kernel1(p, q); + run_kernel2(p, q); + q.wait(); + + retCode = *p != (42 * 3 + 42); + + free(p, q); + return retCode; +} +#endif diff --git a/sycl/test-e2e/Compression/no_zstd_warning.cpp b/sycl/test-e2e/Compression/no_zstd_warning.cpp new file mode 100644 index 0000000000000..8a4460f9b8643 --- /dev/null +++ b/sycl/test-e2e/Compression/no_zstd_warning.cpp @@ -0,0 +1,4 @@ +// using --offload-compress without zstd should throw an error. +// REQUIRES: !zstd +// RUN: not %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s +// CHECK: '--offload-compress' option is specified but zstd is not available. The device image will not be compressed. diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index f7d56aaee8f8e..4675206d90dd7 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -308,6 +308,18 @@ def open_check_file(file_name): if sp[0] == 0: config.available_features.add("preview-breaking-changes-supported") +# Check if clang is built with ZSTD and compression support. +fPIC_opt = "-fPIC" if platform.system() != "Windows" else "" +ps = subprocess.Popen( + [config.dpcpp_compiler, "-fsycl", "--offload-compress", "-shared", fPIC_opt, "-x", "c++", "-", "-o", "-"], + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, +) +op = ps.communicate(input=b"") +if ps.wait() == 0: + config.available_features.add("zstd") + # Check for CUDA SDK check_cuda_file = "cuda_include.cpp" with open_check_file(check_cuda_file) as fp: diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index ec740f913ed4d..0d8b2ac283c17 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -53,6 +53,12 @@ add_subdirectory(accessor) add_subdirectory(handler) add_subdirectory(builtins) add_subdirectory(buffer/l0_specific) + +# Enable compression unit-tests only if zstd is present. +if (LLVM_ENABLE_ZSTD) + add_subdirectory(compression) +endif() + # TODO Enable xpti tests for Windows if (NOT WIN32) add_subdirectory(xpti_trace) diff --git a/sycl/unittests/compression/CMakeLists.txt b/sycl/unittests/compression/CMakeLists.txt new file mode 100644 index 0000000000000..742e2d228072b --- /dev/null +++ b/sycl/unittests/compression/CMakeLists.txt @@ -0,0 +1,3 @@ +add_sycl_unittest(CompressionTests OBJECT + CompressionTests.cpp +) diff --git a/sycl/unittests/compression/CompressionTests.cpp b/sycl/unittests/compression/CompressionTests.cpp new file mode 100644 index 0000000000000..e9b50fa1cc2e0 --- /dev/null +++ b/sycl/unittests/compression/CompressionTests.cpp @@ -0,0 +1,80 @@ +//==------- CompressionTests.cpp --- compression unit test ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include + +#include + +using namespace sycl::detail; + +TEST(CompressionTest, SimpleCompression) { + + // Data to compress. + std::string data = "Hello World! I'm about to get compressed :P"; + size_t compressedDataSize = 0; + + auto compressedData = ZSTDCompressor::CompressBlob( + data.c_str(), data.size(), compressedDataSize, /*Compression level*/ 3); + + // Check if compression was successful. + EXPECT_NE(compressedData, nullptr); + EXPECT_GT(compressedDataSize, (size_t)0); + + // Decompress the data. + size_t decompressedSize = 0; + auto decompressedData = ZSTDCompressor::DecompressBlob( + compressedData.get(), compressedDataSize, decompressedSize); + + ASSERT_NE(decompressedData, nullptr); + ASSERT_GT(decompressedSize, (size_t)0); + + // Check if decompressed data is same as original data. + std::string decompressedStr((char *)decompressedData.get(), decompressedSize); + ASSERT_EQ(data, decompressedStr); +} + +// Test getting error code and error string. +// Intentionally give incorrect input to decompress +// to trigger an error. +TEST(CompressionTest, NegativeErrorTest) { + std::string input = "Hello, World!"; + size_t decompressedSize = 0; + bool threwException = false; + try { + auto compressedData = ZSTDCompressor::DecompressBlob( + input.c_str(), input.size(), decompressedSize); + } catch (...) { + threwException = true; + } + + ASSERT_TRUE(threwException); +} + +// Test passing empty input to (de)compress. +// There should be no error and the output should be empty. +TEST(CompressionTest, EmptyInputTest) { + std::string input = ""; + size_t compressedSize = 0; + auto compressedData = ZSTDCompressor::CompressBlob( + input.c_str(), input.size(), compressedSize, 1); + + ASSERT_NE(compressedData, nullptr); + ASSERT_GT(compressedSize, (size_t)0); + + size_t decompressedSize = 0; + auto decompressedData = ZSTDCompressor::DecompressBlob( + compressedData.get(), compressedSize, decompressedSize); + + ASSERT_NE(decompressedData, nullptr); + ASSERT_EQ(decompressedSize, (size_t)0); + + std::string decompressedStr((char *)decompressedData.get(), decompressedSize); + ASSERT_EQ(input, decompressedStr); +}