Skip to content

Commit 981d94f

Browse files
committed
Enable Intel GPU support in torchcodec on Linux (xpu device)
This commit enables support for Intel GPUs in torchcodec. It adds: * ffmpeg-vaapi for decoding * VAAPI based color space conversion (decoding output to RGBA) * RGBA surface import as torch tensor (on torch xpu device) * RGBA to RGB24 tensor slicing To build torchcodec with Intel GPU support: * Install pytorch with XPU backend support. For example, with: ``` pip3 install torch --index-url https://download.pytorch.org/whl/xpu ``` * Install oneAPI development environment following https://github.com/pytorch/pytorch?tab=readme-ov-file#intel-gpu-support * Build and install FFmpeg with `--enable-vaapi` * Install torcheval (for tests): `pip3 install torcheval` * Build torchcodec with: `ENABLE_XPU=1 python3 setup.py devel` Notes: * RGB24 is not a supported color format on current Intel GPUs (as it is considered to be suboptimal due to odd alignments) * Intel media and compute APIs can't seamlessly work with each other's memory. For example, Intel compute's Unified Shared Memory pointers are not recognized by media APIs. Thus, lower level sharing via dma fds is needed. This also makes this part of the solution OS dependent. * Color space conversion algorithms may differ noticeably between implementations, as is the case for Intel. This requires checking PSNR values instead of per-pixel atol/rtol differences. * Installing oneAPI environment is needed due to pytorch/pytorch#149075 This commit was primarily verified on Intel Battlemage G21 (0xe20b) and Intel Data Center GPU Flex (0x56c0). Signed-off-by: Dmitry Rogozhkin <[email protected]>
1 parent a864bf9 commit 981d94f

File tree

11 files changed

+634
-51
lines changed

11 files changed

+634
-51
lines changed

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def _build_all_extensions_with_cmake(self):
112112
torch_dir = Path(torch.utils.cmake_prefix_path) / "Torch"
113113
cmake_build_type = os.environ.get("CMAKE_BUILD_TYPE", "Release")
114114
enable_cuda = os.environ.get("ENABLE_CUDA", "")
115+
enable_xpu = os.environ.get("ENABLE_XPU", "")
115116
python_version = sys.version_info
116117
cmake_args = [
117118
f"-DCMAKE_INSTALL_PREFIX={self._install_prefix}",
@@ -120,6 +121,7 @@ def _build_all_extensions_with_cmake(self):
120121
f"-DCMAKE_BUILD_TYPE={cmake_build_type}",
121122
f"-DPYTHON_VERSION={python_version.major}.{python_version.minor}",
122123
f"-DENABLE_CUDA={enable_cuda}",
124+
f"-DENABLE_XPU={enable_xpu}",
123125
]
124126

125127
Path(self.build_temp).mkdir(parents=True, exist_ok=True)

src/torchcodec/decoders/_core/CMakeLists.txt

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,15 @@ find_package(Torch REQUIRED)
99
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
1010

1111
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}")
12+
if(ENABLE_CUDA)
13+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_CUDA")
14+
endif()
15+
if(ENABLE_XPU)
16+
find_package(PkgConfig REQUIRED)
17+
pkg_check_modules(L0 REQUIRED IMPORTED_TARGET level-zero)
18+
pkg_check_modules(LIBVA REQUIRED IMPORTED_TARGET libva)
19+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_XPU")
20+
endif()
1221

1322
function(make_torchcodec_sublibrary
1423
library_name
@@ -61,12 +70,15 @@ function(make_torchcodec_libraries
6170
AVIOContextHolder.cpp
6271
FFMPEGCommon.cpp
6372
VideoDecoder.cpp
73+
CPUOnlyDevice.cpp
6474
)
6575

6676
if(ENABLE_CUDA)
6777
list(APPEND decoder_sources CudaDevice.cpp)
68-
else()
69-
list(APPEND decoder_sources CPUOnlyDevice.cpp)
78+
endif()
79+
80+
if(ENABLE_XPU)
81+
list(APPEND decoder_sources XpuDevice.cpp)
7082
endif()
7183

7284
set(decoder_library_dependencies
@@ -81,6 +93,11 @@ function(make_torchcodec_libraries
8193
)
8294
endif()
8395

96+
if(ENABLE_XPU)
97+
list(APPEND decoder_library_dependencies
98+
PkgConfig::L0 PkgConfig::LIBVA)
99+
endif()
100+
84101
make_torchcodec_sublibrary(
85102
"${decoder_library_name}"
86103
SHARED

src/torchcodec/decoders/_core/CPUOnlyDevice.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ namespace facebook::torchcodec {
1414
TORCH_CHECK(false, "Unsupported device: " + device.str());
1515
}
1616

17+
#ifndef ENABLE_CUDA
1718
void convertAVFrameToFrameOutputOnCuda(
1819
const torch::Device& device,
1920
[[maybe_unused]] const VideoDecoder::VideoStreamOptions& videoStreamOptions,
@@ -40,5 +41,35 @@ std::optional<const AVCodec*> findCudaCodec(
4041
[[maybe_unused]] const AVCodecID& codecId) {
4142
throwUnsupportedDeviceError(device);
4243
}
44+
#endif // ENABLE_CUDA
45+
46+
#ifndef ENABLE_XPU
47+
void convertAVFrameToFrameOutputOnXpu(
48+
const torch::Device& device,
49+
[[maybe_unused]] const VideoDecoder::VideoStreamOptions& videoStreamOptions,
50+
[[maybe_unused]] UniqueAVFrame& avFrame,
51+
[[maybe_unused]] VideoDecoder::FrameOutput& frameOutput,
52+
[[maybe_unused]] std::optional<torch::Tensor> preAllocatedOutputTensor) {
53+
throwUnsupportedDeviceError(device);
54+
}
55+
56+
void initializeContextOnXpu(
57+
const torch::Device& device,
58+
[[maybe_unused]] AVCodecContext* codecContext) {
59+
throwUnsupportedDeviceError(device);
60+
}
61+
62+
void releaseContextOnXpu(
63+
const torch::Device& device,
64+
[[maybe_unused]] AVCodecContext* codecContext) {
65+
throwUnsupportedDeviceError(device);
66+
}
67+
68+
std::optional<const AVCodec*> findXpuCodec(
69+
const torch::Device& device,
70+
[[maybe_unused]] const AVCodecID& codecId) {
71+
throwUnsupportedDeviceError(device);
72+
}
73+
#endif // ENABLE_XPU
4374

4475
} // namespace facebook::torchcodec

src/torchcodec/decoders/_core/DeviceInterface.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,38 @@ void initializeContextOnCuda(
2929
const torch::Device& device,
3030
AVCodecContext* codecContext);
3131

32+
void initializeContextOnXpu(
33+
const torch::Device& device,
34+
AVCodecContext* codecContext);
35+
3236
void convertAVFrameToFrameOutputOnCuda(
3337
const torch::Device& device,
3438
const VideoDecoder::VideoStreamOptions& videoStreamOptions,
3539
UniqueAVFrame& avFrame,
3640
VideoDecoder::FrameOutput& frameOutput,
3741
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
3842

43+
void convertAVFrameToFrameOutputOnXpu(
44+
const torch::Device& device,
45+
const VideoDecoder::VideoStreamOptions& videoStreamOptions,
46+
UniqueAVFrame& avFrame,
47+
VideoDecoder::FrameOutput& frameOutput,
48+
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
49+
3950
void releaseContextOnCuda(
4051
const torch::Device& device,
4152
AVCodecContext* codecContext);
4253

54+
void releaseContextOnXpu(
55+
const torch::Device& device,
56+
AVCodecContext* codecContext);
57+
4358
std::optional<const AVCodec*> findCudaCodec(
4459
const torch::Device& device,
4560
const AVCodecID& codecId);
4661

62+
std::optional<const AVCodec*> findXpuCodec(
63+
const torch::Device& device,
64+
const AVCodecID& codecId);
65+
4766
} // namespace facebook::torchcodec

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ VideoDecoder::~VideoDecoder() {
101101
if (device.type() == torch::kCPU) {
102102
} else if (device.type() == torch::kCUDA) {
103103
releaseContextOnCuda(device, streamInfo.codecContext.get());
104+
} else if (device.type() == torch::kXPU) {
105+
releaseContextOnXpu(device, streamInfo.codecContext.get());
104106
} else {
105107
TORCH_CHECK(false, "Invalid device type: " + device.str());
106108
}
@@ -429,10 +431,16 @@ void VideoDecoder::addStream(
429431

430432
// TODO_CODE_QUALITY it's pretty meh to have a video-specific logic within
431433
// addStream() which is supposed to be generic
432-
if (mediaType == AVMEDIA_TYPE_VIDEO && device.type() == torch::kCUDA) {
433-
avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
434-
findCudaCodec(device, streamInfo.stream->codecpar->codec_id)
435-
.value_or(avCodec));
434+
if (mediaType == AVMEDIA_TYPE_VIDEO) {
435+
if (device.type() == torch::kCUDA) {
436+
avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
437+
findCudaCodec(device, streamInfo.stream->codecpar->codec_id)
438+
.value_or(avCodec));
439+
} else if (device.type() == torch::kXPU) {
440+
avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
441+
findXpuCodec(device, streamInfo.stream->codecpar->codec_id)
442+
.value_or(avCodec));
443+
}
436444
}
437445

438446
AVCodecContext* codecContext = avcodec_alloc_context3(avCodec);
@@ -447,8 +455,12 @@ void VideoDecoder::addStream(
447455
streamInfo.codecContext->pkt_timebase = streamInfo.stream->time_base;
448456

449457
// TODO_CODE_QUALITY same as above.
450-
if (mediaType == AVMEDIA_TYPE_VIDEO && device.type() == torch::kCUDA) {
451-
initializeContextOnCuda(device, codecContext);
458+
if (mediaType == AVMEDIA_TYPE_VIDEO) {
459+
if (device.type() == torch::kCUDA) {
460+
initializeContextOnCuda(device, codecContext);
461+
} else if (device.type() == torch::kXPU) {
462+
initializeContextOnXpu(device, codecContext);
463+
}
452464
}
453465

454466
retVal = avcodec_open2(streamInfo.codecContext.get(), avCodec, nullptr);
@@ -476,7 +488,8 @@ void VideoDecoder::addVideoStream(
476488
const VideoStreamOptions& videoStreamOptions) {
477489
TORCH_CHECK(
478490
videoStreamOptions.device.type() == torch::kCPU ||
479-
videoStreamOptions.device.type() == torch::kCUDA,
491+
videoStreamOptions.device.type() == torch::kCUDA ||
492+
videoStreamOptions.device.type() == torch::kXPU,
480493
"Invalid device type: " + videoStreamOptions.device.str());
481494

482495
addStream(
@@ -1226,6 +1239,13 @@ VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput(
12261239
avFrame,
12271240
frameOutput,
12281241
preAllocatedOutputTensor);
1242+
} else if (streamInfo.videoStreamOptions.device.type() == torch::kXPU) {
1243+
convertAVFrameToFrameOutputOnXpu(
1244+
streamInfo.videoStreamOptions.device,
1245+
streamInfo.videoStreamOptions,
1246+
avFrame,
1247+
frameOutput,
1248+
preAllocatedOutputTensor);
12291249
} else {
12301250
TORCH_CHECK(
12311251
false,

src/torchcodec/decoders/_core/VideoDecoderOps.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,13 @@ void _add_video_stream(
210210
} else if (device.value().rfind("cuda", 0) == 0) { // starts with "cuda"
211211
std::string deviceStr(device.value());
212212
videoStreamOptions.device = torch::Device(deviceStr);
213+
} else if (device.value().rfind("xpu", 0) == 0) { // starts with "xpu"
214+
std::string deviceStr(device.value());
215+
videoStreamOptions.device = torch::Device(deviceStr);
213216
} else {
214217
throw std::runtime_error(
215218
"Invalid device=" + std::string(device.value()) +
216-
". device must be either cpu or cuda.");
219+
". device must be either cpu, cuda or xpu.");
217220
}
218221
}
219222

0 commit comments

Comments
 (0)