#include <torch/types.h>
#include <mutex>

- #include "src/torchcodec/decoders/_core/DeviceInterface.h"
+ #include "src/torchcodec/decoders/_core/CudaDevice.h"
#include "src/torchcodec/decoders/_core/FFMPEGCommon.h"
#include "src/torchcodec/decoders/_core/VideoDecoder.h"

@@ -16,6 +16,10 @@ extern "C" {
namespace facebook::torchcodec {
namespace {

+ bool g_cuda = registerDeviceInterface("cuda", [](const std::string& device) {
+   return new CudaDevice(device);
+ });
+
// We reuse cuda contexts across VideoDecoder instances. This is because
// creating a cuda context is expensive. The cache mechanism is as follows:
// 1. There is a cache of size MAX_CONTEXTS_PER_GPU_IN_CACHE cuda contexts for
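The g_cuda initializer above is how the CUDA backend now self-registers with the generic device-interface layer instead of being hard-wired into the decoder. The sketch below illustrates the kind of registry a registerDeviceInterface() call plausibly fills in; the CreatorFunc alias, the map, and deviceRegistry() are assumptions for illustration, not torchcodec's actual implementation.

// Sketch of a device-interface registry (assumed names, not torchcodec code).
#include <functional>
#include <map>
#include <string>
#include <utility>

class DeviceInterface; // defined elsewhere (DeviceInterface.h in torchcodec)

using CreatorFunc = std::function<DeviceInterface*(const std::string&)>;

// Meyers singleton so the map exists before any static registration runs.
std::map<std::string, CreatorFunc>& deviceRegistry() {
  static std::map<std::string, CreatorFunc> registry;
  return registry;
}

// Returning bool lets callers assign the result to a global (like g_cuda),
// which forces the registration to happen during static initialization.
bool registerDeviceInterface(const std::string& deviceType, CreatorFunc creator) {
  return deviceRegistry().emplace(deviceType, std::move(creator)).second;
}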
@@ -156,39 +160,29 @@ AVBufferRef* getCudaContext(const torch::Device& device) {
      device, nonNegativeDeviceIndex, type);
#endif
}
+ } // namespace

- void throwErrorIfNonCudaDevice(const torch::Device& device) {
-   TORCH_CHECK(
-       device.type() != torch::kCPU,
-       "Device functions should only be called if the device is not CPU.")
-   if (device.type() != torch::kCUDA) {
-     throw std::runtime_error("Unsupported device: " + device.str());
+ CudaDevice::CudaDevice(const std::string& device) : DeviceInterface(device) {
+   if (device_.type() != torch::kCUDA) {
+     throw std::runtime_error("Unsupported device: " + device_.str());
  }
}
- } // namespace

- void releaseContextOnCuda(
-     const torch::Device& device,
-     AVCodecContext* codecContext) {
-   throwErrorIfNonCudaDevice(device);
-   addToCacheIfCacheHasCapacity(device, codecContext);
+ void CudaDevice::releaseContext(AVCodecContext* codecContext) {
+   addToCacheIfCacheHasCapacity(device_, codecContext);
}

- void initializeContextOnCuda(
-     const torch::Device& device,
-     AVCodecContext* codecContext) {
-   throwErrorIfNonCudaDevice(device);
+ void CudaDevice::initializeContext(AVCodecContext* codecContext) {
  // It is important for pytorch itself to create the cuda context. If ffmpeg
  // creates the context it may not be compatible with pytorch.
  // This is a dummy tensor to initialize the cuda context.
  torch::Tensor dummyTensorForCudaInitialization = torch::empty(
-       {1}, torch::TensorOptions().dtype(torch::kUInt8).device(device));
-   codecContext->hw_device_ctx = getCudaContext(device);
+       {1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
+   codecContext->hw_device_ctx = getCudaContext(device_);
  return;
}

- void convertAVFrameToFrameOutputOnCuda(
-     const torch::Device& device,
+ void CudaDevice::convertAVFrameToFrameOutput(
    const VideoDecoder::VideoStreamOptions& videoStreamOptions,
    UniqueAVFrame& avFrame,
    VideoDecoder::FrameOutput& frameOutput,
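For context on initializeContext() above: the dummy tensor forces PyTorch, rather than FFmpeg, to create the CUDA context, and getCudaContext() then hands FFmpeg a hardware device context to attach to the codec. The sketch below shows that idea with plain libtorch and FFmpeg calls; it is an assumption-level illustration (hypothetical function name, no per-GPU caching), not the code behind getCudaContext().

// Sketch only: touch the GPU via PyTorch first, then attach an FFmpeg CUDA
// hardware device context to the codec context.
#include <stdexcept>
#include <torch/types.h>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext.h>
}

void initializeCudaCodecContextSketch(
    AVCodecContext* codecContext,
    const torch::Device& device) {
  // A 1-byte allocation on the target GPU is enough to make PyTorch (not
  // FFmpeg) own the CUDA context.
  torch::Tensor dummy = torch::empty(
      {1}, torch::TensorOptions().dtype(torch::kUInt8).device(device));

  // Create an FFmpeg CUDA device context and attach it so decoding runs on
  // the GPU. torchcodec's getCudaContext() additionally caches these.
  AVBufferRef* hwDeviceCtx = nullptr;
  int err = av_hwdevice_ctx_create(
      &hwDeviceCtx, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 0);
  if (err < 0) {
    throw std::runtime_error("Failed to create CUDA hardware device context");
  }
  codecContext->hw_device_ctx = hwDeviceCtx;
}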
@@ -215,11 +209,11 @@ void convertAVFrameToFrameOutputOnCuda(
      "x3, got ",
      shape);
  } else {
-     dst = allocateEmptyHWCTensor(height, width, videoStreamOptions.device);
+     dst = allocateEmptyHWCTensor(height, width, device_);
  }

  // Use the user-requested GPU for running the NPP kernel.
-   c10::cuda::CUDAGuard deviceGuard(device);
+   c10::cuda::CUDAGuard deviceGuard(device_);

  NppiSize oSizeROI = {width, height};
  Npp8u* input[2] = {avFrame->data[0], avFrame->data[1]};
@@ -247,7 +241,7 @@ void convertAVFrameToFrameOutputOnCuda(
  // output.
  at::cuda::CUDAEvent nppDoneEvent;
  at::cuda::CUDAStream nppStreamWrapper =
-       c10::cuda::getStreamFromExternal(nppGetStream(), device.index());
+       c10::cuda::getStreamFromExternal(nppGetStream(), device_.index());
  nppDoneEvent.record(nppStreamWrapper);
  nppDoneEvent.block(at::cuda::getCurrentCUDAStream());

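The event above is what keeps the NPP color conversion and PyTorch ordered: NPP work is enqueued on nppGetStream(), so an event recorded on that stream and blocked on by the current PyTorch stream guarantees the output tensor is not consumed before the conversion finishes. A minimal, self-contained sketch of that pattern (hypothetical function name; c10 variant of getCurrentCUDAStream):

// Sketch: make PyTorch's current CUDA stream wait for work already enqueued
// on NPP's stream.
#include <ATen/cuda/CUDAEvent.h>
#include <c10/cuda/CUDAStream.h>
#include <nppcore.h>

void syncCurrentStreamWithNppSketch(c10::DeviceIndex deviceIndex) {
  at::cuda::CUDAEvent nppDoneEvent;
  at::cuda::CUDAStream nppStream =
      c10::cuda::getStreamFromExternal(nppGetStream(), deviceIndex);
  // Record the event on NPP's stream...
  nppDoneEvent.record(nppStream);
  // ...and have PyTorch's current stream block until it fires.
  nppDoneEvent.block(c10::cuda::getCurrentCUDAStream(deviceIndex));
}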
@@ -262,11 +256,7 @@ void convertAVFrameToFrameOutputOnCuda(
// we have to do this because of an FFmpeg bug where hardware decoding is not
// appropriately set, so we just go off and find the matching codec for the CUDA
// device
- std::optional<const AVCodec*> findCudaCodec(
-     const torch::Device& device,
-     const AVCodecID& codecId) {
-   throwErrorIfNonCudaDevice(device);
-
+ std::optional<const AVCodec*> CudaDevice::findCodec(const AVCodecID& codecId) {
  void* i = nullptr;
  const AVCodec* codec = nullptr;
  while ((codec = av_codec_iterate(&i)) != nullptr) {
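The while loop above is cut off by the diff. The usual FFmpeg pattern for this kind of search is to keep iterating codecs until one matches the codec ID, is a decoder, and advertises a CUDA hardware configuration. The sketch below follows that generic pattern and is an assumption about the omitted body, not necessarily what CudaDevice::findCodec() does verbatim.

// Sketch: find a decoder for codecId that supports CUDA hardware decoding.
extern "C" {
#include <libavcodec/avcodec.h>
}
#include <optional>

std::optional<const AVCodec*> findCudaDecoderSketch(AVCodecID codecId) {
  void* i = nullptr;
  const AVCodec* codec = nullptr;
  while ((codec = av_codec_iterate(&i)) != nullptr) {
    if (codec->id != codecId || !av_codec_is_decoder(codec)) {
      continue;
    }
    // Keep only codecs that advertise a CUDA hardware configuration.
    for (int j = 0;; ++j) {
      const AVCodecHWConfig* config = avcodec_get_hw_config(codec, j);
      if (config == nullptr) {
        break;
      }
      if (config->device_type == AV_HWDEVICE_TYPE_CUDA) {
        return codec;
      }
    }
  }
  return std::nullopt;
}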