class OpenCVDecoder(AbstractDecoder):
    """Benchmark decoder backed by OpenCV's ``cv2.VideoCapture``.

    The benchmark script registers this class under the ``"opencv"`` key as
    ``DecoderKind("OpenCV", OpenCVDecoder)``.

    ``cv2`` is imported lazily inside each method so that importing this
    module does not require OpenCV to be installed.
    """

    def __init__(self):
        """Select the first usable stream-buffered OpenCV video backend.

        A backend qualifies when it is available and, if it is a plugin
        rather than built in, its (ABI, API) version is at least (1, 2).
        """
        import cv2.videoio_registry as vr

        self._print_each_iteration_time = False
        # NOTE(review): this stays None when no backend qualifies; confirm how
        # cv2.VideoCapture treats a None apiPreference before relying on it.
        self._backend = None
        # Check backend abi/api for compatibility
        for backend in vr.getStreamBufferedBackends():
            if not vr.hasBackend(backend):
                continue
            if not vr.isBackendBuiltIn(backend):
                _, abi, api = vr.getStreamBufferedBackendPluginVersion(backend)
                # Same test as "abi < 1 or (abi == 1 and api < 2)".
                if (abi, api) < (1, 2):
                    continue
            self._backend = backend
            break

    def _open_capture(self, video_file):
        """Open *video_file* with the selected backend.

        Returns:
            An opened ``cv2.VideoCapture``; the caller must ``release()`` it.

        Raises:
            ValueError: if the stream could not be opened.
        """
        import cv2

        cap = cv2.VideoCapture(video_file, self._backend, [])
        if not cap.isOpened():
            # Release explicitly so a half-initialised capture is not leaked.
            cap.release()
            raise ValueError("Could not open video stream")
        return cap

    def decode_frames(self, video_file, pts_list):
        """Decode the frames nearest to the timestamps in *pts_list*.

        Each timestamp is mapped to a frame index with ``int(pts * fps)``.
        The stream is read sequentially from the start; only the requested
        frames are retrieved (decompressed), the others are merely grabbed.

        Args:
            video_file: path (or other source) accepted by ``cv2.VideoCapture``.
            pts_list: timestamps in seconds; may be unsorted and may contain
                duplicates.

        Returns:
            One frame per entry of *pts_list*, in the same order. Timestamps
            that map to the same frame index share the same frame object.

        Raises:
            ValueError: if the stream cannot be opened, reports no usable
                frame rate, or ends before every requested frame was read.
        """
        import cv2

        cap = self._open_capture(video_file)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            # "not fps > 0" also rejects NaN, which int() could not convert.
            if pts_list and not fps > 0:
                raise ValueError("Could not determine video frame rate")
            frame_indices = [int(pts * fps) for pts in pts_list]

            # A set gives O(1) membership tests and collapses duplicates, so
            # the loop terminates once every distinct index has been decoded.
            wanted = set(frame_indices)
            decoded = {}
            current_frame = 0
            while len(decoded) < len(wanted):
                if not cap.grab():
                    raise ValueError("Could not grab video frame")
                if current_frame in wanted:  # only decompress needed
                    ok, frame = cap.retrieve()
                    if not ok:
                        raise ValueError("Could not retrieve video frame")
                    decoded[current_frame] = frame
                current_frame += 1
        finally:
            cap.release()
        # Re-expand to request order (handles unsorted and duplicate pts).
        return [decoded[index] for index in frame_indices]

    def decode_first_n_frames(self, video_file, n):
        """Decode the first *n* frames of *video_file*.

        Returns:
            A list of exactly *n* frames in stream order.

        Raises:
            ValueError: if the stream cannot be opened or a frame cannot be
                grabbed or retrieved before *n* frames have been read.
        """
        cap = self._open_capture(video_file)
        try:
            frames = []
            for _ in range(n):
                if not cap.grab():
                    raise ValueError("Could not grab video frame")
                ok, frame = cap.retrieve()
                if not ok:
                    raise ValueError("Could not retrieve video frame")
                frames.append(frame)
        finally:
            cap.release()
        return frames

    def decode_and_resize(self, video_file, pts_list, height, width, device):
        """Decode the frames for *pts_list* and resize each one.

        ``cv2.resize`` takes its target size as ``(width, height)``.
        *device* is accepted for interface compatibility and is not used.

        Returns:
            The resized frames, one per entry of *pts_list*.
        """
        import cv2

        return [
            cv2.resize(frame, (width, height))
            for frame in self.decode_frames(video_file, pts_list)
        ]