From 2c559b2f073b6c925fa97ca21196b56d7c46cf07 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 23 May 2025 13:26:13 +0100 Subject: [PATCH 1/2] Use 'output' more consistently --- src/torchcodec/_core/Encoder.cpp | 12 +-- src/torchcodec/_core/Encoder.h | 4 +- src/torchcodec/_core/FFMPEGCommon.cpp | 88 ++++++++++---------- src/torchcodec/_core/SingleStreamDecoder.cpp | 32 +++---- 4 files changed, 68 insertions(+), 68 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index a5303b0f..9fd7772c 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -177,11 +177,11 @@ void AudioEncoder::initializeEncoder( // well when "-b:a" isn't specified. avCodecContext_->bit_rate = bitRate.value_or(0); - desiredNumChannels_ = static_cast(numChannels.value_or(wf_.sizes()[0])); - validateNumChannels(*avCodec, desiredNumChannels_); + outNumChannels_ = static_cast(numChannels.value_or(wf_.sizes()[0])); + validateNumChannels(*avCodec, outNumChannels_); // The avCodecContext layout defines the layout of the encoded output, it's // not related to the input sampes. - setDefaultChannelLayout(avCodecContext_, desiredNumChannels_); + setDefaultChannelLayout(avCodecContext_, outNumChannels_); validateSampleRate(*avCodec, sampleRate); avCodecContext_->sample_rate = sampleRate; @@ -304,7 +304,7 @@ void AudioEncoder::encodeInnerLoop( bool mustConvert = (srcAVFrame != nullptr && (avCodecContext_->sample_fmt != AV_SAMPLE_FMT_FLTP || - getNumChannels(srcAVFrame) != desiredNumChannels_)); + getNumChannels(srcAVFrame) != outNumChannels_)); UniqueAVFrame convertedAVFrame; if (mustConvert) { @@ -315,14 +315,14 @@ void AudioEncoder::encodeInnerLoop( srcAVFrame->sample_rate, // No sample rate conversion srcAVFrame->sample_rate, srcAVFrame, - desiredNumChannels_)); + outNumChannels_)); } convertedAVFrame = convertAudioAVFrameSamples( swrContext_, srcAVFrame, avCodecContext_->sample_fmt, srcAVFrame->sample_rate, // No sample rate conversion - desiredNumChannels_); + outNumChannels_); TORCH_CHECK( convertedAVFrame->nb_samples == srcAVFrame->nb_samples, "convertedAVFrame->nb_samples=", diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h index afbc1d3f..55a31e8a 100644 --- a/src/torchcodec/_core/Encoder.h +++ b/src/torchcodec/_core/Encoder.h @@ -50,9 +50,9 @@ class AudioEncoder { UniqueAVCodecContext avCodecContext_; int streamIndex_; UniqueSwrContext swrContext_; - // TODO-ENCODING: desiredNumChannels should just be part of an options struct, + // TODO-ENCODING: outNumChannels should just be part of an options struct, // see other TODO above. - int desiredNumChannels_ = -1; + int outNumChannels_ = -1; const torch::Tensor wf_; diff --git a/src/torchcodec/_core/FFMPEGCommon.cpp b/src/torchcodec/_core/FFMPEGCommon.cpp index b412517e..942a69d9 100644 --- a/src/torchcodec/_core/FFMPEGCommon.cpp +++ b/src/torchcodec/_core/FFMPEGCommon.cpp @@ -159,74 +159,74 @@ namespace { #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4 // Returns: -// - the srcAVFrame's channel layout if srcAVFrame has desiredNumChannels -// - the default channel layout with desiredNumChannels otherwise. -AVChannelLayout getDesiredChannelLayout( - int desiredNumChannels, +// - the srcAVFrame's channel layout if srcAVFrame has outNumChannels +// - the default channel layout with outNumChannels otherwise. +AVChannelLayout getOutputChannelLayout( + int outNumChannels, const UniqueAVFrame& srcAVFrame) { - AVChannelLayout desiredLayout; - if (desiredNumChannels == getNumChannels(srcAVFrame)) { - desiredLayout = srcAVFrame->ch_layout; + AVChannelLayout outLayout; + if (outNumChannels == getNumChannels(srcAVFrame)) { + outLayout = srcAVFrame->ch_layout; } else { - av_channel_layout_default(&desiredLayout, desiredNumChannels); + av_channel_layout_default(&outLayout, outNumChannels); } - return desiredLayout; + return outLayout; } #else // Same as above -int64_t getDesiredChannelLayout( - int desiredNumChannels, +int64_t getOutputChannelLayout( + int outNumChannels, const UniqueAVFrame& srcAVFrame) { - int64_t desiredLayout; - if (desiredNumChannels == getNumChannels(srcAVFrame)) { - desiredLayout = srcAVFrame->channel_layout; + int64_t outLayout; + if (outNumChannels == getNumChannels(srcAVFrame)) { + outLayout = srcAVFrame->channel_layout; } else { - desiredLayout = av_get_default_channel_layout(desiredNumChannels); + outLayout = av_get_default_channel_layout(outNumChannels); } - return desiredLayout; + return outLayout; } #endif } // namespace -// Sets dstAVFrame' channel layout to getDesiredChannelLayout(): see doc above +// Sets dstAVFrame' channel layout to getOutputChannelLayout(): see doc above void setChannelLayout( UniqueAVFrame& dstAVFrame, const UniqueAVFrame& srcAVFrame, - int desiredNumChannels) { + int outNumChannels) { #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4 - AVChannelLayout desiredLayout = - getDesiredChannelLayout(desiredNumChannels, srcAVFrame); - auto status = av_channel_layout_copy(&dstAVFrame->ch_layout, &desiredLayout); + AVChannelLayout outLayout = + getOutputChannelLayout(outNumChannels, srcAVFrame); + auto status = av_channel_layout_copy(&dstAVFrame->ch_layout, &outLayout); TORCH_CHECK( status == AVSUCCESS, "Couldn't copy channel layout to avFrame: ", getFFMPEGErrorStringFromErrorCode(status)); #else dstAVFrame->channel_layout = - getDesiredChannelLayout(desiredNumChannels, srcAVFrame); - dstAVFrame->channels = desiredNumChannels; + getOutputChannelLayout(outNumChannels, srcAVFrame); + dstAVFrame->channels = outNumChannels; #endif } SwrContext* createSwrContext( AVSampleFormat srcSampleFormat, - AVSampleFormat desiredSampleFormat, + AVSampleFormat outSampleFormat, int srcSampleRate, - int desiredSampleRate, + int outSampleRate, const UniqueAVFrame& srcAVFrame, - int desiredNumChannels) { + int outNumChannels) { SwrContext* swrContext = nullptr; int status = AVSUCCESS; #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4 - AVChannelLayout desiredLayout = - getDesiredChannelLayout(desiredNumChannels, srcAVFrame); + AVChannelLayout outLayout = + getOutputChannelLayout(outNumChannels, srcAVFrame); status = swr_alloc_set_opts2( &swrContext, - &desiredLayout, - desiredSampleFormat, - desiredSampleRate, + &outLayout, + outSampleFormat, + outSampleRate, &srcAVFrame->ch_layout, srcSampleFormat, srcSampleRate, @@ -238,13 +238,13 @@ SwrContext* createSwrContext( "Couldn't create SwrContext: ", getFFMPEGErrorStringFromErrorCode(status)); #else - int64_t desiredLayout = - getDesiredChannelLayout(desiredNumChannels, srcAVFrame); + int64_t outLayout = + getOutputChannelLayout(outNumChannels, srcAVFrame); swrContext = swr_alloc_set_opts( nullptr, - desiredLayout, - desiredSampleFormat, - desiredSampleRate, + outLayout, + outSampleFormat, + outSampleRate, srcAVFrame->channel_layout, srcSampleFormat, srcSampleRate, @@ -267,19 +267,19 @@ SwrContext* createSwrContext( UniqueAVFrame convertAudioAVFrameSamples( const UniqueSwrContext& swrContext, const UniqueAVFrame& srcAVFrame, - AVSampleFormat desiredSampleFormat, - int desiredSampleRate, - int desiredNumChannels) { + AVSampleFormat outSampleFormat, + int outSampleRate, + int outNumChannels) { UniqueAVFrame convertedAVFrame(av_frame_alloc()); TORCH_CHECK( convertedAVFrame, "Could not allocate frame for sample format conversion."); - convertedAVFrame->format = static_cast(desiredSampleFormat); + convertedAVFrame->format = static_cast(outSampleFormat); - convertedAVFrame->sample_rate = desiredSampleRate; + convertedAVFrame->sample_rate = outSampleRate; int srcSampleRate = srcAVFrame->sample_rate; - if (srcSampleRate != desiredSampleRate) { + if (srcSampleRate != outSampleRate) { // Note that this is an upper bound on the number of output samples. // `swr_convert()` will likely not fill convertedAVFrame with that many // samples if sample rate conversion is needed. It will buffer the last few @@ -290,14 +290,14 @@ UniqueAVFrame convertAudioAVFrameSamples( // tighter bound. convertedAVFrame->nb_samples = av_rescale_rnd( swr_get_delay(swrContext.get(), srcSampleRate) + srcAVFrame->nb_samples, - desiredSampleRate, + outSampleRate, srcSampleRate, AV_ROUND_UP); } else { convertedAVFrame->nb_samples = srcAVFrame->nb_samples; } - setChannelLayout(convertedAVFrame, srcAVFrame, desiredNumChannels); + setChannelLayout(convertedAVFrame, srcAVFrame, outNumChannels); auto status = av_frame_get_buffer(convertedAVFrame.get(), 0); TORCH_CHECK( diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index b21977c9..0ed14b14 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -1186,11 +1186,11 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU( FrameOutput& frameOutput) { AVSampleFormat srcSampleFormat = static_cast(srcAVFrame->format); - AVSampleFormat desiredSampleFormat = AV_SAMPLE_FMT_FLTP; + AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_FLTP; StreamInfo& streamInfo = streamInfos_[activeStreamIndex_]; int srcSampleRate = srcAVFrame->sample_rate; - int desiredSampleRate = + int outSampleRate = streamInfo.audioStreamOptions.sampleRate.value_or(srcSampleRate); int srcNumChannels = getNumChannels(streamInfo.codecContext); @@ -1203,50 +1203,50 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU( ". If you are hitting this, it may be because you are using " "a buggy FFmpeg version. FFmpeg4 is known to fail here in some " "valid scenarios. Try to upgrade FFmpeg?"); - int desiredNumChannels = + int outNumChannels = streamInfo.audioStreamOptions.numChannels.value_or(srcNumChannels); bool mustConvert = - (srcSampleFormat != desiredSampleFormat || - srcSampleRate != desiredSampleRate || - srcNumChannels != desiredNumChannels); + (srcSampleFormat != outSampleFormat || + srcSampleRate != outSampleRate || + srcNumChannels != outNumChannels); UniqueAVFrame convertedAVFrame; if (mustConvert) { if (!streamInfo.swrContext) { streamInfo.swrContext.reset(createSwrContext( srcSampleFormat, - desiredSampleFormat, + outSampleFormat, srcSampleRate, - desiredSampleRate, + outSampleRate, srcAVFrame, - desiredNumChannels)); + outNumChannels)); } convertedAVFrame = convertAudioAVFrameSamples( streamInfo.swrContext, srcAVFrame, - desiredSampleFormat, - desiredSampleRate, - desiredNumChannels); + outSampleFormat, + outSampleRate, + outNumChannels); } const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame; AVSampleFormat format = static_cast(avFrame->format); TORCH_CHECK( - format == desiredSampleFormat, + format == outSampleFormat, "Something went wrong, the frame didn't get converted to the desired format. ", "Desired format = ", - av_get_sample_fmt_name(desiredSampleFormat), + av_get_sample_fmt_name(outSampleFormat), "source format = ", av_get_sample_fmt_name(format)); int numChannels = getNumChannels(avFrame); TORCH_CHECK( - numChannels == desiredNumChannels, + numChannels == outNumChannels, "Something went wrong, the frame didn't get converted to the desired ", "number of channels = ", - desiredNumChannels, + outNumChannels, ". Got ", numChannels, " instead."); From 19c1ed96c44c6f2a12a246e8e80dedae44994847 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 23 May 2025 13:42:19 +0100 Subject: [PATCH 2/2] Lint --- src/torchcodec/_core/FFMPEGCommon.cpp | 3 +-- src/torchcodec/_core/SingleStreamDecoder.cpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/torchcodec/_core/FFMPEGCommon.cpp b/src/torchcodec/_core/FFMPEGCommon.cpp index 942a69d9..2609caf3 100644 --- a/src/torchcodec/_core/FFMPEGCommon.cpp +++ b/src/torchcodec/_core/FFMPEGCommon.cpp @@ -238,8 +238,7 @@ SwrContext* createSwrContext( "Couldn't create SwrContext: ", getFFMPEGErrorStringFromErrorCode(status)); #else - int64_t outLayout = - getOutputChannelLayout(outNumChannels, srcAVFrame); + int64_t outLayout = getOutputChannelLayout(outNumChannels, srcAVFrame); swrContext = swr_alloc_set_opts( nullptr, outLayout, diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index 0ed14b14..9bc003a9 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -1207,8 +1207,7 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU( streamInfo.audioStreamOptions.numChannels.value_or(srcNumChannels); bool mustConvert = - (srcSampleFormat != outSampleFormat || - srcSampleRate != outSampleRate || + (srcSampleFormat != outSampleFormat || srcSampleRate != outSampleRate || srcNumChannels != outNumChannels); UniqueAVFrame convertedAVFrame;