pytorch · NicolasHug · May 27, 2025 · May 23, 2025 · May 23, 2025
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -177,11 +177,11 @@ void AudioEncoder::initializeEncoder(
   // well when "-b:a" isn't specified.
   avCodecContext_->bit_rate = bitRate.value_or(0);
 
-  desiredNumChannels_ = static_cast<int>(numChannels.value_or(wf_.sizes()[0]));
-  validateNumChannels(*avCodec, desiredNumChannels_);
+  outNumChannels_ = static_cast<int>(numChannels.value_or(wf_.sizes()[0]));
+  validateNumChannels(*avCodec, outNumChannels_);
   // The avCodecContext layout defines the layout of the encoded output, it's
   // not related to the input sampes.
-  setDefaultChannelLayout(avCodecContext_, desiredNumChannels_);
+  setDefaultChannelLayout(avCodecContext_, outNumChannels_);
 
   validateSampleRate(*avCodec, sampleRate);
   avCodecContext_->sample_rate = sampleRate;
@@ -304,7 +304,7 @@ void AudioEncoder::encodeInnerLoop(
   bool mustConvert =
       (srcAVFrame != nullptr &&
        (avCodecContext_->sample_fmt != AV_SAMPLE_FMT_FLTP ||
-        getNumChannels(srcAVFrame) != desiredNumChannels_));
+        getNumChannels(srcAVFrame) != outNumChannels_));
 
   UniqueAVFrame convertedAVFrame;
   if (mustConvert) {
@@ -315,14 +315,14 @@ void AudioEncoder::encodeInnerLoop(
           srcAVFrame->sample_rate, // No sample rate conversion
           srcAVFrame->sample_rate,
           srcAVFrame,
-          desiredNumChannels_));
+          outNumChannels_));
     }
     convertedAVFrame = convertAudioAVFrameSamples(
         swrContext_,
         srcAVFrame,
         avCodecContext_->sample_fmt,
         srcAVFrame->sample_rate, // No sample rate conversion
-        desiredNumChannels_);
+        outNumChannels_);
     TORCH_CHECK(
         convertedAVFrame->nb_samples == srcAVFrame->nb_samples,
         "convertedAVFrame->nb_samples=",

diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h
@@ -50,9 +50,9 @@ class AudioEncoder {
   UniqueAVCodecContext avCodecContext_;
   int streamIndex_;
   UniqueSwrContext swrContext_;
-  // TODO-ENCODING: desiredNumChannels should just be part of an options struct,
+  // TODO-ENCODING: outNumChannels should just be part of an options struct,
   // see other TODO above.
-  int desiredNumChannels_ = -1;
+  int outNumChannels_ = -1;
 
   const torch::Tensor wf_;
 

diff --git a/src/torchcodec/_core/FFMPEGCommon.cpp b/src/torchcodec/_core/FFMPEGCommon.cpp
@@ -159,74 +159,74 @@ namespace {
 #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
 
 // Returns:
-// - the srcAVFrame's channel layout if srcAVFrame has desiredNumChannels
-// - the default channel layout with desiredNumChannels otherwise.
-AVChannelLayout getDesiredChannelLayout(
-    int desiredNumChannels,
+// - the srcAVFrame's channel layout if srcAVFrame has outNumChannels
+// - the default channel layout with outNumChannels otherwise.
+AVChannelLayout getOutputChannelLayout(
+    int outNumChannels,
     const UniqueAVFrame& srcAVFrame) {
-  AVChannelLayout desiredLayout;
-  if (desiredNumChannels == getNumChannels(srcAVFrame)) {
-    desiredLayout = srcAVFrame->ch_layout;
+  AVChannelLayout outLayout;
+  if (outNumChannels == getNumChannels(srcAVFrame)) {
+    outLayout = srcAVFrame->ch_layout;
   } else {
-    av_channel_layout_default(&desiredLayout, desiredNumChannels);
+    av_channel_layout_default(&outLayout, outNumChannels);
   }
-  return desiredLayout;
+  return outLayout;
 }
 
 #else
 
 // Same as above
-int64_t getDesiredChannelLayout(
-    int desiredNumChannels,
+int64_t getOutputChannelLayout(
+    int outNumChannels,
     const UniqueAVFrame& srcAVFrame) {
-  int64_t desiredLayout;
-  if (desiredNumChannels == getNumChannels(srcAVFrame)) {
-    desiredLayout = srcAVFrame->channel_layout;
+  int64_t outLayout;
+  if (outNumChannels == getNumChannels(srcAVFrame)) {
+    outLayout = srcAVFrame->channel_layout;
   } else {
-    desiredLayout = av_get_default_channel_layout(desiredNumChannels);
+    outLayout = av_get_default_channel_layout(outNumChannels);
   }
-  return desiredLayout;
+  return outLayout;
 }
 #endif
 } // namespace
 
-// Sets dstAVFrame' channel layout to getDesiredChannelLayout(): see doc above
+// Sets dstAVFrame' channel layout to getOutputChannelLayout(): see doc above
 void setChannelLayout(
     UniqueAVFrame& dstAVFrame,
     const UniqueAVFrame& srcAVFrame,
-    int desiredNumChannels) {
+    int outNumChannels) {
 #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
-  AVChannelLayout desiredLayout =
-      getDesiredChannelLayout(desiredNumChannels, srcAVFrame);
-  auto status = av_channel_layout_copy(&dstAVFrame->ch_layout, &desiredLayout);
+  AVChannelLayout outLayout =
+      getOutputChannelLayout(outNumChannels, srcAVFrame);
+  auto status = av_channel_layout_copy(&dstAVFrame->ch_layout, &outLayout);
   TORCH_CHECK(
       status == AVSUCCESS,
       "Couldn't copy channel layout to avFrame: ",
       getFFMPEGErrorStringFromErrorCode(status));
 #else
   dstAVFrame->channel_layout =
-      getDesiredChannelLayout(desiredNumChannels, srcAVFrame);
-  dstAVFrame->channels = desiredNumChannels;
+      getOutputChannelLayout(outNumChannels, srcAVFrame);
+  dstAVFrame->channels = outNumChannels;
 #endif
 }
 
 SwrContext* createSwrContext(
     AVSampleFormat srcSampleFormat,
-    AVSampleFormat desiredSampleFormat,
+    AVSampleFormat outSampleFormat,
     int srcSampleRate,
-    int desiredSampleRate,
+    int outSampleRate,
     const UniqueAVFrame& srcAVFrame,
-    int desiredNumChannels) {
+    int outNumChannels) {
   SwrContext* swrContext = nullptr;
   int status = AVSUCCESS;
 #if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4
-  AVChannelLayout desiredLayout =
-      getDesiredChannelLayout(desiredNumChannels, srcAVFrame);
+  AVChannelLayout outLayout =
+      getOutputChannelLayout(outNumChannels, srcAVFrame);
   status = swr_alloc_set_opts2(
       &swrContext,
-      &desiredLayout,
-      desiredSampleFormat,
-      desiredSampleRate,
+      &outLayout,
+      outSampleFormat,
+      outSampleRate,
       &srcAVFrame->ch_layout,
       srcSampleFormat,
       srcSampleRate,
@@ -238,13 +238,12 @@ SwrContext* createSwrContext(
       "Couldn't create SwrContext: ",
       getFFMPEGErrorStringFromErrorCode(status));
 #else
-  int64_t desiredLayout =
-      getDesiredChannelLayout(desiredNumChannels, srcAVFrame);
+  int64_t outLayout = getOutputChannelLayout(outNumChannels, srcAVFrame);
   swrContext = swr_alloc_set_opts(
       nullptr,
-      desiredLayout,
-      desiredSampleFormat,
-      desiredSampleRate,
+      outLayout,
+      outSampleFormat,
+      outSampleRate,
       srcAVFrame->channel_layout,
       srcSampleFormat,
       srcSampleRate,
@@ -267,19 +266,19 @@ SwrContext* createSwrContext(
 UniqueAVFrame convertAudioAVFrameSamples(
     const UniqueSwrContext& swrContext,
     const UniqueAVFrame& srcAVFrame,
-    AVSampleFormat desiredSampleFormat,
-    int desiredSampleRate,
-    int desiredNumChannels) {
+    AVSampleFormat outSampleFormat,
+    int outSampleRate,
+    int outNumChannels) {
   UniqueAVFrame convertedAVFrame(av_frame_alloc());
   TORCH_CHECK(
       convertedAVFrame,
       "Could not allocate frame for sample format conversion.");
 
-  convertedAVFrame->format = static_cast<int>(desiredSampleFormat);
+  convertedAVFrame->format = static_cast<int>(outSampleFormat);
 
-  convertedAVFrame->sample_rate = desiredSampleRate;
+  convertedAVFrame->sample_rate = outSampleRate;
   int srcSampleRate = srcAVFrame->sample_rate;
-  if (srcSampleRate != desiredSampleRate) {
+  if (srcSampleRate != outSampleRate) {
     // Note that this is an upper bound on the number of output samples.
     // `swr_convert()` will likely not fill convertedAVFrame with that many
     // samples if sample rate conversion is needed. It will buffer the last few
@@ -290,14 +289,14 @@ UniqueAVFrame convertAudioAVFrameSamples(
     // tighter bound.
     convertedAVFrame->nb_samples = av_rescale_rnd(
         swr_get_delay(swrContext.get(), srcSampleRate) + srcAVFrame->nb_samples,
-        desiredSampleRate,
+        outSampleRate,
         srcSampleRate,
         AV_ROUND_UP);
   } else {
     convertedAVFrame->nb_samples = srcAVFrame->nb_samples;
   }
 
-  setChannelLayout(convertedAVFrame, srcAVFrame, desiredNumChannels);
+  setChannelLayout(convertedAVFrame, srcAVFrame, outNumChannels);
 
   auto status = av_frame_get_buffer(convertedAVFrame.get(), 0);
   TORCH_CHECK(

diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1186,11 +1186,11 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
     FrameOutput& frameOutput) {
   AVSampleFormat srcSampleFormat =
       static_cast<AVSampleFormat>(srcAVFrame->format);
-  AVSampleFormat desiredSampleFormat = AV_SAMPLE_FMT_FLTP;
+  AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_FLTP;
 
   StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
   int srcSampleRate = srcAVFrame->sample_rate;
-  int desiredSampleRate =
+  int outSampleRate =
       streamInfo.audioStreamOptions.sampleRate.value_or(srcSampleRate);
 
   int srcNumChannels = getNumChannels(streamInfo.codecContext);
@@ -1203,50 +1203,49 @@ void SingleStreamDecoder::convertAudioAVFrameToFrameOutputOnCPU(
       ". If you are hitting this, it may be because you are using "
       "a buggy FFmpeg version. FFmpeg4 is known to fail here in some "
       "valid scenarios. Try to upgrade FFmpeg?");
-  int desiredNumChannels =
+  int outNumChannels =
       streamInfo.audioStreamOptions.numChannels.value_or(srcNumChannels);
 
   bool mustConvert =
-      (srcSampleFormat != desiredSampleFormat ||
-       srcSampleRate != desiredSampleRate ||
-       srcNumChannels != desiredNumChannels);
+      (srcSampleFormat != outSampleFormat || srcSampleRate != outSampleRate ||
+       srcNumChannels != outNumChannels);
 
   UniqueAVFrame convertedAVFrame;
   if (mustConvert) {
     if (!streamInfo.swrContext) {
       streamInfo.swrContext.reset(createSwrContext(
           srcSampleFormat,
-          desiredSampleFormat,
+          outSampleFormat,
           srcSampleRate,
-          desiredSampleRate,
+          outSampleRate,
           srcAVFrame,
-          desiredNumChannels));
+          outNumChannels));
     }
 
     convertedAVFrame = convertAudioAVFrameSamples(
         streamInfo.swrContext,
         srcAVFrame,
-        desiredSampleFormat,
-        desiredSampleRate,
-        desiredNumChannels);
+        outSampleFormat,
+        outSampleRate,
+        outNumChannels);
   }
   const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame;
 
   AVSampleFormat format = static_cast<AVSampleFormat>(avFrame->format);
   TORCH_CHECK(
-      format == desiredSampleFormat,
+      format == outSampleFormat,
       "Something went wrong, the frame didn't get converted to the desired format. ",
       "Desired format = ",
-      av_get_sample_fmt_name(desiredSampleFormat),
+      av_get_sample_fmt_name(outSampleFormat),
       "source format = ",
       av_get_sample_fmt_name(format));
 
   int numChannels = getNumChannels(avFrame);
   TORCH_CHECK(
-      numChannels == desiredNumChannels,
+      numChannels == outNumChannels,
       "Something went wrong, the frame didn't get converted to the desired ",
       "number of channels = ",
-      desiredNumChannels,
+      outNumChannels,
       ". Got ",
       numChannels,
       " instead.");