 #include <sstream>
 #include <stdexcept>
 #include <string_view>
-#include "src/torchcodec/_core/DeviceInterface.h"
 #include "torch/types.h"

 extern "C" {
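The removed include and the shortened signatures throughout this diff are two sides of one refactor: types that used to be nested inside SingleStreamDecoder (FrameOutput, FrameBatchOutput, ContainerMetadata, VideoStreamOptions, ...) are now referenced at namespace scope, so this file no longer pulls them in via DeviceInterface.h. A minimal sketch of the header-level effect, assuming the types were hoisted into the enclosing namespace; the field names here are illustrative, not the real definitions:

// Before: usable only as SingleStreamDecoder::FrameOutput.
// After: a namespace-scope type that the decoder, the device
// interfaces, and free helper functions can all name directly.
struct FrameOutput {
  torch::Tensor data; // decoded frame
  double ptsSeconds;
  double durationSeconds;
};

class SingleStreamDecoder {
 public:
  FrameOutput getNextFrame(); // previously SingleStreamDecoder::FrameOutput
};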
@@ -350,8 +349,7 @@ void SingleStreamDecoder::scanFileAndUpdateMetadataAndIndex() {
   scannedAllStreams_ = true;
 }

-SingleStreamDecoder::ContainerMetadata
-SingleStreamDecoder::getContainerMetadata() const {
+ContainerMetadata SingleStreamDecoder::getContainerMetadata() const {
   return containerMetadata_;
 }
@@ -406,7 +404,7 @@ void SingleStreamDecoder::addStream(
   streamInfo.stream = formatContext_->streams[activeStreamIndex_];
   streamInfo.avMediaType = mediaType;

-  deviceInterface = createDeviceInterface(device);
+  deviceInterface_ = createDeviceInterface(device);

   // This should never happen, checking just to be safe.
   TORCH_CHECK(
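The rename to deviceInterface_ matches the member-naming convention already used in this file (formatContext_, activeStreamIndex_, containerMetadata_): a trailing underscore marks class data members and keeps the bare name free for parameters and locals. A contrived example of the convention, not code from this patch:

#include <memory>
#include <utility>

class DeviceInterface {}; // stand-in definition for the sketch

class Decoder {
 public:
  // The parameter and the member coexist without shadowing confusion
  // because only the member carries the trailing underscore.
  explicit Decoder(std::unique_ptr<DeviceInterface> deviceInterface)
      : deviceInterface_(std::move(deviceInterface)) {}

 private:
  std::unique_ptr<DeviceInterface> deviceInterface_;
};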
@@ -418,9 +416,9 @@ void SingleStreamDecoder::addStream(
   // TODO_CODE_QUALITY it's pretty meh to have a video-specific logic within
   // addStream() which is supposed to be generic
   if (mediaType == AVMEDIA_TYPE_VIDEO) {
-    if (deviceInterface) {
+    if (deviceInterface_) {
       avCodec = makeAVCodecOnlyUseForCallingAVFindBestStream(
-          deviceInterface->findCodec(streamInfo.stream->codecpar->codec_id)
+          deviceInterface_->findCodec(streamInfo.stream->codecpar->codec_id)
              .value_or(avCodec));
     }
   }
@@ -438,8 +436,8 @@ void SingleStreamDecoder::addStream(

   // TODO_CODE_QUALITY same as above.
   if (mediaType == AVMEDIA_TYPE_VIDEO) {
-    if (deviceInterface) {
-      deviceInterface->initializeContext(codecContext);
+    if (deviceInterface_) {
+      deviceInterface_->initializeContext(codecContext);
     }
   }
@@ -501,9 +499,8 @@ void SingleStreamDecoder::addVideoStream(
   // swscale requires widths to be multiples of 32:
   // https://stackoverflow.com/questions/74351955/turn-off-sw-scale-conversion-to-planar-yuv-32-byte-alignment-requirements
   // so we fall back to filtergraph if the width is not a multiple of 32.
-  auto defaultLibrary = (width % 32 == 0)
-      ? SingleStreamDecoder::ColorConversionLibrary::SWSCALE
-      : SingleStreamDecoder::ColorConversionLibrary::FILTERGRAPH;
+  auto defaultLibrary = (width % 32 == 0) ? ColorConversionLibrary::SWSCALE
+                                          : ColorConversionLibrary::FILTERGRAPH;

   streamInfo.colorConversionLibrary =
       videoStreamOptions.colorConversionLibrary.value_or(defaultLibrary);
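The reflowed ternary keeps the rule stated in the comment: default to swscale only when the output width is a multiple of 32, and fall back to filtergraph otherwise. Pulled out as a freestanding helper purely for illustration (neither this function nor the local enum exists in the patch):

// Sketch-local stand-in for the real enum.
enum class ColorConversionLibrary { SWSCALE, FILTERGRAPH };

// Hypothetical helper mirroring the default-library selection above.
ColorConversionLibrary pickDefaultColorConversionLibrary(int width) {
  // swscale's planar YUV conversion requires 32-byte aligned widths;
  // any other width routes through filtergraph instead.
  return (width % 32 == 0) ? ColorConversionLibrary::SWSCALE
                           : ColorConversionLibrary::FILTERGRAPH;
}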
@@ -539,30 +536,29 @@ void SingleStreamDecoder::addAudioStream(
 // HIGH-LEVEL DECODING ENTRY-POINTS
 // --------------------------------------------------------------------------

-SingleStreamDecoder::FrameOutput SingleStreamDecoder::getNextFrame() {
+FrameOutput SingleStreamDecoder::getNextFrame() {
   auto output = getNextFrameInternal();
   if (streamInfos_[activeStreamIndex_].avMediaType == AVMEDIA_TYPE_VIDEO) {
     output.data = maybePermuteHWC2CHW(output.data);
   }
   return output;
 }

-SingleStreamDecoder::FrameOutput SingleStreamDecoder::getNextFrameInternal(
+FrameOutput SingleStreamDecoder::getNextFrameInternal(
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
   validateActiveStream();
   UniqueAVFrame avFrame = decodeAVFrame(
       [this](const UniqueAVFrame& avFrame) { return avFrame->pts >= cursor_; });
   return convertAVFrameToFrameOutput(avFrame, preAllocatedOutputTensor);
 }

-SingleStreamDecoder::FrameOutput SingleStreamDecoder::getFrameAtIndex(
-    int64_t frameIndex) {
+FrameOutput SingleStreamDecoder::getFrameAtIndex(int64_t frameIndex) {
   auto frameOutput = getFrameAtIndexInternal(frameIndex);
   frameOutput.data = maybePermuteHWC2CHW(frameOutput.data);
   return frameOutput;
 }

-SingleStreamDecoder::FrameOutput SingleStreamDecoder::getFrameAtIndexInternal(
+FrameOutput SingleStreamDecoder::getFrameAtIndexInternal(
     int64_t frameIndex,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
   validateActiveStream(AVMEDIA_TYPE_VIDEO);
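Both getNextFrame() and getFrameAtIndex() route their result through maybePermuteHWC2CHW() before returning, converting video frames from the HWC layout produced by the decode paths to the CHW layout PyTorch models conventionally expect. With libtorch this reordering is a metadata-only view; a standalone sketch of the idea, independent of torchcodec's actual helper (which also has to decide whether the permute applies at all):

#include <torch/torch.h>

// Reorder a {height, width, channels} frame to {channels, height, width}.
// permute() returns a non-contiguous view; add .contiguous() only if a
// downstream consumer requires dense storage.
torch::Tensor hwc2chw(const torch::Tensor& frame) {
  return frame.permute({2, 0, 1});
}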
@@ -577,7 +573,7 @@ SingleStreamDecoder::FrameOutput SingleStreamDecoder::getFrameAtIndexInternal(
   return getNextFrameInternal(preAllocatedOutputTensor);
 }

-SingleStreamDecoder::FrameBatchOutput SingleStreamDecoder::getFramesAtIndices(
+FrameBatchOutput SingleStreamDecoder::getFramesAtIndices(
     const std::vector<int64_t>& frameIndices) {
   validateActiveStream(AVMEDIA_TYPE_VIDEO);
@@ -636,7 +632,7 @@ SingleStreamDecoder::FrameBatchOutput SingleStreamDecoder::getFramesAtIndices(
   return frameBatchOutput;
 }

-SingleStreamDecoder::FrameBatchOutput SingleStreamDecoder::getFramesInRange(
+FrameBatchOutput SingleStreamDecoder::getFramesInRange(
     int64_t start,
     int64_t stop,
     int64_t step) {
@@ -670,8 +666,7 @@ SingleStreamDecoder::FrameBatchOutput SingleStreamDecoder::getFramesInRange(
   return frameBatchOutput;
 }

-SingleStreamDecoder::FrameOutput SingleStreamDecoder::getFramePlayedAt(
-    double seconds) {
+FrameOutput SingleStreamDecoder::getFramePlayedAt(double seconds) {
   validateActiveStream(AVMEDIA_TYPE_VIDEO);
   StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
   double frameStartTime =
@@ -711,7 +706,7 @@ SingleStreamDecoder::FrameOutput SingleStreamDecoder::getFramePlayedAt(
   return frameOutput;
 }

-SingleStreamDecoder::FrameBatchOutput SingleStreamDecoder::getFramesPlayedAt(
+FrameBatchOutput SingleStreamDecoder::getFramesPlayedAt(
     const std::vector<double>& timestamps) {
   validateActiveStream(AVMEDIA_TYPE_VIDEO);
@@ -741,8 +736,7 @@ SingleStreamDecoder::FrameBatchOutput SingleStreamDecoder::getFramesPlayedAt(
   return getFramesAtIndices(frameIndices);
 }

-SingleStreamDecoder::FrameBatchOutput
-SingleStreamDecoder::getFramesPlayedInRange(
+FrameBatchOutput SingleStreamDecoder::getFramesPlayedInRange(
     double startSeconds,
     double stopSeconds) {
   validateActiveStream(AVMEDIA_TYPE_VIDEO);
@@ -875,8 +869,7 @@ SingleStreamDecoder::getFramesPlayedInRange(
 // [2] If you're brave and curious, you can read the long "Seek offset for
 // audio" note in https://github.com/pytorch/torchcodec/pull/507/files, which
 // sums up past (and failed) attempts at working around this issue.
-SingleStreamDecoder::AudioFramesOutput
-SingleStreamDecoder::getFramesPlayedInRangeAudio(
+AudioFramesOutput SingleStreamDecoder::getFramesPlayedInRangeAudio(
     double startSeconds,
     std::optional<double> stopSecondsOptional) {
   validateActiveStream(AVMEDIA_TYPE_AUDIO);
@@ -1196,8 +1189,7 @@ UniqueAVFrame SingleStreamDecoder::decodeAVFrame(
 // AVFRAME <-> FRAME OUTPUT CONVERSION
 // --------------------------------------------------------------------------

-SingleStreamDecoder::FrameOutput
-SingleStreamDecoder::convertAVFrameToFrameOutput(
+FrameOutput SingleStreamDecoder::convertAVFrameToFrameOutput(
     UniqueAVFrame& avFrame,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
   // Convert the frame to tensor.
@@ -1210,11 +1202,11 @@ SingleStreamDecoder::convertAVFrameToFrameOutput(
       formatContext_->streams[activeStreamIndex_]->time_base);
   if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
     convertAudioAVFrameToFrameOutputOnCPU(avFrame, frameOutput);
-  } else if (!deviceInterface) {
+  } else if (!deviceInterface_) {
     convertAVFrameToFrameOutputOnCPU(
         avFrame, frameOutput, preAllocatedOutputTensor);
-  } else if (deviceInterface) {
-    deviceInterface->convertAVFrameToFrameOutput(
+  } else if (deviceInterface_) {
+    deviceInterface_->convertAVFrameToFrameOutput(
         streamInfo.videoStreamOptions,
         avFrame,
         frameOutput,
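With the rename applied, the dispatch logic is unchanged: audio frames always convert on the CPU; video frames convert on the CPU when no device interface exists, and through the device interface otherwise. The two deviceInterface_ tests are complementary, so the final else if is effectively a plain else. A condensed, self-contained model of the control flow (all names below are stand-ins, not the real helpers):

struct Device { void convertOnDevice() {} };
void convertAudioOnCpu() {}
void convertVideoOnCpu() {}

void dispatch(bool isAudio, Device* device) {
  if (isAudio) {
    convertAudioOnCpu();        // audio always converts on CPU
  } else if (device == nullptr) {
    convertVideoOnCpu();        // video without a device interface: CPU path
  } else {
    device->convertOnDevice();  // complementary to the branch above, so this
  }                             // `else if (device)` could simply be `else`
}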
@@ -1547,7 +1539,7 @@ std::optional<torch::Tensor> SingleStreamDecoder::maybeFlushSwrBuffers() {
 // OUTPUT ALLOCATION AND SHAPE CONVERSION
 // --------------------------------------------------------------------------

-SingleStreamDecoder::FrameBatchOutput::FrameBatchOutput(
+FrameBatchOutput::FrameBatchOutput(
     int64_t numFrames,
     const VideoStreamOptions& videoStreamOptions,
     const StreamMetadata& streamMetadata)
@@ -2047,15 +2039,15 @@ FrameDims getHeightAndWidthFromResizedAVFrame(const AVFrame& resizedAVFrame) {
 }

 FrameDims getHeightAndWidthFromOptionsOrMetadata(
-    const SingleStreamDecoder::VideoStreamOptions& videoStreamOptions,
-    const SingleStreamDecoder::StreamMetadata& streamMetadata) {
+    const VideoStreamOptions& videoStreamOptions,
+    const StreamMetadata& streamMetadata) {
   return FrameDims(
       videoStreamOptions.height.value_or(*streamMetadata.height),
       videoStreamOptions.width.value_or(*streamMetadata.width));
 }

 FrameDims getHeightAndWidthFromOptionsOrAVFrame(
-    const SingleStreamDecoder::VideoStreamOptions& videoStreamOptions,
+    const VideoStreamOptions& videoStreamOptions,
     const UniqueAVFrame& avFrame) {
   return FrameDims(
       videoStreamOptions.height.value_or(avFrame->height),
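Both helpers above express the same fallback: an explicitly requested height or width from VideoStreamOptions wins, otherwise the dimension comes from stream metadata (or, in the second overload, from the decoded AVFrame itself). std::optional::value_or collapses that rule into one expression; a freestanding illustration of the pattern:

#include <optional>

// Prefer the explicitly requested dimension; otherwise use the probed one.
int resolveDimension(std::optional<int> requested, int probed) {
  return requested.value_or(probed);
}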