
Commit 8df96a9

Merge branch 'main' into model-contrib-guidelines

2 parents e06c180 + 22f8dc4


45 files changed (+754, -431 lines)

.circleci/unittest/linux/scripts/run_test.sh

Lines changed: 1 addition & 1 deletion
@@ -7,4 +7,4 @@ conda activate ./env
 
 export PYTORCH_TEST_WITH_SLOW='1'
 python -m torch.utils.collect_env
-pytest --cov=torchvision --junitxml=test-results/junit.xml -v --durations 20
+pytest --junitxml=test-results/junit.xml -v --durations 20

.circleci/unittest/windows/scripts/run_test.sh

Lines changed: 1 addition & 1 deletion
@@ -10,4 +10,4 @@ source "$this_dir/set_cuda_envs.sh"
 
 export PYTORCH_TEST_WITH_SLOW='1'
 python -m torch.utils.collect_env
-pytest --cov=torchvision --junitxml=test-results/junit.xml -v --durations 20
+pytest --junitxml=test-results/junit.xml -v --durations 20

.coveragerc

Lines changed: 0 additions & 7 deletions
This file was deleted.

docs/source/datasets.rst

Lines changed: 68 additions & 22 deletions
@@ -5,7 +5,7 @@ Torchvision provides many built-in datasets in the ``torchvision.datasets``
 module, as well as utility classes for building your own datasets.
 
 Built-in datasets
-~~~~~~~~~~~~~~~~~
+-----------------
 
 All datasets are subclasses of :class:`torch.utils.data.Dataset`
 i.e, they have ``__getitem__`` and ``__len__`` methods implemented.
@@ -25,6 +25,8 @@ All the datasets have almost similar API. They all have two common arguments:
 ``transform`` and ``target_transform`` to transform the input and target respectively.
 You can also create your own datasets using the provided :ref:`base classes <base_classes_datasets>`.
 
+Image classification
+~~~~~~~~~~~~~~~~~~~~
 
 .. autosummary::
     :toctree: generated/
@@ -35,61 +37,105 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     CelebA
     CIFAR10
     CIFAR100
-    Cityscapes
-    CocoCaptions
-    CocoDetection
     Country211
     DTD
     EMNIST
     EuroSAT
     FakeData
     FashionMNIST
     FER2013
+    FGVCAircraft
     Flickr8k
     Flickr30k
     Flowers102
-    FlyingChairs
-    FlyingThings3D
     Food101
-    FGVCAircraft
     GTSRB
-    HD1K
-    HMDB51
-    ImageNet
     INaturalist
-    Kinetics400
-    Kitti
-    KittiFlow
+    ImageNet
     KMNIST
     LFWPeople
-    LFWPairs
     LSUN
     MNIST
     Omniglot
     OxfordIIITPet
-    PCAM
-    PhotoTour
     Places365
-    RenderedSST2
+    PCAM
     QMNIST
-    SBDataset
-    SBU
+    RenderedSST2
     SEMEION
-    Sintel
+    SBU
     StanfordCars
     STL10
     SUN397
     SVHN
-    UCF101
     USPS
+
+Image detection or segmentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+    :toctree: generated/
+    :template: class_dataset.rst
+
+    CocoDetection
+    CelebA
+    Cityscapes
+    GTSRB
+    Kitti
+    OxfordIIITPet
+    SBDataset
     VOCSegmentation
     VOCDetection
     WIDERFace
 
+Optical Flow
+~~~~~~~~~~~~
+
+.. autosummary::
+    :toctree: generated/
+    :template: class_dataset.rst
+
+    FlyingChairs
+    FlyingThings3D
+    HD1K
+    KittiFlow
+    Sintel
+
+Image pairs
+~~~~~~~~~~~
+
+.. autosummary::
+    :toctree: generated/
+    :template: class_dataset.rst
+
+    LFWPairs
+    PhotoTour
+
+Image captioning
+~~~~~~~~~~~~~~~~
+
+.. autosummary::
+    :toctree: generated/
+    :template: class_dataset.rst
+
+    CocoCaptions
+
+Video classification
+~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+    :toctree: generated/
+    :template: class_dataset.rst
+
+    HMDB51
+    Kinetics400
+    UCF101
+
+
 .. _base_classes_datasets:
 
 Base classes for custom datasets
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------
 
 .. autosummary::
     :toctree: generated/
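
The context lines above describe the common dataset API (`transform` and `target_transform`). A minimal sketch of that pattern, using CIFAR10 and an arbitrary local `data/` path (both my choices, not part of this commit):

```
# Sketch of the common dataset API described above (not repo code).
import torchvision.datasets as datasets
import torchvision.transforms as T

# Every built-in dataset takes root/transform/target_transform; CIFAR10 here
# is just one entry from the "Image classification" list.
train_set = datasets.CIFAR10(root="data/", train=True, download=True, transform=T.ToTensor())
img, label = train_set[0]  # datasets implement __getitem__ and __len__
print(img.shape, label)    # torch.Size([3, 32, 32]) and an integer class index
```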

docs/source/models.rst

Lines changed: 24 additions & 1 deletion
@@ -89,6 +89,10 @@ You can construct a model with random weights by calling its constructor:
     vit_b_32 = models.vit_b_32()
     vit_l_16 = models.vit_l_16()
     vit_l_32 = models.vit_l_32()
+    convnext_tiny = models.convnext_tiny()
+    convnext_small = models.convnext_small()
+    convnext_base = models.convnext_base()
+    convnext_large = models.convnext_large()
 
 We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`.
 These can be constructed by passing ``pretrained=True``:
@@ -136,6 +140,10 @@ These can be constructed by passing ``pretrained=True``:
     vit_b_32 = models.vit_b_32(pretrained=True)
     vit_l_16 = models.vit_l_16(pretrained=True)
     vit_l_32 = models.vit_l_32(pretrained=True)
+    convnext_tiny = models.convnext_tiny(pretrained=True)
+    convnext_small = models.convnext_small(pretrained=True)
+    convnext_base = models.convnext_base(pretrained=True)
+    convnext_large = models.convnext_large(pretrained=True)
 
 Instancing a pre-trained model will download its weights to a cache directory.
 This directory can be set using the `TORCH_HOME` environment variable. See
@@ -248,7 +256,10 @@ vit_b_16 81.072 95.318
 vit_b_32                         75.912        92.466
 vit_l_16                         79.662        94.638
 vit_l_32                         76.972        93.070
-convnext_tiny (prototype)        82.520        96.146
+convnext_tiny                    82.520        96.146
+convnext_small                   83.616        96.650
+convnext_base                    84.062        96.870
+convnext_large                   84.414        96.976
 ================================ ============= =============
 
 
@@ -464,6 +475,18 @@ VisionTransformer
     vit_l_16
     vit_l_32
 
+ConvNeXt
+--------
+
+.. autosummary::
+    :toctree: generated/
+    :template: function.rst
+
+    convnext_tiny
+    convnext_small
+    convnext_base
+    convnext_large
+
 Quantized Models
 ----------------
 
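The newly documented builders can be exercised as below. A hedged sketch: `convnext_tiny` and `pretrained=True` come from the diff, while the input shape and no-grad eval pattern are standard ImageNet practice, not taken from this commit:

```
# Sketch: construct a ConvNeXt variant and run a dummy batch (not repo code).
import torch
from torchvision import models

model = models.convnext_tiny(pretrained=True)  # downloads weights on first use
model.eval()
x = torch.randn(1, 3, 224, 224)                # dummy NCHW input
with torch.no_grad():
    logits = model(x)
print(logits.shape)                            # torch.Size([1, 1000])
```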

docs/source/ops.rst

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ Operators
     clip_boxes_to_image
     deform_conv2d
     generalized_box_iou
+    generalized_box_iou_loss
     masks_to_boxes
     nms
     ps_roi_align
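
A hedged usage sketch for the newly listed op. The box values are invented, and the call assumes `torchvision.ops.generalized_box_iou_loss` takes paired `[N, 4]` boxes in `(x1, y1, x2, y2)` format with an optional `reduction` argument:

```
# Sketch only: gIoU loss between one predicted and one target box (not repo code).
import torch
from torchvision.ops import generalized_box_iou_loss

pred = torch.tensor([[10.0, 10.0, 50.0, 50.0]], requires_grad=True)
target = torch.tensor([[12.0, 8.0, 48.0, 55.0]])

loss = generalized_box_iou_loss(pred, target, reduction="mean")
loss.backward()  # differentiable, so it can train a box-regression head
print(loss.item())
```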

hubconf.py

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 dependencies = ["torch"]
 
 from torchvision.models.alexnet import alexnet
+from torchvision.models.convnext import convnext_tiny, convnext_small, convnext_base, convnext_large
 from torchvision.models.densenet import densenet121, densenet169, densenet201, densenet161
 from torchvision.models.efficientnet import (
     efficientnet_b0,
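
With the hubconf entry above, the ConvNeXt builders should also be reachable through `torch.hub`. A hedged sketch; the `pytorch/vision` hub path is the usual one, though pinning a release tag may be needed in practice:

```
# Sketch: load a ConvNeXt builder exposed via hubconf.py (not repo code).
import torch

model = torch.hub.load("pytorch/vision", "convnext_tiny", pretrained=True)
model.eval()
```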

references/classification/README.md

Lines changed: 3 additions & 2 deletions
@@ -201,11 +201,12 @@ and `--batch_size 64`.
 ### ConvNeXt
 ```
 torchrun --nproc_per_node=8 train.py\
---model convnext_tiny --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \
+--model $MODEL --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \
 --lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
 --label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.05 --norm-weight-decay 0.0 \
---train-crop-size 176 --model-ema --val-resize-size 236 --ra-sampler --ra-reps 4
+--train-crop-size 176 --model-ema --val-resize-size 232 --ra-sampler --ra-reps 4
 ```
+Here `$MODEL` is one of `convnext_tiny`, `convnext_small`, `convnext_base` and `convnext_large`. Note that each variant had its `--val-resize-size` optimized in a post-training step, see their `Weights` entry for their exact value.
 
 Note that the above command corresponds to training on a single node with 8 GPUs.
 For generatring the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),

references/classification/train.py

Lines changed: 1 addition & 1 deletion
@@ -178,7 +178,7 @@ def load_data(traindir, valdir, args):
 
     print("Creating data loaders")
     if args.distributed:
-        if args.ra_sampler:
+        if hasattr(args, "ra_sampler") and args.ra_sampler:
            train_sampler = RASampler(dataset, shuffle=True, repetitions=args.ra_reps)
         else:
             train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
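
The added `hasattr` guard presumably protects callers whose argument parser does not define `--ra-sampler`. A minimal illustration with a hypothetical `Namespace`, not repo code:

```
# Why the guard matters: an args namespace without `ra_sampler` would
# otherwise raise AttributeError on `args.ra_sampler`.
from argparse import Namespace

args = Namespace(distributed=True)  # no ra_sampler attribute defined
use_ra = hasattr(args, "ra_sampler") and args.ra_sampler
print(use_ra)  # False, instead of an AttributeError
```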

references/classification/train_quantization.py

Lines changed: 15 additions & 7 deletions
@@ -13,14 +13,16 @@
 
 
 try:
-    from torchvision.prototype import models as PM
+    from torchvision import prototype
 except ImportError:
-    PM = None
+    prototype = None
 
 
 def main(args):
-    if args.weights and PM is None:
+    if args.prototype and prototype is None:
         raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
+    if not args.prototype and args.weights:
+        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
     if args.output_dir:
         utils.mkdir(args.output_dir)
 
@@ -54,14 +56,14 @@ def main(args):
 
     print("Creating model", args.model)
     # when training quantized models, we always start from a pre-trained fp32 reference model
-    if not args.weights:
+    if not args.prototype:
         model = torchvision.models.quantization.__dict__[args.model](pretrained=True, quantize=args.test_only)
     else:
-        model = PM.quantization.__dict__[args.model](weights=args.weights, quantize=args.test_only)
+        model = prototype.models.quantization.__dict__[args.model](weights=args.weights, quantize=args.test_only)
     model.to(device)
 
     if not (args.test_only or args.post_training_quantize):
-        model.fuse_model()
+        model.fuse_model(is_qat=True)
         model.qconfig = torch.ao.quantization.get_default_qat_qconfig(args.backend)
         torch.ao.quantization.prepare_qat(model, inplace=True)
 
@@ -95,7 +97,7 @@ def main(args):
             ds, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True
         )
         model.eval()
-        model.fuse_model()
+        model.fuse_model(is_qat=False)
         model.qconfig = torch.ao.quantization.get_default_qconfig(args.backend)
         torch.ao.quantization.prepare(model, inplace=True)
         # Calibrate first
@@ -264,6 +266,12 @@ def get_args_parser(add_help=True):
     parser.add_argument("--clip-grad-norm", default=None, type=float, help="the maximum gradient norm (default None)")
 
     # Prototype models only
+    parser.add_argument(
+        "--prototype",
+        dest="prototype",
+        help="Use prototype model builders instead those from main area",
+        action="store_true",
+    )
     parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")
 
     return parser
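
The `is_qat` flag now distinguishes training-time from inference-time fusion. A hedged sketch of the two paths; the model choice and `qnnpack` backend mirror the repo's examples, but this block itself is mine:

```
# Sketch: fuse_model(is_qat=...) in the two quantization workflows (not repo code).
import torch
import torchvision

# QAT: fuse in train mode, then prepare_qat
qat = torchvision.models.quantization.mobilenet_v3_large(pretrained=True, quantize=False)
qat.train()
qat.fuse_model(is_qat=True)
qat.qconfig = torch.ao.quantization.get_default_qat_qconfig("qnnpack")
torch.ao.quantization.prepare_qat(qat, inplace=True)

# Post-training quantization: fuse in eval mode, then prepare and calibrate
ptq = torchvision.models.quantization.mobilenet_v3_large(pretrained=True, quantize=False)
ptq.eval()
ptq.fuse_model(is_qat=False)
ptq.qconfig = torch.ao.quantization.get_default_qconfig("qnnpack")
torch.ao.quantization.prepare(ptq, inplace=True)
```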

references/classification/utils.py

Lines changed: 1 addition & 1 deletion
@@ -344,7 +344,7 @@ def store_model_weights(model, checkpoint_path, checkpoint_key="model", strict=T
 
     # Quantized Classification
     model = M.quantization.mobilenet_v3_large(pretrained=False, quantize=False)
-    model.fuse_model()
+    model.fuse_model(is_qat=True)
     model.qconfig = torch.ao.quantization.get_default_qat_qconfig('qnnpack')
     _ = torch.ao.quantization.prepare_qat(model, inplace=True)
     print(store_model_weights(model, './qat.pth'))
3 binary files changed (contents not shown).

test/test_models.py

Lines changed: 1 addition & 1 deletion
@@ -833,7 +833,7 @@ def test_quantized_classification_model(model_fn):
     model.train()
     model.qconfig = torch.ao.quantization.default_qat_qconfig
 
-    model.fuse_model()
+    model.fuse_model(is_qat=not eval_mode)
     if eval_mode:
         torch.ao.quantization.prepare(model, inplace=True)
     else:

test/test_utils.py

Lines changed: 9 additions & 1 deletion
@@ -124,7 +124,7 @@ def test_draw_boxes_vanilla():
     img = torch.full((3, 100, 100), 0, dtype=torch.uint8)
     img_cp = img.clone()
     boxes_cp = boxes.clone()
-    result = utils.draw_bounding_boxes(img, boxes, fill=False, width=7)
+    result = utils.draw_bounding_boxes(img, boxes, fill=False, width=7, colors="white")
 
     path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_vanilla.png")
     if not os.path.exists(path):
@@ -149,7 +149,11 @@ def test_draw_invalid_boxes():
     img_tp = ((1, 1, 1), (1, 2, 3))
     img_wrong1 = torch.full((3, 5, 5), 255, dtype=torch.float)
     img_wrong2 = torch.full((1, 3, 5, 5), 255, dtype=torch.uint8)
+    img_correct = torch.zeros((3, 10, 10), dtype=torch.uint8)
     boxes = torch.tensor([[0, 0, 20, 20], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
+    labels_wrong = ["one", "two"]
+    colors_wrong = ["pink", "blue"]
+
     with pytest.raises(TypeError, match="Tensor expected"):
         utils.draw_bounding_boxes(img_tp, boxes)
     with pytest.raises(ValueError, match="Tensor uint8 expected"):
@@ -158,6 +162,10 @@
         utils.draw_bounding_boxes(img_wrong2, boxes)
     with pytest.raises(ValueError, match="Only grayscale and RGB images are supported"):
         utils.draw_bounding_boxes(img_wrong2[0][:2], boxes)
+    with pytest.raises(ValueError, match="Number of boxes"):
+        utils.draw_bounding_boxes(img_correct, boxes, labels_wrong)
+    with pytest.raises(ValueError, match="Number of colors"):
+        utils.draw_bounding_boxes(img_correct, boxes, colors=colors_wrong)
 
 
 @pytest.mark.parametrize(
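
For contrast with the new negative tests, here is a call where the label and color counts match the box count, so no ValueError is raised; a hedged sketch, not part of the test file:

```
# Sketch: valid draw_bounding_boxes call -- 2 boxes, 2 labels, 2 colors.
import torch
from torchvision.utils import draw_bounding_boxes

img = torch.zeros((3, 100, 100), dtype=torch.uint8)
boxes = torch.tensor([[10, 10, 40, 40], [50, 50, 90, 90]], dtype=torch.float)
result = draw_bounding_boxes(img, boxes, labels=["cat", "dog"], colors=["pink", "blue"], width=3)
print(result.shape)  # (3, 100, 100), still uint8
```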
