From befdb696fd9e5984d6fa311e470ac31e6c2ea5cc Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Wed, 18 May 2022 16:15:05 +0100 Subject: [PATCH 1/7] add swin_s and swin_b variants --- docs/source/models.rst | 6 + docs/source/models/swin_transformer.rst | 2 + references/classification/README.md | 8 +- .../expect/ModelTester.test_swin_b_expect.pkl | Bin 0 -> 939 bytes .../expect/ModelTester.test_swin_s_expect.pkl | Bin 0 -> 939 bytes torchvision/models/swin_transformer.py | 120 +++++++++++++++++- 6 files changed, 131 insertions(+), 5 deletions(-) create mode 100644 test/expect/ModelTester.test_swin_b_expect.pkl create mode 100644 test/expect/ModelTester.test_swin_s_expect.pkl diff --git a/docs/source/models.rst b/docs/source/models.rst index 91e0c4fa8cb..d7af2ebf935 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -101,6 +101,8 @@ You can construct a model with random weights by calling its constructor: convnext_base = models.convnext_base() convnext_large = models.convnext_large() swin_t = models.swin_t() + swin_s = models.swin_s() + swin_b = models.swin_b() We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`. @@ -227,6 +229,8 @@ convnext_small 83.616 96.650 convnext_base 84.062 96.870 convnext_large 84.414 96.976 swin_t 81.358 95.526 +swin_s +swin_b ================================ ============= ============= @@ -468,6 +472,8 @@ SwinTransformer :template: function.rst swin_t + swin_s + swin_b Quantized Models ---------------- diff --git a/docs/source/models/swin_transformer.rst b/docs/source/models/swin_transformer.rst index b8726d71d2a..2f67b0d5274 100644 --- a/docs/source/models/swin_transformer.rst +++ b/docs/source/models/swin_transformer.rst @@ -23,3 +23,5 @@ more details about this class. :template: function.rst swin_t + swin_s + swin_b diff --git a/references/classification/README.md b/references/classification/README.md index 9eb95fd00e9..da30159542b 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -228,14 +228,14 @@ and `--batch_size 64`. ### SwinTransformer ``` torchrun --nproc_per_node=8 train.py\ ---model swin_t --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0\ ---bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear\ ---lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8\ ---clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ra +--model $MODEL --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0 --bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear --lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ta_wide --model-ema --ra-sampler --ra-reps 4 --val-resize-size 224 ``` +Here `$MODEL` is one of `swin_t`, `swin_s` or `swin_b`. Note that `--val-resize-size` was optimized in a post-training step, see their `Weights` entry for the exact value. + + ### ShuffleNet V2 ``` torchrun --nproc_per_node=8 train.py \ diff --git a/test/expect/ModelTester.test_swin_b_expect.pkl b/test/expect/ModelTester.test_swin_b_expect.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ce3b91d96ca4b3c4c40c1f7ae65845accf14043c GIT binary patch literal 939 zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW zr zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5~nk0-@b2ax9*qHklB}#JbRx@4d*_Qb7FRGLWlP4t*`# zD5h?^kLynE6AC`FFSBX8-S(#9eP0sJ?|a?1)9&|*emkGfQu{6PTlewVF0cz&)@~P{ z%4^?pYl0mk-yypd5^46%ZaeoCC}`QI9b>jHc-*#cNr}MzSK`|Hm?jJFyS5?9URbom zzVD>t{=oli`xrzN_gb-Y?(5&qv;Sge@qUYE&ux^? zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5=VD($bQaVr~L~$!}h;s2;Kj3{aU*q{cgM6uY3336Mwz8p(t~=vXhn_N3!Dn z-i7=2-E&=P$A8*kzw&Q=`+xu?`)9X%Y+HCP+p>Jrwlj8`W_OgG!Cso@hHW3m+;CG8>h{y!|L&Wi6m36sPSbvd^LjQ9ODEf3d$4JrK SwinTransformer: + """ + Constructs a swin_small architecture from + `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows `_. + + Args: + weights (:class:`~torchvision.models.Swin_S_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.Swin_S_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.Swin_S_Weights + :members: + """ + weights = Swin_S_Weights.verify(weights) + + return _swin_transformer( + patch_size=4, + embed_dim=96, + depths=[2, 2, 18, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + stochastic_depth_prob=0.3, + weights=weights, + progress=progress, + **kwargs, + ) + + +def swin_b(*, weights: Optional[Swin_B_Weights] = None, progress: bool = True, **kwargs: Any) -> SwinTransformer: + """ + Constructs a swin_base architecture from + `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows `_. + + Args: + weights (:class:`~torchvision.models.Swin_B_Weights`, optional): The + pretrained weights to use. See + :class:`~torchvision.models.Swin_B_Weights` below for + more details, and possible values. By default, no pre-trained + weights are used. + progress (bool, optional): If True, displays a progress bar of the + download to stderr. Default is True. + **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer`` + base class. Please refer to the `source code + `_ + for more details about this class. + + .. autoclass:: torchvision.models.Swin_B_Weights + :members: + """ + weights = Swin_B_Weights.verify(weights) + + return _swin_transformer( + patch_size=4, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + stochastic_depth_prob=0.2, + weights=weights, + progress=progress, + **kwargs, + ) From 63e2c223f4d7a152b419b7c0f8e6850e7ae20c69 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Wed, 18 May 2022 16:07:41 +0000 Subject: [PATCH 2/7] fix swin_b params --- torchvision/models/swin_transformer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index 1fb0b9a73b3..cf9e27d5480 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -570,11 +570,11 @@ def swin_b(*, weights: Optional[Swin_B_Weights] = None, progress: bool = True, * return _swin_transformer( patch_size=4, - embed_dim=96, - depths=[2, 2, 6, 2], - num_heads=[3, 6, 12, 24], + embed_dim=128, + depths=[2, 2, 18, 2], + num_heads=[4, 8, 16, 32], window_size=7, - stochastic_depth_prob=0.2, + stochastic_depth_prob=0.5, weights=weights, progress=progress, **kwargs, From d598e2e47ef17650a841a15a9f3e722019b7489e Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Wed, 18 May 2022 17:11:09 +0100 Subject: [PATCH 3/7] fix n parameters and acc numbers --- torchvision/models/swin_transformer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index cf9e27d5480..fe01d5fd071 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -435,16 +435,16 @@ class Swin_S_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( url="https://download.pytorch.org/models/swin_s-30134662.pth", transforms=partial( - ImageClassification, crop_size=224, resize_size=238, interpolation=InterpolationMode.BICUBIC + ImageClassification, crop_size=224, resize_size=246, interpolation=InterpolationMode.BICUBIC ), meta={ **_COMMON_META, - "num_params": 28288354, + "num_params": 49606258, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer", "metrics": { - "acc@1": 81.358, - "acc@5": 95.526, + "acc@1": 83.196, + "acc@5": 96.360, }, "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", }, @@ -460,12 +460,12 @@ class Swin_B_Weights(WeightsEnum): ), meta={ **_COMMON_META, - "num_params": 28288354, + "num_params": 87768224, "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer", "metrics": { - "acc@1": 81.358, - "acc@5": 95.526, + "acc@1": 83.582, + "acc@5": 96.640, }, "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", }, From bfbc4f93a382ad6be0637bed741a4123651afd59 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Wed, 18 May 2022 17:18:57 +0100 Subject: [PATCH 4/7] adding missing acc numbers --- docs/source/models.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/models.rst b/docs/source/models.rst index d7af2ebf935..cd235e45299 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -228,9 +228,9 @@ convnext_tiny 82.520 96.146 convnext_small 83.616 96.650 convnext_base 84.062 96.870 convnext_large 84.414 96.976 -swin_t 81.358 95.526 -swin_s -swin_b +swin_t 81.474 95.776 +swin_s 83.196 96.360 +swin_b 83.582 96.640 ================================ ============= ============= From d3c19aea279fd6f401100612098de3ebfe50ed93 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Wed, 18 May 2022 17:33:50 +0100 Subject: [PATCH 5/7] apply ufmt --- torchvision/models/swin_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index 8422d1e8abb..25593fafa15 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -422,7 +422,7 @@ class Swin_T_Weights(WeightsEnum): "min_size": (224, 224), "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer", "_metrics": { - "ImageNet-1K": { + "ImageNet-1K": { "acc@1": 81.474, "acc@5": 95.776, } From 47a6b61833d2bd1d7283a1c5019c61faf213e015 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 19 May 2022 09:14:45 +0100 Subject: [PATCH 6/7] Updating `_docs` to reflect training recipe --- torchvision/models/swin_transformer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/models/swin_transformer.py b/torchvision/models/swin_transformer.py index 25593fafa15..6e001c1d2dd 100644 --- a/torchvision/models/swin_transformer.py +++ b/torchvision/models/swin_transformer.py @@ -427,7 +427,7 @@ class Swin_T_Weights(WeightsEnum): "acc@5": 95.776, } }, - "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", + "_docs": """These weights reproduce closely the results of the paper using a similar training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -450,7 +450,7 @@ class Swin_S_Weights(WeightsEnum): "acc@5": 96.360, } }, - "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", + "_docs": """These weights reproduce closely the results of the paper using a similar training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 @@ -473,7 +473,7 @@ class Swin_B_Weights(WeightsEnum): "acc@5": 96.640, } }, - "_docs": """These weights reproduce closely the results of the paper using its training recipe.""", + "_docs": """These weights reproduce closely the results of the paper using a similar training recipe.""", }, ) DEFAULT = IMAGENET1K_V1 From 8c65388d477df103beaa8fa541be8f5834c920f8 Mon Sep 17 00:00:00 2001 From: Joao Gomes Date: Thu, 19 May 2022 09:48:30 +0100 Subject: [PATCH 7/7] Fix exted for swin_b --- test/expect/ModelTester.test_swin_b_expect.pkl | Bin 939 -> 939 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test/expect/ModelTester.test_swin_b_expect.pkl b/test/expect/ModelTester.test_swin_b_expect.pkl index ce3b91d96ca4b3c4c40c1f7ae65845accf14043c..d807ca3ed1588c47c932891b26b99835c3f86d94 100644 GIT binary patch delta 230 zcmVpyca6V1D5gD(aX>vR z_YpqWCn&#+QpY~R%Va+lQei(37+6t0?4_nYn~W7cIFDOB z#uLrIO~lkbq&G@FHy#APZ23VxhDB??LQ0apC|6OxAcWyP6tf6F{BJ2gFr0$ESo^EK giNBma@{zc{P)i30yYI)elMn*X1iSCYw3Fll$F>A$bN~PV delta 230 zcmV*|IW1GH2dK7}%0+SQI*05SX z6BTqnj>$p4QU3