From 9f43dbbf56ce86a21ef9b7f9ce5f677dd7ca5e08 Mon Sep 17 00:00:00 2001
From: lezcano
Date: Thu, 25 Mar 2021 18:49:34 +0000
Subject: [PATCH 01/17] Parametrizations tutorial

---
 .../thumbnails/cropped/parametrizations.png  | Bin 0 -> 35776 bytes
 index.rst                                    |   8 +
 .../parametrizations_tutorial.py             | 326 ++++++++++++++++++
 intermediate_source/tensorboard_tutorial.rst |   2 +-
 4 files changed, 335 insertions(+), 1 deletion(-)
 create mode 100644 _static/img/thumbnails/cropped/parametrizations.png
 create mode 100644 intermediate_source/parametrizations_tutorial.py

diff --git a/_static/img/thumbnails/cropped/parametrizations.png b/_static/img/thumbnails/cropped/parametrizations.png
new file mode 100644
index 0000000000000000000000000000000000000000..426a14d98f5f7fbbf695626658ad1946fe4ef63e
GIT binary patch
literal 35776
[35776 bytes of base85-encoded PNG thumbnail data omitted]
zO;omGg!_0O*CvVN=C*LyJtYn!)Wdz(QtV7yN}h<=*2VEzJl+-e2sj)P_ZU-T*d(!c z$LD<9&@G>IJpE(hRkSnr_9$8(ylW-z);4$e{*Q#NAWg#dc*^V)5t5d|No7XrE*mb^ zQe{NrA)T9>Oe;Xw?<+uTtn`IC%4@Uzb4p7;JzU>@#CG+qq~##1MDuM!R?p5;GUf-@ z`7N1G$2g4wM~vgZ=7CrbgFgoxN!bi8bW zoo8VV4Wz^m{x3~nuZMH(JKG!^xP*`o*<+e?=U~@QIZ6(|8kOE{K`Fpx=khEm(UV55 zXG6tIWh|Cz!-gE=y0Qaan!rsEaEFS$|Fq{#Yc}HVe61`Tcp%_LlfpI2^ZOM+t}6}f zh9=+Y*4oE10SHo$BMH)V`$%D|#{-0nO-y@FK%O05kD1g3}7$O87M@8N$83gef7!3_oOG&o_ESn{CTq!9aphiRs2 zvjvt^T;4K*@2i?E!IcRzRFHPy7}bs%DFfp&4rXUR7P8iprR*jsFirWqj@x4R5bVi5w++At_vwy;sy!D3h}c`9C68$iDyp literal 0 HcmV?d00001 diff --git a/index.rst b/index.rst index 1019a4031fa..ba695bedfbf 100644 --- a/index.rst +++ b/index.rst @@ -324,6 +324,13 @@ Welcome to PyTorch Tutorials :link: beginner/hyperparameter_tuning_tutorial.html :tags: Model-Optimization,Best-Practice +.. customcarditem:: + :header: Parametrizations Tutorial + :card_description: Learn how to use torch.nn.utils.parametrizations to put constriants in your parameters (e.g. make them orthogonal, symmetric positive definite, low-rank...) + :image: _static/img/thumbnails/cropped/parametrizations.png + :link: intermediate/parametrizations_tutorial.html + :tags: Model-Optimization,Best-Practice + .. customcarditem:: :header: Pruning Tutorial :card_description: Learn how to use torch.nn.utils.prune to sparsify your neural networks, and how to extend it to implement your own custom pruning technique. @@ -620,6 +627,7 @@ Additional Resources beginner/profiler beginner/hyperparameter_tuning_tutorial + intermediate/parametrizations_tutorial intermediate/pruning_tutorial advanced/dynamic_quantization_tutorial intermediate/dynamic_quantization_bert_tutorial diff --git a/intermediate_source/parametrizations_tutorial.py b/intermediate_source/parametrizations_tutorial.py new file mode 100644 index 00000000000..5391f7c5c0f --- /dev/null +++ b/intermediate_source/parametrizations_tutorial.py @@ -0,0 +1,326 @@ +# -*- coding: utf-8 -*- +""" +Parametrizations Tutorial +========================= +**Author**: `Mario Lezcano `_ + +Regularizing deep-learning models is a surprisingly challenging task. +Classical techniques such as penalty methods often fall short when applied +on deep models due to the complexity of the function being optimized. +This is particularly problematic when working with ill-conditioned models. +Examples of these are RNNs trained on long sequences and GANs. A number +of techniques have been proposed in the recent years to regularize these +models and improve their convergence. On recurrent models, it has been +proposed to control the singular values of the recurrent kernel for the +RNN to be well-conditioned. This can be achieved, for example, by making +the recurrent kernel `orthogonal `_. +Another way to regularize recurrent models is via +"`weight normalization `_". +This approach proposes to decouple the learning of the parameters from the +learning of their scale. To do so, the parameter is divided by its +`Frobenius norm `_. +A similar regularization was proposed for GANs under the name of +"`spectral normalization `_". This method +controls the Lipschitz constant of the network by dividing its parameters by +their `spectral norm `_, +rather than its Frobenius norm. + +All these methods have a pattern in common. They all transform a parameter +in an appropriate way before using it. In the first case, they make it orthogonal by +using a function that maps matrices to orthogonal matrices. 
In the case of weight +and spectral normalization, they divide the original parameter by its norm. + +More genreally, all these examples use a function to put extra structure on the parameters. +In other words, they use a function to constrain the parameters. + +In this tutorial, you will learn how to implement and use this patern to write and +put constraints on your model. Doing so is as easy as writing your own ``nn.Module``. + +Requirements +------------ +``"torch>=1.9.0"`` + +Implementing Parametrizations by Hand +------------------------------------- + +Assume that we want to have a square linear layer with symmetric weights, that is, +with weights :math:`X` such that :math:`X = X^{\intercal}`. One way to do so is +to copy the upper triangular part of the matrix into its lower triangular part +""" + +import torch +import torch.nn as nn +import torch.nn.utils.parametrizations as P + +def symmetric(X): + return X.triu() + X.triu(1).transpose(-1, -2) + +X = torch.rand(3, 3) +A = symmetric(X) +print(A) +assert torch.allclose(A, A.T) + +############################################################################### +# We can then use this idea to implement a linear layer with symmetric weights: +class LinearSymmetric(nn.Module): + def __init__(self, n_features): + super().__init__() + self.weight = nn.Parameter(torch.rand(n_features, n_features)) + + def forward(self, x): + A = symmetric(self.weight) + return x @ A + +############################################################################### +# The layer can be then used as a regular linear layer. +layer = LinearSymmetric(3) +out = layer(torch.rand(8, 3)) + +############################################################################### +# This implementation, although correct and self-contained, presents a number of problems: +# +# 1) It reimplements the layer. We had to implement the linear layer as ``x @ A``. This is +# not very problematic for a linear layer, but imagine having to reimplement a CNN or a +# Transformer... +# 2) It does not separate the layer and the parametrization. If the parametrization were +# more difficult, we would have to rewrite its code for each layer that we want to use it +# in +# 3) It recomputes the parametrization everytime forward is called. If we used the layer +# several times during the forward pass, (imagine the recurrent kernel of an RNN) we would +# be recomputing the same ``A`` every time the layer is called. +# +# Parametrizations come to solve all these and other problems. +# +# Introduction to Parametrizations +# -------------------------------- +# +# Let's start by reimplementing the code above using ``torch.nn.utils.parametrizations``. +# The only thing that we have to do is to write the parametrization as a regular ``nn.Module`` +class Symmetric(nn.Module): + def forward(self, X): + return X.triu() + X.triu(1).transpose(-1, -2) + +############################################################################### +# This is all we need to do. Once we have this, we can transform any regular layer into a +# symmetric layer by doing +layer = nn.Linear(3, 3) +P.register_parametrization(layer, "weight", Symmetric()) + +############################################################################### +# Now, the matrix of the linear layer is symmetric +A = layer.weight +print(A) +assert torch.allclose(A, A.T) + +############################################################################### +# We can do the same thing with any other layer. 
For example, we can create a CNN with +# skew-symmetric kernels (:math:`X = -X^{\intercal}`). We use a similar parametrization, +# copying minus the upper triangular part into the lower-triangular part. +class Skew(nn.Module): + def forward(self, X): + A = X.triu(1) + return A - A.transpose(-1, -2) + + +cnn = nn.Conv2D(in_channels=5, out_channels=8, kernel_size=3) +P.register_parametrization(layer, "weight", Skew()) +# Print a few kernels +print(cnn.weight[0, 1]) +print(cnn.weight[2, 2]) + +############################################################################### +# Inspecting a parametrized module +# -------------------------------- +# When a module is parametrized, we find that the module has changed a bit. +# We may observe this by simply printing the module +layer = nn.Linear(3, 3) +print(f"Unparametrized:\n{layer}") +P.register_parametrization(layer, "weight", Symmetric()) +print(f"Parametrized:\n{layer}") + +############################################################################### +# We see that the ``Symmetric`` class has been registered under a ``parametrizations`` attribute. +# This ``parametrizations`` attribute is an ``nn.ModuleList``, and it can be accessed as such +print(layer.parametrizations.weight[0]) + +############################################################################### +# Note that each element in the `ModuleList` is itself a list, and we have to select the first +# element of this list. It will be clear later the reason for this, when we see how to contactenate +# paramtrizations. +# +# Something that we may notice is that, if we print the parameters, we see that the +# parameter ``weight`` has been moved +print(dict(layer.named_parameters())) + +############################################################################### +# It now sits under ``layer.parametrizations.weight.original`` +print(layer.parametrizations.weight.original) + +############################################################################### +# Besides these two small differences, the parametrization is doing exactly the same +# as our manual implementation +symmetric = Symmetric() +assert torch.allclose(layer.weight, symmetric(layer.parametrizations.weight.original)) + +############################################################################### +# Parametrizations are first-class citizens +# ----------------------------------------- +# Since ``layer.parametrizations`` is an `nn.ModuleList`, it means that the parametrizations +# are properly registered as submodules of the original module. As such, the same rules +# for registering parameters in a module apply to register a parametrization. +# For example, if a parametrization has parameters, these will be moved from CPU +# to CUDA when calling ``model = model.cuda()``. 
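+
+###############################################################################
+# A minimal sketch of that point (``Scale`` below is a made-up parametrization,
+# used only for illustration): a parametrization that owns an ``nn.Parameter``
+# shows up in ``named_parameters()`` and moves to the GPU together with the model
+class Scale(nn.Module):
+    def __init__(self):
+        super().__init__()
+        # a learnable scale owned by the parametrization itself
+        self.scale = nn.Parameter(torch.tensor(2.))
+
+    def forward(self, X):
+        return self.scale * X
+
+scaled_layer = nn.Linear(3, 3)
+P.register_parametrization(scaled_layer, "weight", Scale())
+print(dict(scaled_layer.named_parameters()).keys())
+if torch.cuda.is_available():
+    # the parametrization's own parameter moves along with the rest of the model
+    scaled_layer = scaled_layer.cuda()
+    print(scaled_layer.parametrizations.weight[0].scale.device)
+
+###############################################################################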
+# +# Caching the value of a parametrization +# -------------------------------------- +# Parametrizations come with an in-built caching system via the context manager ``P.cached()`` +class NoisyParametrization(nn.Module): + def forward(self, X): + print("Computing the Parametrization") + return X + +layer = nn.Linear(2, 3) +P.register_parametrization(layer, "weight", NoisyParametrization()) +print("Here, layer.weight is recomputed every time we call it") +Y = layer.weight + layer.weight.T +l = layer.weight.sum() +with P.cached(): + print("Here, it is computed just the first time layer.weight is called") + Y = layer.weight + layer.weight.T + l = layer.weight.sum() + +############################################################################### +# Composing Parametrizations +# -------------------------- +# Concatenating two parametrizations is as easy as registering them on the same tensor. +# We may use this to create complex parametrizations from simple ones. For example, the +# `Cayley map `_ +# maps the skew-symmetric matrices to the orthogonal matrices of positive determinant. We can +# concatenate ``Skew`` and a parametrization that implements the Cayley map to get a layer with +# orthogonal weight. +class CayleyMap(nn.Module): + def __init__(self, n): + self.register_buffer("Id", torch.eye(n)) + + def forward(self, X): + # (I + X)(I - X)^{-1} + return torch.solve(self.Id + X, self.Id - X).solution + +layer = nn.Linear(3, 3) +P.register_parametrization(layer, "weight", Skew()) +P.register_parametrization(layer, "weight", CayleyMap(3)) +X = layer.weight +assert torch.allclose(X.T @ X, torch.eye(3)) # X is orthogonal + +############################################################################### +# This may also be used to prune a parametrized module, or to reuse parametrizations. For example, +# we may use the fact that the exponential of matrices maps the symmetric matrices to the +# Symmetric Positive Definite (SPD) matrices, and the skew-symmetric matrices to the orthogonal +# matrices. Using these two facts, we may reuse the parametrizations. +class MatrixExponential(nn.Module): + def forward(X): + return torch.matrix_exp(X) + +layer_orthogonal = nn.Linear(3, 3) +P.register_parametrization(layer_orthogonal, "weight", Skew()) +P.register_parametrization(layer_orthogonal, "weight", MatrixExponential()) +X = layer_orthogonal.weight +assert torch.allclose(X.T @ X, torch.eye(3)) # X is orthogonal + +layer_spd = nn.Linear(3, 3) +P.register_parametrization(layer_spd, "weight", Symmetric()) +P.register_parametrization(layer_spd, "weight", MatrixExponential()) +X = layer_spd.weight +assert torch.allclose(X, X.T) # X is symmetric +assert (torch.symeig(X).eigenvalues > 0.).all() # X is positive definite + +############################################################################### +# Intializing Parametrizations +# ---------------------------- +# Parametrizations come with a mechanism to initialize them. If we implement a method +# ``right_inverse`` with signature +# +# .. code-block:: python +# +# def right_inverse(self, X: Tensor) -> Tensor +# +# it will be used when assigning to the parametrized tensor. 
+# +# Let's upgrade our implementation of the ``Skew`` class to support this +class Skew(nn.Module): + def forward(self, X): + A = X.triu(1) + return A - A.transpose(-1, -2) + + def is_skew(self, X): + return torch.allclose(X, -X.transpose(-1, -2)) + + def right_inverse(self, A): + if not self.is_skew(A): + raise ValueError(f"The provided matrix {A} is not skew-symmetric") + return A.triu(1) + +############################################################################### +# We may now initialize a layer that is parametrized with ``Skew`` +layer = nn.Linear(3, 3) +P.register_parametrization(layer, "weight", Skew()) +X = torch.rand(3, 3) +X = X - X.T # X is now skew-symmetric +layer.weight = X # Initialize layer.weight to be X +assert torch.allclose(layer.weight, X) # layer.weight == X + +############################################################################### +# This ``right_inverse`` works as expected when we compose parametrizations. To see this, let's +# upgrade the Cayley parametrization to also support being initialized +class CayleyMap(nn.Module): + def __init__(self, n): + self.register_buffer("Id", torch.eye(n)) + + def forward(self, X): + # (I + X)(I - X)^{-1} + return torch.solve(self.Id + X, self.Id - X).solution + + def right_inverse(self, A): + # See https://en.wikipedia.org/wiki/Cayley_transform#Matrix_map + # (X - I)(X + I)^{-1} + return torch.solve(X - self.Id, self.Id + X).solution + +layer_orthogonal = nn.Linear(3, 3) +P.register_parametrization(layer_orthogonal, "weight", Skew()) +P.register_parametrization(layer_orthogonal, "weight", CayleyMap(3)) +# Sample an orthogonal matrix with positive determinant +X = torch.empty(3, 3) +nn.init.orthogonal_(X) +if X.det() < 0.: + X[0].neg_() +layer_orthogonal.weight = X +assert torch.allclose(X, layer_orthogonal.weight) # layer_orthogonal.weight == X + +############################################################################### +# This initialization step can be written more succinctly as +layer_orthogonal.weight = nn.init.orthogonal_(layer_orthogonal.weight) + +############################################################################### +# The name of this method comes from the fact that we would often expect +# that ``forward(right_inverse(X)) == X``. This is a direct way of rewritting that +# the forward afer the initalization with value ``X`` should return the value ``X``. +# This constraint is not enforced in the code. In fact, at times, it might be of +# interest to relax this relation. For example, consider the following implementation +# of a randomized pruning method. +class PruningParametrization(nn.Module): + def __init__(self, X, p_drop=0.2): + # sample zeros with probability p_drop + mask = torch.full_like(X, 1.0 - p_drop) + self.mask = torch.bernoulli(mask) + + def forward(self, X): + return X * self.mask + + def right_inverse(self, A): + return A + +############################################################################### +# In this case, it is not true that ``forward(right_inverse(X)) == X``. This is +# only true when the matrix ``A`` passed to ``right_inverse`` has zeros in the +# same positions as the mask. Even then, if we assign a tensor to a pruned parameter, +# it will comes as no surprise that tensor will be, in fact, pruned. 
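+
+###############################################################################
+# As a quick sanity check (a minimal sketch reusing the ``Skew`` class defined
+# above), the round-trip property does hold for a parametrization whose
+# ``right_inverse`` exactly inverts ``forward``
+skew = Skew()
+S = torch.rand(3, 3)
+S = S - S.T  # make S skew-symmetric
+# right_inverse followed by forward recovers S
+assert torch.allclose(skew(skew.right_inverse(S)), S)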
diff --git a/intermediate_source/tensorboard_tutorial.rst b/intermediate_source/tensorboard_tutorial.rst index 2dfac682b6e..1fd946235b6 100644 --- a/intermediate_source/tensorboard_tutorial.rst +++ b/intermediate_source/tensorboard_tutorial.rst @@ -1,5 +1,5 @@ Visualizing Models, Data, and Training with TensorBoard -==================================================== +======================================================= In the `60 Minute Blitz `_, we show you how to load in data, From 6b97e9e94c718dc33484bc39e2e4b7290bc34aca Mon Sep 17 00:00:00 2001 From: lezcano Date: Thu, 25 Mar 2021 19:05:18 +0000 Subject: [PATCH 02/17] Add remove_parametrization --- .../parametrizations_tutorial.py | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/intermediate_source/parametrizations_tutorial.py b/intermediate_source/parametrizations_tutorial.py index 5391f7c5c0f..626190357f9 100644 --- a/intermediate_source/parametrizations_tutorial.py +++ b/intermediate_source/parametrizations_tutorial.py @@ -72,7 +72,7 @@ def forward(self, x): return x @ A ############################################################################### -# The layer can be then used as a regular linear layer. +# The layer can be then used as a regular linear layer layer = LinearSymmetric(3) out = layer(torch.rand(8, 3)) @@ -84,7 +84,7 @@ def forward(self, x): # Transformer... # 2) It does not separate the layer and the parametrization. If the parametrization were # more difficult, we would have to rewrite its code for each layer that we want to use it -# in +# in. # 3) It recomputes the parametrization everytime forward is called. If we used the layer # several times during the forward pass, (imagine the recurrent kernel of an RNN) we would # be recomputing the same ``A`` every time the layer is called. @@ -115,7 +115,7 @@ def forward(self, X): ############################################################################### # We can do the same thing with any other layer. For example, we can create a CNN with # skew-symmetric kernels (:math:`X = -X^{\intercal}`). We use a similar parametrization, -# copying minus the upper triangular part into the lower-triangular part. +# copying minus the upper triangular part into the lower-triangular part class Skew(nn.Module): def forward(self, X): A = X.triu(1) @@ -197,7 +197,7 @@ def forward(self, X): # `Cayley map `_ # maps the skew-symmetric matrices to the orthogonal matrices of positive determinant. We can # concatenate ``Skew`` and a parametrization that implements the Cayley map to get a layer with -# orthogonal weight. +# orthogonal weight class CayleyMap(nn.Module): def __init__(self, n): self.register_buffer("Id", torch.eye(n)) @@ -216,7 +216,7 @@ def forward(self, X): # This may also be used to prune a parametrized module, or to reuse parametrizations. For example, # we may use the fact that the exponential of matrices maps the symmetric matrices to the # Symmetric Positive Definite (SPD) matrices, and the skew-symmetric matrices to the orthogonal -# matrices. Using these two facts, we may reuse the parametrizations. +# matrices. Using these two facts, we may reuse the parametrizations class MatrixExponential(nn.Module): def forward(X): return torch.matrix_exp(X) @@ -306,7 +306,7 @@ def right_inverse(self, A): # the forward afer the initalization with value ``X`` should return the value ``X``. # This constraint is not enforced in the code. In fact, at times, it might be of # interest to relax this relation. 
For example, consider the following implementation -# of a randomized pruning method. +# of a randomized pruning method: class PruningParametrization(nn.Module): def __init__(self, X, p_drop=0.2): # sample zeros with probability p_drop @@ -323,4 +323,31 @@ def right_inverse(self, A): # In this case, it is not true that ``forward(right_inverse(X)) == X``. This is # only true when the matrix ``A`` passed to ``right_inverse`` has zeros in the # same positions as the mask. Even then, if we assign a tensor to a pruned parameter, -# it will comes as no surprise that tensor will be, in fact, pruned. +# it will comes as no surprise that tensor will be, in fact, pruned.: +# +# Removing a Parametrization +# -------------------------- +# We may remove a parametrization from a module by using ``P.remove_parametrization()`` +layer = nn.Linear(3, 3) +print(layer) +print(layer.weight) +P.register_parametrization(layer, "weight", Skew()) +print(layer) +print(layer.weight) +P.remove_parametrization(layer, "weight") +print(layer) +print(layer.weight) + +############################################################################### +# While doing so, we may choose to leave the original parameter (i.e. that in +# ``layer.parametriations.weight.original``) rather than its parametrized version +# by setting the flag ``leave_parametrized=False`` +layer = nn.Linear(3, 3) +print(layer) +print(layer.weight) +P.register_parametrization(layer, "weight", Skew()) +print(layer) +print(layer.weight) +P.remove_parametrization(layer, "weight", leave_parametrized=False) +print(layer) +print(layer.weight) From 6448c4b5b3e70105c417a97853900c041ed323a9 Mon Sep 17 00:00:00 2001 From: lezcano Date: Thu, 25 Mar 2021 20:56:32 +0000 Subject: [PATCH 03/17] Correct name --- intermediate_source/parametrizations_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intermediate_source/parametrizations_tutorial.py b/intermediate_source/parametrizations_tutorial.py index 626190357f9..40ec6a38c9f 100644 --- a/intermediate_source/parametrizations_tutorial.py +++ b/intermediate_source/parametrizations_tutorial.py @@ -50,7 +50,7 @@ import torch import torch.nn as nn -import torch.nn.utils.parametrizations as P +import torch.nn.utils.parametrize as P def symmetric(X): return X.triu() + X.triu(1).transpose(-1, -2) @@ -94,7 +94,7 @@ def forward(self, x): # Introduction to Parametrizations # -------------------------------- # -# Let's start by reimplementing the code above using ``torch.nn.utils.parametrizations``. +# Let's start by reimplementing the code above using ``torch.nn.utils.parametrize``. 
# The only thing that we have to do is to write the parametrization as a regular ``nn.Module`` class Symmetric(nn.Module): def forward(self, X): From 93d1cd2338dffeeff8dca9a35e520d7715821f66 Mon Sep 17 00:00:00 2001 From: lezcano Date: Thu, 25 Mar 2021 20:57:58 +0000 Subject: [PATCH 04/17] minor --- intermediate_source/parametrizations_tutorial.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intermediate_source/parametrizations_tutorial.py b/intermediate_source/parametrizations_tutorial.py index 40ec6a38c9f..021c81b6829 100644 --- a/intermediate_source/parametrizations_tutorial.py +++ b/intermediate_source/parametrizations_tutorial.py @@ -182,12 +182,12 @@ def forward(self, X): layer = nn.Linear(2, 3) P.register_parametrization(layer, "weight", NoisyParametrization()) print("Here, layer.weight is recomputed every time we call it") -Y = layer.weight + layer.weight.T -l = layer.weight.sum() +foo = layer.weight + layer.weight.T +bar = layer.weight.sum() with P.cached(): print("Here, it is computed just the first time layer.weight is called") - Y = layer.weight + layer.weight.T - l = layer.weight.sum() + foo = layer.weight + layer.weight.T + bar = layer.weight.sum() ############################################################################### # Composing Parametrizations From 3390cb7cb73d527ff9164dc1f8e6bc42c625cb38 Mon Sep 17 00:00:00 2001 From: lezcano Date: Thu, 25 Mar 2021 21:29:36 +0000 Subject: [PATCH 05/17] Proper version number --- intermediate_source/parametrizations_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/parametrizations_tutorial.py b/intermediate_source/parametrizations_tutorial.py index 021c81b6829..a292e3ebaee 100644 --- a/intermediate_source/parametrizations_tutorial.py +++ b/intermediate_source/parametrizations_tutorial.py @@ -38,7 +38,7 @@ Requirements ------------ -``"torch>=1.9.0"`` +``"torch>=1.9.0a0+git7aeee28"`` Implementing Parametrizations by Hand ------------------------------------- From 542d4d751ab0ecedb56f5a8597edb560a34f3130 Mon Sep 17 00:00:00 2001 From: lezcano Date: Fri, 26 Mar 2021 10:07:47 +0000 Subject: [PATCH 06/17] Fuzzy spellcheck --- intermediate_source/parametrizations_tutorial.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intermediate_source/parametrizations_tutorial.py b/intermediate_source/parametrizations_tutorial.py index a292e3ebaee..b27d235cc05 100644 --- a/intermediate_source/parametrizations_tutorial.py +++ b/intermediate_source/parametrizations_tutorial.py @@ -30,10 +30,10 @@ using a function that maps matrices to orthogonal matrices. In the case of weight and spectral normalization, they divide the original parameter by its norm. -More genreally, all these examples use a function to put extra structure on the parameters. +More generally, all these examples use a function to put extra structure on the parameters. In other words, they use a function to constrain the parameters. -In this tutorial, you will learn how to implement and use this patern to write and +In this tutorial, you will learn how to implement and use this pattern to write and put constraints on your model. Doing so is as easy as writing your own ``nn.Module``. 
Requirements @@ -173,7 +173,7 @@ def forward(self, X): # # Caching the value of a parametrization # -------------------------------------- -# Parametrizations come with an in-built caching system via the context manager ``P.cached()`` +# Parametrizations come with an inbuilt caching system via the context manager ``P.cached()`` class NoisyParametrization(nn.Module): def forward(self, X): print("Computing the Parametrization") @@ -302,7 +302,7 @@ def right_inverse(self, A): ############################################################################### # The name of this method comes from the fact that we would often expect -# that ``forward(right_inverse(X)) == X``. This is a direct way of rewritting that +# that ``forward(right_inverse(X)) == X``. This is a direct way of rewriting that # the forward afer the initalization with value ``X`` should return the value ``X``. # This constraint is not enforced in the code. In fact, at times, it might be of # interest to relax this relation. For example, consider the following implementation From 70ae33491894f54888d67cf56aeb47cb090f50df Mon Sep 17 00:00:00 2001 From: lezcano Date: Fri, 26 Mar 2021 10:15:03 +0000 Subject: [PATCH 07/17] version --- intermediate_source/parametrizations_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/parametrizations_tutorial.py b/intermediate_source/parametrizations_tutorial.py index b27d235cc05..70ccc7095fc 100644 --- a/intermediate_source/parametrizations_tutorial.py +++ b/intermediate_source/parametrizations_tutorial.py @@ -38,7 +38,7 @@ Requirements ------------ -``"torch>=1.9.0a0+git7aeee28"`` +``"torch>=1.9.0a0+7aeee28"`` Implementing Parametrizations by Hand ------------------------------------- From ea59b2b471e81133f6b5b9da59744475a06839b7 Mon Sep 17 00:00:00 2001 From: lezcano Date: Mon, 29 Mar 2021 13:27:44 +0100 Subject: [PATCH 08/17] Remove _tutorial from name --- .../parametrizations_tutorial.py | 353 ------------------ 1 file changed, 353 deletions(-) delete mode 100644 intermediate_source/parametrizations_tutorial.py diff --git a/intermediate_source/parametrizations_tutorial.py b/intermediate_source/parametrizations_tutorial.py deleted file mode 100644 index 70ccc7095fc..00000000000 --- a/intermediate_source/parametrizations_tutorial.py +++ /dev/null @@ -1,353 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Parametrizations Tutorial -========================= -**Author**: `Mario Lezcano `_ - -Regularizing deep-learning models is a surprisingly challenging task. -Classical techniques such as penalty methods often fall short when applied -on deep models due to the complexity of the function being optimized. -This is particularly problematic when working with ill-conditioned models. -Examples of these are RNNs trained on long sequences and GANs. A number -of techniques have been proposed in the recent years to regularize these -models and improve their convergence. On recurrent models, it has been -proposed to control the singular values of the recurrent kernel for the -RNN to be well-conditioned. This can be achieved, for example, by making -the recurrent kernel `orthogonal `_. -Another way to regularize recurrent models is via -"`weight normalization `_". -This approach proposes to decouple the learning of the parameters from the -learning of their scale. To do so, the parameter is divided by its -`Frobenius norm `_. -A similar regularization was proposed for GANs under the name of -"`spectral normalization `_". 
This method -controls the Lipschitz constant of the network by dividing its parameters by -their `spectral norm `_, -rather than its Frobenius norm. - -All these methods have a pattern in common. They all transform a parameter -in an appropriate way before using it. In the first case, they make it orthogonal by -using a function that maps matrices to orthogonal matrices. In the case of weight -and spectral normalization, they divide the original parameter by its norm. - -More generally, all these examples use a function to put extra structure on the parameters. -In other words, they use a function to constrain the parameters. - -In this tutorial, you will learn how to implement and use this pattern to write and -put constraints on your model. Doing so is as easy as writing your own ``nn.Module``. - -Requirements ------------- -``"torch>=1.9.0a0+7aeee28"`` - -Implementing Parametrizations by Hand -------------------------------------- - -Assume that we want to have a square linear layer with symmetric weights, that is, -with weights :math:`X` such that :math:`X = X^{\intercal}`. One way to do so is -to copy the upper triangular part of the matrix into its lower triangular part -""" - -import torch -import torch.nn as nn -import torch.nn.utils.parametrize as P - -def symmetric(X): - return X.triu() + X.triu(1).transpose(-1, -2) - -X = torch.rand(3, 3) -A = symmetric(X) -print(A) -assert torch.allclose(A, A.T) - -############################################################################### -# We can then use this idea to implement a linear layer with symmetric weights: -class LinearSymmetric(nn.Module): - def __init__(self, n_features): - super().__init__() - self.weight = nn.Parameter(torch.rand(n_features, n_features)) - - def forward(self, x): - A = symmetric(self.weight) - return x @ A - -############################################################################### -# The layer can be then used as a regular linear layer -layer = LinearSymmetric(3) -out = layer(torch.rand(8, 3)) - -############################################################################### -# This implementation, although correct and self-contained, presents a number of problems: -# -# 1) It reimplements the layer. We had to implement the linear layer as ``x @ A``. This is -# not very problematic for a linear layer, but imagine having to reimplement a CNN or a -# Transformer... -# 2) It does not separate the layer and the parametrization. If the parametrization were -# more difficult, we would have to rewrite its code for each layer that we want to use it -# in. -# 3) It recomputes the parametrization everytime forward is called. If we used the layer -# several times during the forward pass, (imagine the recurrent kernel of an RNN) we would -# be recomputing the same ``A`` every time the layer is called. -# -# Parametrizations come to solve all these and other problems. -# -# Introduction to Parametrizations -# -------------------------------- -# -# Let's start by reimplementing the code above using ``torch.nn.utils.parametrize``. -# The only thing that we have to do is to write the parametrization as a regular ``nn.Module`` -class Symmetric(nn.Module): - def forward(self, X): - return X.triu() + X.triu(1).transpose(-1, -2) - -############################################################################### -# This is all we need to do. 
Once we have this, we can transform any regular layer into a -# symmetric layer by doing -layer = nn.Linear(3, 3) -P.register_parametrization(layer, "weight", Symmetric()) - -############################################################################### -# Now, the matrix of the linear layer is symmetric -A = layer.weight -print(A) -assert torch.allclose(A, A.T) - -############################################################################### -# We can do the same thing with any other layer. For example, we can create a CNN with -# skew-symmetric kernels (:math:`X = -X^{\intercal}`). We use a similar parametrization, -# copying minus the upper triangular part into the lower-triangular part -class Skew(nn.Module): - def forward(self, X): - A = X.triu(1) - return A - A.transpose(-1, -2) - - -cnn = nn.Conv2D(in_channels=5, out_channels=8, kernel_size=3) -P.register_parametrization(layer, "weight", Skew()) -# Print a few kernels -print(cnn.weight[0, 1]) -print(cnn.weight[2, 2]) - -############################################################################### -# Inspecting a parametrized module -# -------------------------------- -# When a module is parametrized, we find that the module has changed a bit. -# We may observe this by simply printing the module -layer = nn.Linear(3, 3) -print(f"Unparametrized:\n{layer}") -P.register_parametrization(layer, "weight", Symmetric()) -print(f"Parametrized:\n{layer}") - -############################################################################### -# We see that the ``Symmetric`` class has been registered under a ``parametrizations`` attribute. -# This ``parametrizations`` attribute is an ``nn.ModuleList``, and it can be accessed as such -print(layer.parametrizations.weight[0]) - -############################################################################### -# Note that each element in the `ModuleList` is itself a list, and we have to select the first -# element of this list. It will be clear later the reason for this, when we see how to contactenate -# paramtrizations. -# -# Something that we may notice is that, if we print the parameters, we see that the -# parameter ``weight`` has been moved -print(dict(layer.named_parameters())) - -############################################################################### -# It now sits under ``layer.parametrizations.weight.original`` -print(layer.parametrizations.weight.original) - -############################################################################### -# Besides these two small differences, the parametrization is doing exactly the same -# as our manual implementation -symmetric = Symmetric() -assert torch.allclose(layer.weight, symmetric(layer.parametrizations.weight.original)) - -############################################################################### -# Parametrizations are first-class citizens -# ----------------------------------------- -# Since ``layer.parametrizations`` is an `nn.ModuleList`, it means that the parametrizations -# are properly registered as submodules of the original module. As such, the same rules -# for registering parameters in a module apply to register a parametrization. -# For example, if a parametrization has parameters, these will be moved from CPU -# to CUDA when calling ``model = model.cuda()``. 
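A minimal sketch of what this means in practice (the ``LearnedScale`` class and its single parameter are illustrative names, not part of PyTorch; only the ``register_parametrization`` API shown in this tutorial is assumed): a parametrization that owns a learnable parameter shows up among the layer's parameters and follows the layer when its dtype or device changes.

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.nn.utils.parametrize as parametrize

    class LearnedScale(nn.Module):
        def __init__(self):
            super().__init__()
            # The parametrization owns a learnable parameter of its own
            self.scale = nn.Parameter(torch.tensor(1.))

        def forward(self, X):
            return self.scale * X

    layer = nn.Linear(3, 3)
    parametrize.register_parametrization(layer, "weight", LearnedScale())
    # The parametrization's parameter is registered on the layer
    print([name for name, _ in layer.named_parameters()])
    # Changing the dtype (or device) of the layer also moves the parametrization
    layer = layer.to(torch.float64)
    print(layer.parametrizations.weight[0].scale.dtype)  # torch.float64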
-# -# Caching the value of a parametrization -# -------------------------------------- -# Parametrizations come with an inbuilt caching system via the context manager ``P.cached()`` -class NoisyParametrization(nn.Module): - def forward(self, X): - print("Computing the Parametrization") - return X - -layer = nn.Linear(2, 3) -P.register_parametrization(layer, "weight", NoisyParametrization()) -print("Here, layer.weight is recomputed every time we call it") -foo = layer.weight + layer.weight.T -bar = layer.weight.sum() -with P.cached(): - print("Here, it is computed just the first time layer.weight is called") - foo = layer.weight + layer.weight.T - bar = layer.weight.sum() - -############################################################################### -# Composing Parametrizations -# -------------------------- -# Concatenating two parametrizations is as easy as registering them on the same tensor. -# We may use this to create complex parametrizations from simple ones. For example, the -# `Cayley map `_ -# maps the skew-symmetric matrices to the orthogonal matrices of positive determinant. We can -# concatenate ``Skew`` and a parametrization that implements the Cayley map to get a layer with -# orthogonal weight -class CayleyMap(nn.Module): - def __init__(self, n): - self.register_buffer("Id", torch.eye(n)) - - def forward(self, X): - # (I + X)(I - X)^{-1} - return torch.solve(self.Id + X, self.Id - X).solution - -layer = nn.Linear(3, 3) -P.register_parametrization(layer, "weight", Skew()) -P.register_parametrization(layer, "weight", CayleyMap(3)) -X = layer.weight -assert torch.allclose(X.T @ X, torch.eye(3)) # X is orthogonal - -############################################################################### -# This may also be used to prune a parametrized module, or to reuse parametrizations. For example, -# we may use the fact that the exponential of matrices maps the symmetric matrices to the -# Symmetric Positive Definite (SPD) matrices, and the skew-symmetric matrices to the orthogonal -# matrices. Using these two facts, we may reuse the parametrizations -class MatrixExponential(nn.Module): - def forward(X): - return torch.matrix_exp(X) - -layer_orthogonal = nn.Linear(3, 3) -P.register_parametrization(layer_orthogonal, "weight", Skew()) -P.register_parametrization(layer_orthogonal, "weight", MatrixExponential()) -X = layer_orthogonal.weight -assert torch.allclose(X.T @ X, torch.eye(3)) # X is orthogonal - -layer_spd = nn.Linear(3, 3) -P.register_parametrization(layer_spd, "weight", Symmetric()) -P.register_parametrization(layer_spd, "weight", MatrixExponential()) -X = layer_spd.weight -assert torch.allclose(X, X.T) # X is symmetric -assert (torch.symeig(X).eigenvalues > 0.).all() # X is positive definite - -############################################################################### -# Intializing Parametrizations -# ---------------------------- -# Parametrizations come with a mechanism to initialize them. If we implement a method -# ``right_inverse`` with signature -# -# .. code-block:: python -# -# def right_inverse(self, X: Tensor) -> Tensor -# -# it will be used when assigning to the parametrized tensor. 
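Another small sketch of this initialization mechanism (the ``Positive`` class below is an illustrative example, not a PyTorch class): when ``right_inverse`` is an exact inverse of ``forward``, assigning a valid value to the parametrized weight round-trips exactly.

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.nn.utils.parametrize as parametrize

    class Positive(nn.Module):
        # Constrains the weight to have strictly positive entries
        def forward(self, X):
            return torch.exp(X)

        def right_inverse(self, A):
            # Exact inverse of forward for positive A, so forward(right_inverse(A)) == A
            return torch.log(A)

    layer = nn.Linear(3, 3)
    parametrize.register_parametrization(layer, "weight", Positive())
    A = torch.rand(3, 3) + 0.1      # a strictly positive matrix
    layer.weight = A                # the assignment goes through right_inverse
    assert torch.allclose(layer.weight, A)
    assert (layer.weight > 0).all()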
-# -# Let's upgrade our implementation of the ``Skew`` class to support this -class Skew(nn.Module): - def forward(self, X): - A = X.triu(1) - return A - A.transpose(-1, -2) - - def is_skew(self, X): - return torch.allclose(X, -X.transpose(-1, -2)) - - def right_inverse(self, A): - if not self.is_skew(A): - raise ValueError(f"The provided matrix {A} is not skew-symmetric") - return A.triu(1) - -############################################################################### -# We may now initialize a layer that is parametrized with ``Skew`` -layer = nn.Linear(3, 3) -P.register_parametrization(layer, "weight", Skew()) -X = torch.rand(3, 3) -X = X - X.T # X is now skew-symmetric -layer.weight = X # Initialize layer.weight to be X -assert torch.allclose(layer.weight, X) # layer.weight == X - -############################################################################### -# This ``right_inverse`` works as expected when we compose parametrizations. To see this, let's -# upgrade the Cayley parametrization to also support being initialized -class CayleyMap(nn.Module): - def __init__(self, n): - self.register_buffer("Id", torch.eye(n)) - - def forward(self, X): - # (I + X)(I - X)^{-1} - return torch.solve(self.Id + X, self.Id - X).solution - - def right_inverse(self, A): - # See https://en.wikipedia.org/wiki/Cayley_transform#Matrix_map - # (X - I)(X + I)^{-1} - return torch.solve(X - self.Id, self.Id + X).solution - -layer_orthogonal = nn.Linear(3, 3) -P.register_parametrization(layer_orthogonal, "weight", Skew()) -P.register_parametrization(layer_orthogonal, "weight", CayleyMap(3)) -# Sample an orthogonal matrix with positive determinant -X = torch.empty(3, 3) -nn.init.orthogonal_(X) -if X.det() < 0.: - X[0].neg_() -layer_orthogonal.weight = X -assert torch.allclose(X, layer_orthogonal.weight) # layer_orthogonal.weight == X - -############################################################################### -# This initialization step can be written more succinctly as -layer_orthogonal.weight = nn.init.orthogonal_(layer_orthogonal.weight) - -############################################################################### -# The name of this method comes from the fact that we would often expect -# that ``forward(right_inverse(X)) == X``. This is a direct way of rewriting that -# the forward afer the initalization with value ``X`` should return the value ``X``. -# This constraint is not enforced in the code. In fact, at times, it might be of -# interest to relax this relation. For example, consider the following implementation -# of a randomized pruning method: -class PruningParametrization(nn.Module): - def __init__(self, X, p_drop=0.2): - # sample zeros with probability p_drop - mask = torch.full_like(X, 1.0 - p_drop) - self.mask = torch.bernoulli(mask) - - def forward(self, X): - return X * self.mask - - def right_inverse(self, A): - return A - -############################################################################### -# In this case, it is not true that ``forward(right_inverse(X)) == X``. This is -# only true when the matrix ``A`` passed to ``right_inverse`` has zeros in the -# same positions as the mask. 
Even then, if we assign a tensor to a pruned parameter, -# it will comes as no surprise that tensor will be, in fact, pruned.: -# -# Removing a Parametrization -# -------------------------- -# We may remove a parametrization from a module by using ``P.remove_parametrization()`` -layer = nn.Linear(3, 3) -print(layer) -print(layer.weight) -P.register_parametrization(layer, "weight", Skew()) -print(layer) -print(layer.weight) -P.remove_parametrization(layer, "weight") -print(layer) -print(layer.weight) - -############################################################################### -# While doing so, we may choose to leave the original parameter (i.e. that in -# ``layer.parametriations.weight.original``) rather than its parametrized version -# by setting the flag ``leave_parametrized=False`` -layer = nn.Linear(3, 3) -print(layer) -print(layer.weight) -P.register_parametrization(layer, "weight", Skew()) -print(layer) -print(layer.weight) -P.remove_parametrization(layer, "weight", leave_parametrized=False) -print(layer) -print(layer.weight) From f3d1204ca8f828713c8555f4080864fb3aac7838 Mon Sep 17 00:00:00 2001 From: lezcano Date: Mon, 29 Mar 2021 14:08:32 +0100 Subject: [PATCH 09/17] Forgot to add the file... --- intermediate_source/parametrizations.py | 353 ++++++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100644 intermediate_source/parametrizations.py diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py new file mode 100644 index 00000000000..70ccc7095fc --- /dev/null +++ b/intermediate_source/parametrizations.py @@ -0,0 +1,353 @@ +# -*- coding: utf-8 -*- +""" +Parametrizations Tutorial +========================= +**Author**: `Mario Lezcano `_ + +Regularizing deep-learning models is a surprisingly challenging task. +Classical techniques such as penalty methods often fall short when applied +on deep models due to the complexity of the function being optimized. +This is particularly problematic when working with ill-conditioned models. +Examples of these are RNNs trained on long sequences and GANs. A number +of techniques have been proposed in the recent years to regularize these +models and improve their convergence. On recurrent models, it has been +proposed to control the singular values of the recurrent kernel for the +RNN to be well-conditioned. This can be achieved, for example, by making +the recurrent kernel `orthogonal `_. +Another way to regularize recurrent models is via +"`weight normalization `_". +This approach proposes to decouple the learning of the parameters from the +learning of their scale. To do so, the parameter is divided by its +`Frobenius norm `_. +A similar regularization was proposed for GANs under the name of +"`spectral normalization `_". This method +controls the Lipschitz constant of the network by dividing its parameters by +their `spectral norm `_, +rather than its Frobenius norm. + +All these methods have a pattern in common. They all transform a parameter +in an appropriate way before using it. In the first case, they make it orthogonal by +using a function that maps matrices to orthogonal matrices. In the case of weight +and spectral normalization, they divide the original parameter by its norm. + +More generally, all these examples use a function to put extra structure on the parameters. +In other words, they use a function to constrain the parameters. + +In this tutorial, you will learn how to implement and use this pattern to write and +put constraints on your model. 
Doing so is as easy as writing your own ``nn.Module``. + +Requirements +------------ +``"torch>=1.9.0a0+7aeee28"`` + +Implementing Parametrizations by Hand +------------------------------------- + +Assume that we want to have a square linear layer with symmetric weights, that is, +with weights :math:`X` such that :math:`X = X^{\intercal}`. One way to do so is +to copy the upper triangular part of the matrix into its lower triangular part +""" + +import torch +import torch.nn as nn +import torch.nn.utils.parametrize as P + +def symmetric(X): + return X.triu() + X.triu(1).transpose(-1, -2) + +X = torch.rand(3, 3) +A = symmetric(X) +print(A) +assert torch.allclose(A, A.T) + +############################################################################### +# We can then use this idea to implement a linear layer with symmetric weights: +class LinearSymmetric(nn.Module): + def __init__(self, n_features): + super().__init__() + self.weight = nn.Parameter(torch.rand(n_features, n_features)) + + def forward(self, x): + A = symmetric(self.weight) + return x @ A + +############################################################################### +# The layer can be then used as a regular linear layer +layer = LinearSymmetric(3) +out = layer(torch.rand(8, 3)) + +############################################################################### +# This implementation, although correct and self-contained, presents a number of problems: +# +# 1) It reimplements the layer. We had to implement the linear layer as ``x @ A``. This is +# not very problematic for a linear layer, but imagine having to reimplement a CNN or a +# Transformer... +# 2) It does not separate the layer and the parametrization. If the parametrization were +# more difficult, we would have to rewrite its code for each layer that we want to use it +# in. +# 3) It recomputes the parametrization everytime forward is called. If we used the layer +# several times during the forward pass, (imagine the recurrent kernel of an RNN) we would +# be recomputing the same ``A`` every time the layer is called. +# +# Parametrizations come to solve all these and other problems. +# +# Introduction to Parametrizations +# -------------------------------- +# +# Let's start by reimplementing the code above using ``torch.nn.utils.parametrize``. +# The only thing that we have to do is to write the parametrization as a regular ``nn.Module`` +class Symmetric(nn.Module): + def forward(self, X): + return X.triu() + X.triu(1).transpose(-1, -2) + +############################################################################### +# This is all we need to do. Once we have this, we can transform any regular layer into a +# symmetric layer by doing +layer = nn.Linear(3, 3) +P.register_parametrization(layer, "weight", Symmetric()) + +############################################################################### +# Now, the matrix of the linear layer is symmetric +A = layer.weight +print(A) +assert torch.allclose(A, A.T) + +############################################################################### +# We can do the same thing with any other layer. For example, we can create a CNN with +# skew-symmetric kernels (:math:`X = -X^{\intercal}`). 
We use a similar parametrization, +# copying minus the upper triangular part into the lower-triangular part +class Skew(nn.Module): + def forward(self, X): + A = X.triu(1) + return A - A.transpose(-1, -2) + + +cnn = nn.Conv2D(in_channels=5, out_channels=8, kernel_size=3) +P.register_parametrization(layer, "weight", Skew()) +# Print a few kernels +print(cnn.weight[0, 1]) +print(cnn.weight[2, 2]) + +############################################################################### +# Inspecting a parametrized module +# -------------------------------- +# When a module is parametrized, we find that the module has changed a bit. +# We may observe this by simply printing the module +layer = nn.Linear(3, 3) +print(f"Unparametrized:\n{layer}") +P.register_parametrization(layer, "weight", Symmetric()) +print(f"Parametrized:\n{layer}") + +############################################################################### +# We see that the ``Symmetric`` class has been registered under a ``parametrizations`` attribute. +# This ``parametrizations`` attribute is an ``nn.ModuleList``, and it can be accessed as such +print(layer.parametrizations.weight[0]) + +############################################################################### +# Note that each element in the `ModuleList` is itself a list, and we have to select the first +# element of this list. It will be clear later the reason for this, when we see how to contactenate +# paramtrizations. +# +# Something that we may notice is that, if we print the parameters, we see that the +# parameter ``weight`` has been moved +print(dict(layer.named_parameters())) + +############################################################################### +# It now sits under ``layer.parametrizations.weight.original`` +print(layer.parametrizations.weight.original) + +############################################################################### +# Besides these two small differences, the parametrization is doing exactly the same +# as our manual implementation +symmetric = Symmetric() +assert torch.allclose(layer.weight, symmetric(layer.parametrizations.weight.original)) + +############################################################################### +# Parametrizations are first-class citizens +# ----------------------------------------- +# Since ``layer.parametrizations`` is an `nn.ModuleList`, it means that the parametrizations +# are properly registered as submodules of the original module. As such, the same rules +# for registering parameters in a module apply to register a parametrization. +# For example, if a parametrization has parameters, these will be moved from CPU +# to CUDA when calling ``model = model.cuda()``. 
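To see why being first-class citizens matters during training, here is a short sketch (reusing the ``Symmetric`` parametrization defined earlier in this file; the loss and the number of steps are arbitrary): the optimizer only ever sees the unconstrained ``original`` tensor and any parameters of the parametrization, so the constraint survives gradient updates.

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.nn.utils.parametrize as parametrize

    class Symmetric(nn.Module):
        def forward(self, X):
            return X.triu() + X.triu(1).transpose(-1, -2)

    layer = nn.Linear(3, 3)
    parametrize.register_parametrization(layer, "weight", Symmetric())
    opt = torch.optim.SGD(layer.parameters(), lr=0.1)

    for _ in range(3):
        opt.zero_grad()
        loss = layer(torch.rand(8, 3)).pow(2).sum()
        loss.backward()
        opt.step()

    # The weight remains symmetric after the updates
    assert torch.allclose(layer.weight, layer.weight.T)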
+# +# Caching the value of a parametrization +# -------------------------------------- +# Parametrizations come with an inbuilt caching system via the context manager ``P.cached()`` +class NoisyParametrization(nn.Module): + def forward(self, X): + print("Computing the Parametrization") + return X + +layer = nn.Linear(2, 3) +P.register_parametrization(layer, "weight", NoisyParametrization()) +print("Here, layer.weight is recomputed every time we call it") +foo = layer.weight + layer.weight.T +bar = layer.weight.sum() +with P.cached(): + print("Here, it is computed just the first time layer.weight is called") + foo = layer.weight + layer.weight.T + bar = layer.weight.sum() + +############################################################################### +# Composing Parametrizations +# -------------------------- +# Concatenating two parametrizations is as easy as registering them on the same tensor. +# We may use this to create complex parametrizations from simple ones. For example, the +# `Cayley map `_ +# maps the skew-symmetric matrices to the orthogonal matrices of positive determinant. We can +# concatenate ``Skew`` and a parametrization that implements the Cayley map to get a layer with +# orthogonal weight +class CayleyMap(nn.Module): + def __init__(self, n): + self.register_buffer("Id", torch.eye(n)) + + def forward(self, X): + # (I + X)(I - X)^{-1} + return torch.solve(self.Id + X, self.Id - X).solution + +layer = nn.Linear(3, 3) +P.register_parametrization(layer, "weight", Skew()) +P.register_parametrization(layer, "weight", CayleyMap(3)) +X = layer.weight +assert torch.allclose(X.T @ X, torch.eye(3)) # X is orthogonal + +############################################################################### +# This may also be used to prune a parametrized module, or to reuse parametrizations. For example, +# we may use the fact that the exponential of matrices maps the symmetric matrices to the +# Symmetric Positive Definite (SPD) matrices, and the skew-symmetric matrices to the orthogonal +# matrices. Using these two facts, we may reuse the parametrizations +class MatrixExponential(nn.Module): + def forward(X): + return torch.matrix_exp(X) + +layer_orthogonal = nn.Linear(3, 3) +P.register_parametrization(layer_orthogonal, "weight", Skew()) +P.register_parametrization(layer_orthogonal, "weight", MatrixExponential()) +X = layer_orthogonal.weight +assert torch.allclose(X.T @ X, torch.eye(3)) # X is orthogonal + +layer_spd = nn.Linear(3, 3) +P.register_parametrization(layer_spd, "weight", Symmetric()) +P.register_parametrization(layer_spd, "weight", MatrixExponential()) +X = layer_spd.weight +assert torch.allclose(X, X.T) # X is symmetric +assert (torch.symeig(X).eigenvalues > 0.).all() # X is positive definite + +############################################################################### +# Intializing Parametrizations +# ---------------------------- +# Parametrizations come with a mechanism to initialize them. If we implement a method +# ``right_inverse`` with signature +# +# .. code-block:: python +# +# def right_inverse(self, X: Tensor) -> Tensor +# +# it will be used when assigning to the parametrized tensor. 
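Before the ``Skew`` upgrade shown below, the same idea in an even simpler setting (a sketch; it assumes the matrix being assigned is already symmetric): the ``Symmetric`` parametrization from earlier only needs to keep the upper-triangular part in its ``right_inverse``, since ``forward`` rebuilds the rest.

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.nn.utils.parametrize as parametrize

    class Symmetric(nn.Module):
        def forward(self, X):
            return X.triu() + X.triu(1).transpose(-1, -2)

        def right_inverse(self, A):
            # We assume A is symmetric; its upper-triangular part determines it
            return A.triu()

    layer = nn.Linear(3, 3)
    parametrize.register_parametrization(layer, "weight", Symmetric())
    A = torch.rand(3, 3)
    A = A + A.T                      # make A symmetric
    layer.weight = A                 # initialize through right_inverse
    assert torch.allclose(layer.weight, A)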
+# +# Let's upgrade our implementation of the ``Skew`` class to support this +class Skew(nn.Module): + def forward(self, X): + A = X.triu(1) + return A - A.transpose(-1, -2) + + def is_skew(self, X): + return torch.allclose(X, -X.transpose(-1, -2)) + + def right_inverse(self, A): + if not self.is_skew(A): + raise ValueError(f"The provided matrix {A} is not skew-symmetric") + return A.triu(1) + +############################################################################### +# We may now initialize a layer that is parametrized with ``Skew`` +layer = nn.Linear(3, 3) +P.register_parametrization(layer, "weight", Skew()) +X = torch.rand(3, 3) +X = X - X.T # X is now skew-symmetric +layer.weight = X # Initialize layer.weight to be X +assert torch.allclose(layer.weight, X) # layer.weight == X + +############################################################################### +# This ``right_inverse`` works as expected when we compose parametrizations. To see this, let's +# upgrade the Cayley parametrization to also support being initialized +class CayleyMap(nn.Module): + def __init__(self, n): + self.register_buffer("Id", torch.eye(n)) + + def forward(self, X): + # (I + X)(I - X)^{-1} + return torch.solve(self.Id + X, self.Id - X).solution + + def right_inverse(self, A): + # See https://en.wikipedia.org/wiki/Cayley_transform#Matrix_map + # (X - I)(X + I)^{-1} + return torch.solve(X - self.Id, self.Id + X).solution + +layer_orthogonal = nn.Linear(3, 3) +P.register_parametrization(layer_orthogonal, "weight", Skew()) +P.register_parametrization(layer_orthogonal, "weight", CayleyMap(3)) +# Sample an orthogonal matrix with positive determinant +X = torch.empty(3, 3) +nn.init.orthogonal_(X) +if X.det() < 0.: + X[0].neg_() +layer_orthogonal.weight = X +assert torch.allclose(X, layer_orthogonal.weight) # layer_orthogonal.weight == X + +############################################################################### +# This initialization step can be written more succinctly as +layer_orthogonal.weight = nn.init.orthogonal_(layer_orthogonal.weight) + +############################################################################### +# The name of this method comes from the fact that we would often expect +# that ``forward(right_inverse(X)) == X``. This is a direct way of rewriting that +# the forward afer the initalization with value ``X`` should return the value ``X``. +# This constraint is not enforced in the code. In fact, at times, it might be of +# interest to relax this relation. For example, consider the following implementation +# of a randomized pruning method: +class PruningParametrization(nn.Module): + def __init__(self, X, p_drop=0.2): + # sample zeros with probability p_drop + mask = torch.full_like(X, 1.0 - p_drop) + self.mask = torch.bernoulli(mask) + + def forward(self, X): + return X * self.mask + + def right_inverse(self, A): + return A + +############################################################################### +# In this case, it is not true that ``forward(right_inverse(X)) == X``. This is +# only true when the matrix ``A`` passed to ``right_inverse`` has zeros in the +# same positions as the mask. 
Even then, if we assign a tensor to a pruned parameter, +# it will comes as no surprise that tensor will be, in fact, pruned.: +# +# Removing a Parametrization +# -------------------------- +# We may remove a parametrization from a module by using ``P.remove_parametrization()`` +layer = nn.Linear(3, 3) +print(layer) +print(layer.weight) +P.register_parametrization(layer, "weight", Skew()) +print(layer) +print(layer.weight) +P.remove_parametrization(layer, "weight") +print(layer) +print(layer.weight) + +############################################################################### +# While doing so, we may choose to leave the original parameter (i.e. that in +# ``layer.parametriations.weight.original``) rather than its parametrized version +# by setting the flag ``leave_parametrized=False`` +layer = nn.Linear(3, 3) +print(layer) +print(layer.weight) +P.register_parametrization(layer, "weight", Skew()) +print(layer) +print(layer.weight) +P.remove_parametrization(layer, "weight", leave_parametrized=False) +print(layer) +print(layer.weight) From a2dec3f2e303b4b1cb3d5a2f49722f13e3ed695f Mon Sep 17 00:00:00 2001 From: lezcano Date: Wed, 7 Apr 2021 12:30:48 +0100 Subject: [PATCH 10/17] Rename parametrizations_tutorial by parametrizations everywhere Add Alban's suggestions Correct the code Beter spacing after enumeration --- index.rst | 6 +- intermediate_source/parametrizations.py | 177 ++++++++++++++---------- 2 files changed, 107 insertions(+), 76 deletions(-) diff --git a/index.rst b/index.rst index ba695bedfbf..dbb602cae00 100644 --- a/index.rst +++ b/index.rst @@ -326,9 +326,9 @@ Welcome to PyTorch Tutorials .. customcarditem:: :header: Parametrizations Tutorial - :card_description: Learn how to use torch.nn.utils.parametrizations to put constriants in your parameters (e.g. make them orthogonal, symmetric positive definite, low-rank...) + :card_description: Learn how to use torch.nn.utils.parametrizations to put constriants on your parameters (e.g. make them orthogonal, symmetric positive definite, low-rank...) :image: _static/img/thumbnails/cropped/parametrizations.png - :link: intermediate/parametrizations_tutorial.html + :link: intermediate/parametrizations.html :tags: Model-Optimization,Best-Practice .. customcarditem:: @@ -627,7 +627,7 @@ Additional Resources beginner/profiler beginner/hyperparameter_tuning_tutorial - intermediate/parametrizations_tutorial + intermediate/parametrizations intermediate/pruning_tutorial advanced/dynamic_quantization_tutorial intermediate/dynamic_quantization_bert_tutorial diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py index 70ccc7095fc..bbfae335faf 100644 --- a/intermediate_source/parametrizations.py +++ b/intermediate_source/parametrizations.py @@ -17,13 +17,14 @@ Another way to regularize recurrent models is via "`weight normalization `_". This approach proposes to decouple the learning of the parameters from the -learning of their scale. To do so, the parameter is divided by its -`Frobenius norm `_. +learning of their norm. To do so, the parameter is divided by its +`Frobenius norm `_ +and a separate parameter encoding their norm is learnt. A similar regularization was proposed for GANs under the name of "`spectral normalization `_". This method controls the Lipschitz constant of the network by dividing its parameters by their `spectral norm `_, -rather than its Frobenius norm. +rather than their Frobenius norm. All these methods have a pattern in common. 
They all transform a parameter in an appropriate way before using it. In the first case, they make it orthogonal by @@ -36,9 +37,7 @@ In this tutorial, you will learn how to implement and use this pattern to write and put constraints on your model. Doing so is as easy as writing your own ``nn.Module``. -Requirements ------------- -``"torch>=1.9.0a0+7aeee28"`` +Requirements: ``torch>=1.9.0`` Implementing Parametrizations by Hand ------------------------------------- @@ -50,15 +49,15 @@ import torch import torch.nn as nn -import torch.nn.utils.parametrize as P +import torch.nn.utils.parametrize as parametrize def symmetric(X): return X.triu() + X.triu(1).transpose(-1, -2) X = torch.rand(3, 3) A = symmetric(X) -print(A) -assert torch.allclose(A, A.T) +assert torch.allclose(A, A.T) # A is symmetric +print(A) # Quick visual check ############################################################################### # We can then use this idea to implement a linear layer with symmetric weights: @@ -89,11 +88,11 @@ def forward(self, x): # several times during the forward pass, (imagine the recurrent kernel of an RNN) we would # be recomputing the same ``A`` every time the layer is called. # -# Parametrizations come to solve all these and other problems. -# # Introduction to Parametrizations # -------------------------------- # +# Parametrizations come to solve all these and other problems. +# # Let's start by reimplementing the code above using ``torch.nn.utils.parametrize``. # The only thing that we have to do is to write the parametrization as a regular ``nn.Module`` class Symmetric(nn.Module): @@ -104,7 +103,7 @@ def forward(self, X): # This is all we need to do. Once we have this, we can transform any regular layer into a # symmetric layer by doing layer = nn.Linear(3, 3) -P.register_parametrization(layer, "weight", Symmetric()) +parametrize.register_parametrization(layer, "weight", Symmetric()) ############################################################################### # Now, the matrix of the linear layer is symmetric @@ -114,16 +113,17 @@ def forward(self, X): ############################################################################### # We can do the same thing with any other layer. For example, we can create a CNN with -# skew-symmetric kernels (:math:`X = -X^{\intercal}`). We use a similar parametrization, -# copying minus the upper triangular part into the lower-triangular part +# `skew-symmetric `_ kernels. +# We use a similar parametrization, copying minus the upper triangular part into the +# lower-triangular part class Skew(nn.Module): def forward(self, X): A = X.triu(1) return A - A.transpose(-1, -2) -cnn = nn.Conv2D(in_channels=5, out_channels=8, kernel_size=3) -P.register_parametrization(layer, "weight", Skew()) +cnn = nn.Conv2d(in_channels=5, out_channels=8, kernel_size=3) +parametrize.register_parametrization(layer, "weight", Skew()) # Print a few kernels print(cnn.weight[0, 1]) print(cnn.weight[2, 2]) @@ -131,24 +131,34 @@ def forward(self, X): ############################################################################### # Inspecting a parametrized module # -------------------------------- -# When a module is parametrized, we find that the module has changed a bit. 
-# We may observe this by simply printing the module +# +# When a module is parametrized, we find that the module has changed in two ways: +# +# 1) It has a new ``module.parametrizations`` attribute +# +# 2) The weight has been moved to ``module.parametrizations.weight.original`` +# +# | +# We may observe this first change by printing the module layer = nn.Linear(3, 3) -print(f"Unparametrized:\n{layer}") -P.register_parametrization(layer, "weight", Symmetric()) -print(f"Parametrized:\n{layer}") +print("Unparametrized:") +print(layer) +parametrize.register_parametrization(layer, "weight", Symmetric()) +print("Parametrized:") +print(layer) ############################################################################### # We see that the ``Symmetric`` class has been registered under a ``parametrizations`` attribute. -# This ``parametrizations`` attribute is an ``nn.ModuleList``, and it can be accessed as such -print(layer.parametrizations.weight[0]) +# This ``parametrizations`` attribute is an ``nn.ModuleDict``, and it can be accessed as such +print(layer.parametrizations.weight) ############################################################################### -# Note that each element in the `ModuleList` is itself a list, and we have to select the first -# element of this list. It will be clear later the reason for this, when we see how to contactenate -# paramtrizations. +# Note that each element in the ``ModuleDict`` is of type ``ParametrizationList``. +# This ``ParametrizationList`` behaves like an ``nn.Sequential`` that also holds the weight. +# element of this list. It will be clear later the reason for this, when we see how to concatenate +# parametrizations. # -# Something that we may notice is that, if we print the parameters, we see that the +# The other thing that we notice is that, if we print the parameters, we see that the # parameter ``weight`` has been moved print(dict(layer.named_parameters())) @@ -160,12 +170,14 @@ def forward(self, X): # Besides these two small differences, the parametrization is doing exactly the same # as our manual implementation symmetric = Symmetric() -assert torch.allclose(layer.weight, symmetric(layer.parametrizations.weight.original)) +weight_orig = layer.parametrizations.weight.original +assert torch.allclose(layer.weight, symmetric(weight_orig)) ############################################################################### # Parametrizations are first-class citizens # ----------------------------------------- -# Since ``layer.parametrizations`` is an `nn.ModuleList`, it means that the parametrizations +# +# Since ``layer.parametrizations`` is an ``nn.ModuleList``, it means that the parametrizations # are properly registered as submodules of the original module. As such, the same rules # for registering parameters in a module apply to register a parametrization. 
# For example, if a parametrization has parameters, these will be moved from CPU @@ -173,33 +185,37 @@ def forward(self, X): # # Caching the value of a parametrization # -------------------------------------- -# Parametrizations come with an inbuilt caching system via the context manager ``P.cached()`` +# +# Parametrizations come with an inbuilt caching system via the context manager +# ``parametrize.cached()`` class NoisyParametrization(nn.Module): def forward(self, X): print("Computing the Parametrization") return X -layer = nn.Linear(2, 3) -P.register_parametrization(layer, "weight", NoisyParametrization()) +layer = nn.Linear(4, 4) +parametrize.register_parametrization(layer, "weight", NoisyParametrization()) print("Here, layer.weight is recomputed every time we call it") foo = layer.weight + layer.weight.T bar = layer.weight.sum() -with P.cached(): +with parametrize.cached(): print("Here, it is computed just the first time layer.weight is called") foo = layer.weight + layer.weight.T bar = layer.weight.sum() ############################################################################### -# Composing Parametrizations -# -------------------------- +# Concatenating Parametrizations +# ------------------------------ +# # Concatenating two parametrizations is as easy as registering them on the same tensor. -# We may use this to create complex parametrizations from simple ones. For example, the +# We may use this to create complex parametrizations from simpler ones. For example, the # `Cayley map `_ # maps the skew-symmetric matrices to the orthogonal matrices of positive determinant. We can # concatenate ``Skew`` and a parametrization that implements the Cayley map to get a layer with # orthogonal weight class CayleyMap(nn.Module): def __init__(self, n): + super().__init__() self.register_buffer("Id", torch.eye(n)) def forward(self, X): @@ -207,29 +223,29 @@ def forward(self, X): return torch.solve(self.Id + X, self.Id - X).solution layer = nn.Linear(3, 3) -P.register_parametrization(layer, "weight", Skew()) -P.register_parametrization(layer, "weight", CayleyMap(3)) +parametrize.register_parametrization(layer, "weight", Skew()) +parametrize.register_parametrization(layer, "weight", CayleyMap(3)) X = layer.weight -assert torch.allclose(X.T @ X, torch.eye(3)) # X is orthogonal +assert torch.allclose(X.T @ X, torch.eye(3), atol=1e-6) # X is orthogonal ############################################################################### # This may also be used to prune a parametrized module, or to reuse parametrizations. For example, -# we may use the fact that the exponential of matrices maps the symmetric matrices to the -# Symmetric Positive Definite (SPD) matrices, and the skew-symmetric matrices to the orthogonal -# matrices. Using these two facts, we may reuse the parametrizations +# the matrix exponential maps the symmetric matrices to the Symmetric Positive Definite (SPD) matrices +# But the matrix exponential also maps the skew-symmetric matrices to the orthogonal matrices. 
+# Using these two facts, we may reuse the parametrizations before to our advantage: class MatrixExponential(nn.Module): - def forward(X): + def forward(self, X): return torch.matrix_exp(X) layer_orthogonal = nn.Linear(3, 3) -P.register_parametrization(layer_orthogonal, "weight", Skew()) -P.register_parametrization(layer_orthogonal, "weight", MatrixExponential()) +parametrize.register_parametrization(layer_orthogonal, "weight", Skew()) +parametrize.register_parametrization(layer_orthogonal, "weight", MatrixExponential()) X = layer_orthogonal.weight -assert torch.allclose(X.T @ X, torch.eye(3)) # X is orthogonal +assert torch.allclose(X.T @ X, torch.eye(3), atol=1e-6) # X is orthogonal layer_spd = nn.Linear(3, 3) -P.register_parametrization(layer_spd, "weight", Symmetric()) -P.register_parametrization(layer_spd, "weight", MatrixExponential()) +parametrize.register_parametrization(layer_spd, "weight", Symmetric()) +parametrize.register_parametrization(layer_spd, "weight", MatrixExponential()) X = layer_spd.weight assert torch.allclose(X, X.T) # X is symmetric assert (torch.symeig(X).eigenvalues > 0.).all() # X is positive definite @@ -237,6 +253,7 @@ def forward(X): ############################################################################### # Intializing Parametrizations # ---------------------------- +# # Parametrizations come with a mechanism to initialize them. If we implement a method # ``right_inverse`` with signature # @@ -252,49 +269,49 @@ def forward(self, X): A = X.triu(1) return A - A.transpose(-1, -2) - def is_skew(self, X): - return torch.allclose(X, -X.transpose(-1, -2)) - def right_inverse(self, A): - if not self.is_skew(A): - raise ValueError(f"The provided matrix {A} is not skew-symmetric") + # We assume that A is skew-symmetric + # We take the upper-triangular elements, as these are those used in the forward return A.triu(1) ############################################################################### # We may now initialize a layer that is parametrized with ``Skew`` layer = nn.Linear(3, 3) -P.register_parametrization(layer, "weight", Skew()) +parametrize.register_parametrization(layer, "weight", Skew()) X = torch.rand(3, 3) X = X - X.T # X is now skew-symmetric layer.weight = X # Initialize layer.weight to be X assert torch.allclose(layer.weight, X) # layer.weight == X ############################################################################### -# This ``right_inverse`` works as expected when we compose parametrizations. To see this, let's -# upgrade the Cayley parametrization to also support being initialized +# This ``right_inverse`` works as expected when we concatenate parametrizations. 
+# To see this, let's upgrade the Cayley parametrization to also support being initialized class CayleyMap(nn.Module): def __init__(self, n): + super().__init__() self.register_buffer("Id", torch.eye(n)) def forward(self, X): + # Assume X skew-symmetric # (I + X)(I - X)^{-1} return torch.solve(self.Id + X, self.Id - X).solution def right_inverse(self, A): + # Assume A orthogonal # See https://en.wikipedia.org/wiki/Cayley_transform#Matrix_map # (X - I)(X + I)^{-1} return torch.solve(X - self.Id, self.Id + X).solution layer_orthogonal = nn.Linear(3, 3) -P.register_parametrization(layer_orthogonal, "weight", Skew()) -P.register_parametrization(layer_orthogonal, "weight", CayleyMap(3)) +parametrize.register_parametrization(layer_orthogonal, "weight", Skew()) +parametrize.register_parametrization(layer_orthogonal, "weight", CayleyMap(3)) # Sample an orthogonal matrix with positive determinant X = torch.empty(3, 3) nn.init.orthogonal_(X) if X.det() < 0.: X[0].neg_() layer_orthogonal.weight = X -assert torch.allclose(X, layer_orthogonal.weight) # layer_orthogonal.weight == X +assert torch.allclose(layer_orthogonal.weight, X) # layer_orthogonal.weight == X ############################################################################### # This initialization step can be written more succinctly as @@ -304,7 +321,7 @@ def right_inverse(self, A): # The name of this method comes from the fact that we would often expect # that ``forward(right_inverse(X)) == X``. This is a direct way of rewriting that # the forward afer the initalization with value ``X`` should return the value ``X``. -# This constraint is not enforced in the code. In fact, at times, it might be of +# This constraint is not strongly enforced in practice. In fact, at times, it might be of # interest to relax this relation. For example, consider the following implementation # of a randomized pruning method: class PruningParametrization(nn.Module): @@ -320,34 +337,48 @@ def right_inverse(self, A): return A ############################################################################### -# In this case, it is not true that ``forward(right_inverse(X)) == X``. This is -# only true when the matrix ``A`` passed to ``right_inverse`` has zeros in the -# same positions as the mask. Even then, if we assign a tensor to a pruned parameter, -# it will comes as no surprise that tensor will be, in fact, pruned.: -# +# In this case, it is not true that for every matrix A ``forward(right_inverse(A)) == A``. +# This is only true when the matrix ``A`` has zeros in the same positions as the mask. +# Even then, if we assign a tensor to a pruned parameter, it will comes as no surprise +# that tensor will be, in fact, pruned: +layer = nn.Linear(3, 4) +parametrize.register_parametrization(layer, "weight", PruningParametrization(layer.weight)) +X = torch.rand(4, 3) +print(X) +layer.weight = X +print(layer.weight) + # Removing a Parametrization # -------------------------- -# We may remove a parametrization from a module by using ``P.remove_parametrization()`` +# +# We may remove all the parametrizations from a parameter or a buffer in a module +# by using ``parametrize.remove_parametrizations()`` layer = nn.Linear(3, 3) +print("Before:") print(layer) print(layer.weight) -P.register_parametrization(layer, "weight", Skew()) +parametrize.register_parametrization(layer, "weight", Skew()) +print("Parametrized:") print(layer) print(layer.weight) -P.remove_parametrization(layer, "weight") +parametrize.remove_parametrizations(layer, "weight") +print("After. 
Left with the skew-symmetric values but unconstrained:") print(layer) print(layer.weight) ############################################################################### -# While doing so, we may choose to leave the original parameter (i.e. that in -# ``layer.parametriations.weight.original``) rather than its parametrized version -# by setting the flag ``leave_parametrized=False`` +# When removing a parametrization, we may choose to leave the original parameter (i.e. that in +# ``layer.parametriations.weight.original``) rather than its parametrized version by setting +# the flag ``leave_parametrized=False`` layer = nn.Linear(3, 3) +print("Before:") print(layer) print(layer.weight) -P.register_parametrization(layer, "weight", Skew()) +parametrize.register_parametrization(layer, "weight", Skew()) +print("Parametrized:") print(layer) print(layer.weight) -P.remove_parametrization(layer, "weight", leave_parametrized=False) +parametrize.remove_parametrizations(layer, "weight", leave_parametrized=False) +print("After. Same as Before:") print(layer) print(layer.weight) From b952c785d0a32a651aabe7131cae77757293afbd Mon Sep 17 00:00:00 2001 From: lezcano Date: Wed, 7 Apr 2021 15:48:20 +0100 Subject: [PATCH 11/17] Minor --- index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.rst b/index.rst index dbb602cae00..24ec12ca659 100644 --- a/index.rst +++ b/index.rst @@ -326,7 +326,7 @@ Welcome to PyTorch Tutorials .. customcarditem:: :header: Parametrizations Tutorial - :card_description: Learn how to use torch.nn.utils.parametrizations to put constriants on your parameters (e.g. make them orthogonal, symmetric positive definite, low-rank...) + :card_description: Learn how to use torch.nn.utils.parametrize to put constriants on your parameters (e.g. make them orthogonal, symmetric positive definite, low-rank...) 
:image: _static/img/thumbnails/cropped/parametrizations.png :link: intermediate/parametrizations.html :tags: Model-Optimization,Best-Practice From f0e55b34b2a06cd7c4958e1053693f8a1e1b59fc Mon Sep 17 00:00:00 2001 From: lezcano Date: Wed, 7 Apr 2021 16:01:08 +0100 Subject: [PATCH 12/17] Add more comments --- intermediate_source/parametrizations.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py index bbfae335faf..b5a1586df87 100644 --- a/intermediate_source/parametrizations.py +++ b/intermediate_source/parametrizations.py @@ -344,8 +344,10 @@ def right_inverse(self, A): layer = nn.Linear(3, 4) parametrize.register_parametrization(layer, "weight", PruningParametrization(layer.weight)) X = torch.rand(4, 3) +print("Initialization matrix:") print(X) layer.weight = X +print("Layer after initialized:") print(layer.weight) # Removing a Parametrization From d1dd542ee33ad97e2040949b65d7f2390b09c403 Mon Sep 17 00:00:00 2001 From: lezcano Date: Wed, 7 Apr 2021 16:01:40 +0100 Subject: [PATCH 13/17] Minor --- intermediate_source/parametrizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py index b5a1586df87..7271a7c8660 100644 --- a/intermediate_source/parametrizations.py +++ b/intermediate_source/parametrizations.py @@ -347,7 +347,7 @@ def right_inverse(self, A): print("Initialization matrix:") print(X) layer.weight = X -print("Layer after initialized:") +print("Initialized weight:") print(layer.weight) # Removing a Parametrization From 58b96112baa88f9b180abcf0a99e3bd8eec4bfc1 Mon Sep 17 00:00:00 2001 From: lezcano Date: Wed, 7 Apr 2021 17:50:23 +0100 Subject: [PATCH 14/17] Prefer unicode over math --- intermediate_source/parametrizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py index 7271a7c8660..1bb63cf928a 100644 --- a/intermediate_source/parametrizations.py +++ b/intermediate_source/parametrizations.py @@ -43,7 +43,7 @@ ------------------------------------- Assume that we want to have a square linear layer with symmetric weights, that is, -with weights :math:`X` such that :math:`X = X^{\intercal}`. One way to do so is +with weights ``X`` such that ``X = Xᵀ``. One way to do so is to copy the upper triangular part of the matrix into its lower triangular part """ From d2bbbcfa4e402e6fc21368591270f09b4e605986 Mon Sep 17 00:00:00 2001 From: lezcano Date: Wed, 7 Apr 2021 17:52:56 +0100 Subject: [PATCH 15/17] Minor --- intermediate_source/parametrizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py index 1bb63cf928a..a0a364ca215 100644 --- a/intermediate_source/parametrizations.py +++ b/intermediate_source/parametrizations.py @@ -364,7 +364,7 @@ def right_inverse(self, A): print(layer) print(layer.weight) parametrize.remove_parametrizations(layer, "weight") -print("After. Left with the skew-symmetric values but unconstrained:") +print("After. 
Weight has skew-symmetric values but it is unconstrained:") print(layer) print(layer.weight) From 9f0546eab1f7a2c02f09e611a24783cc9cf7998b Mon Sep 17 00:00:00 2001 From: lezcano Date: Thu, 8 Apr 2021 11:59:23 +0100 Subject: [PATCH 16/17] minor --- intermediate_source/parametrizations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py index a0a364ca215..a3fa07af2b0 100644 --- a/intermediate_source/parametrizations.py +++ b/intermediate_source/parametrizations.py @@ -86,7 +86,7 @@ def forward(self, x): # in. # 3) It recomputes the parametrization everytime forward is called. If we used the layer # several times during the forward pass, (imagine the recurrent kernel of an RNN) we would -# be recomputing the same ``A`` every time the layer is called. +# be recomputing the same ``A`` every time that the layer is called. # # Introduction to Parametrizations # -------------------------------- @@ -212,7 +212,7 @@ def forward(self, X): # `Cayley map `_ # maps the skew-symmetric matrices to the orthogonal matrices of positive determinant. We can # concatenate ``Skew`` and a parametrization that implements the Cayley map to get a layer with -# orthogonal weight +# orthogonal weights class CayleyMap(nn.Module): def __init__(self, n): super().__init__() From 8f926c0442e9c5e6c442d89d6bfe351940bd9d05 Mon Sep 17 00:00:00 2001 From: lezcano Date: Mon, 12 Apr 2021 10:10:23 +0100 Subject: [PATCH 17/17] Corrections --- intermediate_source/parametrizations.py | 47 +++++++++++++------------ 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/intermediate_source/parametrizations.py b/intermediate_source/parametrizations.py index a3fa07af2b0..a35f02f4f81 100644 --- a/intermediate_source/parametrizations.py +++ b/intermediate_source/parametrizations.py @@ -9,7 +9,7 @@ on deep models due to the complexity of the function being optimized. This is particularly problematic when working with ill-conditioned models. Examples of these are RNNs trained on long sequences and GANs. A number -of techniques have been proposed in the recent years to regularize these +of techniques have been proposed in recent years to regularize these models and improve their convergence. On recurrent models, it has been proposed to control the singular values of the recurrent kernel for the RNN to be well-conditioned. This can be achieved, for example, by making @@ -17,16 +17,16 @@ Another way to regularize recurrent models is via "`weight normalization `_". This approach proposes to decouple the learning of the parameters from the -learning of their norm. To do so, the parameter is divided by its +learning of their norms. To do so, the parameter is divided by its `Frobenius norm `_ -and a separate parameter encoding their norm is learnt. +and a separate parameter encoding its norm is learnt. A similar regularization was proposed for GANs under the name of "`spectral normalization `_". This method controls the Lipschitz constant of the network by dividing its parameters by their `spectral norm `_, rather than their Frobenius norm. -All these methods have a pattern in common. They all transform a parameter +All these methods have a common pattern. They all transform a parameter in an appropriate way before using it. In the first case, they make it orthogonal by using a function that maps matrices to orthogonal matrices. In the case of weight and spectral normalization, they divide the original parameter by its norm. 
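As a rough sketch of the norm-decoupling idea described above (an illustration of the idea only, not the actual ``torch.nn.utils.weight_norm`` implementation): the weight is written as a learnable magnitude times a direction normalized by its Frobenius norm.

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.nn.utils.parametrize as parametrize

    class WeightNormalization(nn.Module):
        # weight = g * V / ||V||_F, with the magnitude g learnt separately
        def __init__(self):
            super().__init__()
            self.g = nn.Parameter(torch.tensor(1.))

        def forward(self, V):
            return self.g * V / V.norm()

    layer = nn.Linear(3, 3)
    parametrize.register_parametrization(layer, "weight", WeightNormalization())
    print(layer.weight.norm())   # equals |g| up to numerical error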
@@ -34,8 +34,8 @@ More generally, all these examples use a function to put extra structure on the parameters. In other words, they use a function to constrain the parameters. -In this tutorial, you will learn how to implement and use this pattern to write and -put constraints on your model. Doing so is as easy as writing your own ``nn.Module``. +In this tutorial, you will learn how to implement and use this pattern to put +constraints on your model. Doing so is as easy as writing your own ``nn.Module``. Requirements: ``torch>=1.9.0`` @@ -44,7 +44,7 @@ Assume that we want to have a square linear layer with symmetric weights, that is, with weights ``X`` such that ``X = Xᵀ``. One way to do so is -to copy the upper triangular part of the matrix into its lower triangular part +to copy the upper-triangular part of the matrix into its lower-triangular part """ import torch @@ -60,7 +60,7 @@ def symmetric(X): print(A) # Quick visual check ############################################################################### -# We can then use this idea to implement a linear layer with symmetric weights: +# We can then use this idea to implement a linear layer with symmetric weights class LinearSymmetric(nn.Module): def __init__(self, n_features): super().__init__() @@ -84,14 +84,14 @@ def forward(self, x): # 2) It does not separate the layer and the parametrization. If the parametrization were # more difficult, we would have to rewrite its code for each layer that we want to use it # in. -# 3) It recomputes the parametrization everytime forward is called. If we used the layer -# several times during the forward pass, (imagine the recurrent kernel of an RNN) we would -# be recomputing the same ``A`` every time that the layer is called. +# 3) It recomputes the parametrization everytime we use the layer. If we use the layer +# several times during the forward pass, (imagine the recurrent kernel of an RNN), it +# would compute the same ``A`` every time that the layer is called. # # Introduction to Parametrizations # -------------------------------- # -# Parametrizations come to solve all these and other problems. +# Parametrizations can solve all these problems as well as others. # # Let's start by reimplementing the code above using ``torch.nn.utils.parametrize``. # The only thing that we have to do is to write the parametrization as a regular ``nn.Module`` @@ -114,8 +114,8 @@ def forward(self, X): ############################################################################### # We can do the same thing with any other layer. For example, we can create a CNN with # `skew-symmetric `_ kernels. -# We use a similar parametrization, copying minus the upper triangular part into the -# lower-triangular part +# We use a similar parametrization, copying the upper-triangular part with signs +# reversed into the lower-triangular part class Skew(nn.Module): def forward(self, X): A = X.triu(1) @@ -123,7 +123,7 @@ def forward(self, X): cnn = nn.Conv2d(in_channels=5, out_channels=8, kernel_size=3) -parametrize.register_parametrization(layer, "weight", Skew()) +parametrize.register_parametrization(cnn, "weight", Skew()) # Print a few kernels print(cnn.weight[0, 1]) print(cnn.weight[2, 2]) @@ -153,11 +153,12 @@ def forward(self, X): print(layer.parametrizations.weight) ############################################################################### -# Note that each element in the ``ModuleDict`` is of type ``ParametrizationList``. -# This ``ParametrizationList`` behaves like an ``nn.Sequential`` that also holds the weight. 
-# element of this list. It will be clear later the reason for this, when we see how to concatenate -# parametrizations. -# +# Each element of this ``ParametrizationList`` behaves like an ``nn.Sequential`` module +# that also holds the weight. This list will allow us to concatenate parametrizations on +# one weight. Since this is a list, we can access the parametrizations indexing it +print(layer.parametrizations.weight[0]) + +############################################################################### # The other thing that we notice is that, if we print the parameters, we see that the # parameter ``weight`` has been moved print(dict(layer.named_parameters())) @@ -208,7 +209,7 @@ def forward(self, X): # ------------------------------ # # Concatenating two parametrizations is as easy as registering them on the same tensor. -# We may use this to create complex parametrizations from simpler ones. For example, the +# We may use this to create more complex parametrizations from simpler ones. For example, the # `Cayley map `_ # maps the skew-symmetric matrices to the orthogonal matrices of positive determinant. We can # concatenate ``Skew`` and a parametrization that implements the Cayley map to get a layer with @@ -232,7 +233,7 @@ def forward(self, X): # This may also be used to prune a parametrized module, or to reuse parametrizations. For example, # the matrix exponential maps the symmetric matrices to the Symmetric Positive Definite (SPD) matrices # But the matrix exponential also maps the skew-symmetric matrices to the orthogonal matrices. -# Using these two facts, we may reuse the parametrizations before to our advantage: +# Using these two facts, we may reuse the parametrizations before to our advantage class MatrixExponential(nn.Module): def forward(self, X): return torch.matrix_exp(X) @@ -340,7 +341,7 @@ def right_inverse(self, A): # In this case, it is not true that for every matrix A ``forward(right_inverse(A)) == A``. # This is only true when the matrix ``A`` has zeros in the same positions as the mask. # Even then, if we assign a tensor to a pruned parameter, it will comes as no surprise -# that tensor will be, in fact, pruned: +# that tensor will be, in fact, pruned layer = nn.Linear(3, 4) parametrize.register_parametrization(layer, "weight", PruningParametrization(layer.weight)) X = torch.rand(4, 3)