@@ -130,25 +130,6 @@ def test_elemwise_runtime_broadcast():
     check_elemwise_runtime_broadcast(get_mode("NUMBA"))


-def test_elemwise_speed(benchmark):
-    x = pt.dmatrix("y")
-    y = pt.dvector("z")
-
-    out = np.exp(2 * x * y + y)
-
-    rng = np.random.default_rng(42)
-
-    x_val = rng.normal(size=(200, 500))
-    y_val = rng.normal(size=500)
-
-    func = function([x, y], out, mode="NUMBA")
-    func = func.vm.jit_fn
-    (out,) = func(x_val, y_val)
-    np.testing.assert_allclose(np.exp(2 * x_val * y_val + y_val), out)
-
-    benchmark(func, x_val, y_val)
-
-
 @pytest.mark.parametrize(
     "v, new_order",
     [
@@ -631,41 +612,6 @@ def test_Argmax(x, axes, exc):
     )


-@pytest.mark.parametrize("size", [(10, 10), (1000, 1000), (10000, 10000)])
-@pytest.mark.parametrize("axis", [0, 1])
-def test_logsumexp_benchmark(size, axis, benchmark):
-    X = pt.matrix("X")
-    X_max = pt.max(X, axis=axis, keepdims=True)
-    X_max = pt.switch(pt.isinf(X_max), 0, X_max)
-    X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max
-
-    rng = np.random.default_rng(23920)
-    X_val = rng.normal(size=size)
-
-    X_lse_fn = pytensor.function([X], X_lse, mode="NUMBA")
-
-    # JIT compile first
-    res = X_lse_fn(X_val)
-    exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
-    np.testing.assert_array_almost_equal(res, exp_res)
-    benchmark(X_lse_fn, X_val)
-
-
-def test_fused_elemwise_benchmark(benchmark):
-    rng = np.random.default_rng(123)
-    size = 100_000
-    x = pytensor.shared(rng.normal(size=size), name="x")
-    mu = pytensor.shared(rng.normal(size=size), name="mu")
-
-    logp = -((x - mu) ** 2) / 2
-    grad_logp = grad(logp.sum(), x)
-
-    func = pytensor.function([], [logp, grad_logp], mode="NUMBA")
-    # JIT compile first
-    func()
-    benchmark(func)
-
-
 def test_elemwise_out_type():
     # Create a graph with an elemwise
     # Ravel failes if the elemwise output type is reported incorrectly
@@ -681,22 +627,6 @@ def test_elemwise_out_type():
     assert func(x_val).shape == (18,)


-@pytest.mark.parametrize(
-    "axis",
-    (0, 1, 2, (0, 1), (0, 2), (1, 2), None),
-    ids=lambda x: f"axis={x}",
-)
-@pytest.mark.parametrize(
-    "c_contiguous",
-    (True, False),
-    ids=lambda x: f"c_contiguous={x}",
-)
-def test_numba_careduce_benchmark(axis, c_contiguous, benchmark):
-    return careduce_benchmark_tester(
-        axis, c_contiguous, mode="NUMBA", benchmark=benchmark
-    )
-
-
 def test_scalar_loop():
     a = float64("a")
     scalar_loop = pytensor.scalar.ScalarLoop([a], [a + a])
@@ -709,3 +639,71 @@ def test_scalar_loop():
         ([x], [elemwise_loop]),
         (np.array([1, 2, 3], dtype="float64"),),
     )
+
+
+class TestsBenchmark:
+    def test_elemwise_speed(self, benchmark):
+        x = pt.dmatrix("y")
+        y = pt.dvector("z")
+
+        out = np.exp(2 * x * y + y)
+
+        rng = np.random.default_rng(42)
+
+        x_val = rng.normal(size=(200, 500))
+        y_val = rng.normal(size=500)
+
+        func = function([x, y], out, mode="NUMBA")
+        func = func.vm.jit_fn
+        (out,) = func(x_val, y_val)
+        np.testing.assert_allclose(np.exp(2 * x_val * y_val + y_val), out)
+
+        benchmark(func, x_val, y_val)
+
+    def test_fused_elemwise_benchmark(self, benchmark):
+        rng = np.random.default_rng(123)
+        size = 100_000
+        x = pytensor.shared(rng.normal(size=size), name="x")
+        mu = pytensor.shared(rng.normal(size=size), name="mu")
+
+        logp = -((x - mu) ** 2) / 2
+        grad_logp = grad(logp.sum(), x)
+
+        func = pytensor.function([], [logp, grad_logp], mode="NUMBA")
+        # JIT compile first
+        func()
+        benchmark(func)
+
+    @pytest.mark.parametrize("size", [(10, 10), (1000, 1000), (10000, 10000)])
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_logsumexp_benchmark(self, size, axis, benchmark):
+        X = pt.matrix("X")
+        X_max = pt.max(X, axis=axis, keepdims=True)
+        X_max = pt.switch(pt.isinf(X_max), 0, X_max)
+        X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max
+
+        rng = np.random.default_rng(23920)
+        X_val = rng.normal(size=size)
+
+        X_lse_fn = pytensor.function([X], X_lse, mode="NUMBA")
+
+        # JIT compile first
+        res = X_lse_fn(X_val)
+        exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
+        np.testing.assert_array_almost_equal(res, exp_res)
+        benchmark(X_lse_fn, X_val)
+
+    @pytest.mark.parametrize(
+        "axis",
+        (0, 1, 2, (0, 1), (0, 2), (1, 2), None),
+        ids=lambda x: f"axis={x}",
+    )
+    @pytest.mark.parametrize(
+        "c_contiguous",
+        (True, False),
+        ids=lambda x: f"c_contiguous={x}",
+    )
+    def test_numba_careduce_benchmark(self, axis, c_contiguous, benchmark):
+        return careduce_benchmark_tester(
+            axis, c_contiguous, mode="NUMBA", benchmark=benchmark
+        )