@@ -130,25 +130,6 @@ def test_elemwise_runtime_broadcast():
     check_elemwise_runtime_broadcast(get_mode("NUMBA"))
 
 
-def test_elemwise_speed(benchmark):
-    x = pt.dmatrix("y")
-    y = pt.dvector("z")
-
-    out = np.exp(2 * x * y + y)
-
-    rng = np.random.default_rng(42)
-
-    x_val = rng.normal(size=(200, 500))
-    y_val = rng.normal(size=500)
-
-    func = function([x, y], out, mode="NUMBA")
-    func = func.vm.jit_fn
-    (out,) = func(x_val, y_val)
-    np.testing.assert_allclose(np.exp(2 * x_val * y_val + y_val), out)
-
-    benchmark(func, x_val, y_val)
-
-
 @pytest.mark.parametrize(
     "v, new_order",
     [
@@ -631,41 +612,6 @@ def test_Argmax(x, axes, exc):
     )
 
 
-@pytest.mark.parametrize("size", [(10, 10), (1000, 1000), (10000, 10000)])
-@pytest.mark.parametrize("axis", [0, 1])
-def test_logsumexp_benchmark(size, axis, benchmark):
-    X = pt.matrix("X")
-    X_max = pt.max(X, axis=axis, keepdims=True)
-    X_max = pt.switch(pt.isinf(X_max), 0, X_max)
-    X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max
-
-    rng = np.random.default_rng(23920)
-    X_val = rng.normal(size=size)
-
-    X_lse_fn = pytensor.function([X], X_lse, mode="NUMBA")
-
-    # JIT compile first
-    res = X_lse_fn(X_val)
-    exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
-    np.testing.assert_array_almost_equal(res, exp_res)
-    benchmark(X_lse_fn, X_val)
-
-
-def test_fused_elemwise_benchmark(benchmark):
-    rng = np.random.default_rng(123)
-    size = 100_000
-    x = pytensor.shared(rng.normal(size=size), name="x")
-    mu = pytensor.shared(rng.normal(size=size), name="mu")
-
-    logp = -((x - mu) ** 2) / 2
-    grad_logp = grad(logp.sum(), x)
-
-    func = pytensor.function([], [logp, grad_logp], mode="NUMBA")
-    # JIT compile first
-    func()
-    benchmark(func)
-
-
 def test_elemwise_out_type():
     # Create a graph with an elemwise
     # Ravel fails if the elemwise output type is reported incorrectly
@@ -681,22 +627,6 @@ def test_elemwise_out_type():
     assert func(x_val).shape == (18,)
 
 
-@pytest.mark.parametrize(
-    "axis",
-    (0, 1, 2, (0, 1), (0, 2), (1, 2), None),
-    ids=lambda x: f"axis={x}",
-)
-@pytest.mark.parametrize(
-    "c_contiguous",
-    (True, False),
-    ids=lambda x: f"c_contiguous={x}",
-)
-def test_numba_careduce_benchmark(axis, c_contiguous, benchmark):
-    return careduce_benchmark_tester(
-        axis, c_contiguous, mode="NUMBA", benchmark=benchmark
-    )
-
-
 def test_scalar_loop():
     a = float64("a")
     scalar_loop = pytensor.scalar.ScalarLoop([a], [a + a])
@@ -709,3 +639,71 @@ def test_scalar_loop():
         ([x], [elemwise_loop]),
         (np.array([1, 2, 3], dtype="float64"),),
     )
+
+
+class TestsBenchmark:
+    def test_elemwise_speed(self, benchmark):
+        x = pt.dmatrix("y")
+        y = pt.dvector("z")
+
+        out = np.exp(2 * x * y + y)
+
+        rng = np.random.default_rng(42)
+
+        x_val = rng.normal(size=(200, 500))
+        y_val = rng.normal(size=500)
+
+        func = function([x, y], out, mode="NUMBA")
+        func = func.vm.jit_fn
+        (out,) = func(x_val, y_val)
+        np.testing.assert_allclose(np.exp(2 * x_val * y_val + y_val), out)
+
+        benchmark(func, x_val, y_val)
+
+    def test_fused_elemwise_benchmark(self, benchmark):
+        rng = np.random.default_rng(123)
+        size = 100_000
+        x = pytensor.shared(rng.normal(size=size), name="x")
+        mu = pytensor.shared(rng.normal(size=size), name="mu")
+
+        logp = -((x - mu) ** 2) / 2
+        grad_logp = grad(logp.sum(), x)
+
+        func = pytensor.function([], [logp, grad_logp], mode="NUMBA")
+        # JIT compile first
+        func()
+        benchmark(func)
+
+    @pytest.mark.parametrize("size", [(10, 10), (1000, 1000), (10000, 10000)])
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_logsumexp_benchmark(self, size, axis, benchmark):
+        X = pt.matrix("X")
+        X_max = pt.max(X, axis=axis, keepdims=True)
+        X_max = pt.switch(pt.isinf(X_max), 0, X_max)
+        X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max
+
+        rng = np.random.default_rng(23920)
+        X_val = rng.normal(size=size)
+
+        X_lse_fn = pytensor.function([X], X_lse, mode="NUMBA")
+
+        # JIT compile first
+        res = X_lse_fn(X_val)
+        exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
+        np.testing.assert_array_almost_equal(res, exp_res)
+        benchmark(X_lse_fn, X_val)
+
+    @pytest.mark.parametrize(
+        "axis",
+        (0, 1, 2, (0, 1), (0, 2), (1, 2), None),
+        ids=lambda x: f"axis={x}",
+    )
+    @pytest.mark.parametrize(
+        "c_contiguous",
+        (True, False),
+        ids=lambda x: f"c_contiguous={x}",
+    )
+    def test_numba_careduce_benchmark(self, axis, c_contiguous, benchmark):
+        return careduce_benchmark_tester(
+            axis, c_contiguous, mode="NUMBA", benchmark=benchmark
+        )