Fix bug in infer_shape of Blockwise(Subtensor) #1353

Merged: 3 commits, Apr 9, 2025
11 changes: 11 additions & 0 deletions pytensor/tensor/basic.py
@@ -710,6 +710,17 @@ def c_code_cache_version(self):
scalar_from_tensor = ScalarFromTensor()


@_vectorize_node.register(ScalarFromTensor)
def vectorize_scalar_from_tensor(op, node, batch_x):
if batch_x.ndim == 0:
return scalar_from_tensor(batch_x).owner
if batch_x.owner is not None:
return batch_x.owner

# Needed until we fix https://github.com/pymc-devs/pytensor/issues/902
return batch_x.copy().owner


# to be removed as we get the epydoc routine-documenting thing going
# -JB 20080924
def _conversion(real_value: Op, name: str) -> Op:
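A minimal sketch of what the new ScalarFromTensor dispatch above does, assuming pytensor's public vectorize_graph helper (the snippet itself is not part of the diff): when scalar_from_tensor is vectorized with a batched (non-0d) input, the scalar conversion is dropped and the batched tensor is passed through (as a copy until pymc-devs/pytensor#902 is fixed).

import pytensor.tensor as pt
from pytensor.graph.replace import vectorize_graph
from pytensor.tensor.basic import scalar_from_tensor

x = pt.scalar("x")                   # 0d tensor input of the core graph
s = scalar_from_tensor(x)            # core graph: 0d tensor -> scalar

xb = pt.vector("xb")                 # the same input with one batch dimension
sb = vectorize_graph(s, replace={x: xb})
# sb is simply (a copy of) xb; no ScalarFromTensor node is left in the batched graph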
7 changes: 4 additions & 3 deletions pytensor/tensor/blockwise.py
@@ -7,7 +7,7 @@
from pytensor.compile.builders import OpFromGraph
from pytensor.gradient import DisconnectedType
from pytensor.graph import FunctionGraph
from pytensor.graph.basic import Apply, Constant, ancestors
from pytensor.graph.basic import Apply, Constant, explicit_graph_inputs
from pytensor.graph.null_type import NullType
from pytensor.graph.op import Op
from pytensor.graph.replace import (
@@ -190,7 +190,7 @@ def infer_shape(
core_op_infer_shape = getattr(self.core_op, "infer_shape", None)
if core_op_infer_shape is not None:
dummy_core_node = self._create_dummy_core_node(node.inputs)
dummy_core_inputs = dummy_core_node.inputs
dummy_core_inputs = tuple(explicit_graph_inputs(dummy_core_node.inputs))
dummy_fgraph = FunctionGraph(outputs=dummy_core_node.outputs, clone=False)
core_input_shapes = [
input_shape[batch_ndims:] for input_shape in input_shapes
@@ -214,7 +214,8 @@ def infer_shape(
# of the core_node as the value is not constant across batch dims of the Blockwise
core_out_dim = core_output_shapes[o][i]
if not (
set(dummy_core_inputs) & set(ancestors([core_out_dim]))
set(dummy_core_inputs)
& set(explicit_graph_inputs([core_out_dim]))
):
core_out_shape.append(core_out_dim)
continue
34 changes: 31 additions & 3 deletions pytensor/tensor/rewriting/blockwise.py
@@ -14,7 +14,12 @@
register_stabilize,
)
from pytensor.tensor.shape import Reshape
from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedSubtensor, Subtensor
from pytensor.tensor.subtensor import (
AdvancedIncSubtensor,
AdvancedSubtensor,
Subtensor,
indices_from_subtensor,
)


@node_rewriter([Blockwise])
@@ -216,9 +221,9 @@ def local_blockwise_reshape(fgraph, node):

Reshape is tricky to vectorize eagerly, because a graph like
`x.reshape([x.shape[0] * x.shape[1], -1])` has many operations
that must be vectorized before we arrize at the reshape operation.
that must be vectorized before we arrive at the reshape operation.

For the square Reshape case, we must wait for all the intemediate
For the square Reshape case, we must wait for all the intermediate
operations to be lifted as Allocs
"""
if not isinstance(node.op.core_op, Reshape):
@@ -234,6 +239,29 @@ def local_blockwise_reshape(fgraph, node):
return [new_out]


@register_stabilize
@register_specialize
@node_rewriter([Blockwise])
def local_blockwise_of_subtensor(fgraph, node):
"""Rewrite Blockwise of Subtensor, where the only batch input is the indexed tensor.

Blockwise(Subtensor{a: b})(x, a, b) -> x[:, a:b] when x has one batch dimension, and a/b none
"""
if not isinstance(node.op.core_op, Subtensor):
return

x, *idxs = node.inputs
if not all(all(idx.type.broadcastable) for idx in idxs):
return

core_idxs = indices_from_subtensor(
[idx.squeeze() for idx in idxs], node.op.core_op.idx_list
)
# Add empty slices for the batch dims
none_slices = (slice(None),) * node.op.batch_ndim(node)
return [x[(*none_slices, *core_idxs)]]


@node_rewriter(tracks=[Blockwise], inplace=True)
def blockwise_inplace(fgraph, node):
blockwise_op = node.op
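A hedged sanity check of the equivalence that the new local_blockwise_of_subtensor rewrite above relies on, written in plain NumPy (not part of the PR): slicing every batch entry with the same non-batched bounds gives the same result as indexing the batched tensor with an extra leading full slice.

import numpy as np

x = np.arange(12).reshape(3, 4)    # one batch dimension of size 3
a, b = 1, 3                        # core slice bounds, shared across the batch

looped = np.stack([x[i, a:b] for i in range(x.shape[0])])  # what Blockwise would compute
direct = x[:, a:b]                                         # what the rewrite produces
assert np.array_equal(looped, direct)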
26 changes: 24 additions & 2 deletions tests/tensor/signal/test_conv.py
@@ -4,9 +4,11 @@
import pytest
from scipy.signal import convolve as scipy_convolve

from pytensor import config, function
from pytensor import config, function, grad
from pytensor.graph import ancestors, rewrite_graph
from pytensor.tensor import matrix, vector
from pytensor.tensor.signal.conv import convolve1d
from pytensor.tensor.blockwise import Blockwise
from pytensor.tensor.signal.conv import Conv1d, convolve1d
from tests import unittest_tools as utt


@@ -60,3 +62,23 @@ def test_convolve1d_batch_same():

res = out.eval({x: x_test, y: y_test})
assert res.shape == (2, 8)


@pytest.mark.parametrize("mode", ("full", "valid", "same"))
def test_convolve1d_batch_graph(mode):
"""Test that we don't have slow Blockwise Subtensors in graph of a batched convolve1d"""
x = matrix("x")
y = matrix("y")
out = convolve1d(x, y, mode=mode)
grads = grad(out.sum(), wrt=[x, y])
final_grads = rewrite_graph(
grads, include=("ShapeOpt", "canonicalize", "stabilize", "specialize")
)

blockwise_nodes = [
var.owner
for var in ancestors(final_grads)
if var.owner is not None and isinstance(var.owner.op, Blockwise)
]
# Check any Blockwise are just Conv1d
assert all(isinstance(node.op.core_op, Conv1d) for node in blockwise_nodes)
15 changes: 11 additions & 4 deletions tests/tensor/test_blockwise.py
@@ -264,9 +264,13 @@ class TestOpWithInferShape(Op):
def make_node(self, a, b):
assert a.type.ndim == 1
assert b.type.ndim == 1
# Simulate make_node that introduces operations on inputs
a_identity = a.copy()
Review comment (Member):
Why does copying the input simulate an operation on the inputs? The Apply takes the copies as inputs, so any intermediate operations (the f in a_identity = f(a)) would be lost, right?

Reply (Member, PR author):
They would be lost after rewrites, but Blockwise.infer_shape acts on the returned graph immediately.

What Blockwise is trying to figure out is whether the core shape of the Op depends on the values of the inputs, or can be deduced from their shapes alone. For that it calls infer_shape on the core op with dummy core variables and then checks whether those variables appear in the returned shape graph. If they do, it can't really use that graph, because the shape may vary across iterations (say, a Blockwise'd slice Subtensor with batched start points).

If they are not used in the shape graph, then only the core shapes are needed, and the graph is fine to use. This would be the case for a Blockwise(Dirichlet), where only the core shape (the length of alpha), but not its values, is needed. (We don't Blockwise RVs, but you get the idea.)

Anyway, the logic to figure out whether the core values are needed was to create a dummy node and then check whether the node's inputs appeared in the shape graph. This failed when the dummy node didn't actually use the dummy variables directly (because make_node added extra nodes, like DimShuffle or ScalarFromTensor in the case that actually failed). The identity (copy) here is an easy way to exercise this without changing anything else in the test.
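A small illustration of the failure mode described above, assuming pytensor's public ancestors and explicit_graph_inputs helpers (the snippet is not part of the PR):

import pytensor.tensor as pt
from pytensor.graph.basic import ancestors, explicit_graph_inputs

a = pt.vector("a")           # dummy core input, as created by Blockwise.infer_shape
a_identity = a.copy()        # extra node introduced by the core op's make_node
shape_expr = a.sum()         # core infer_shape returns a graph built on `a` itself

# Old check: intersect the node's own inputs with ancestors of the shape graph.
# a_identity is not an ancestor of a.sum(), so the value dependence is missed.
print(bool({a_identity} & set(ancestors([shape_expr]))))   # False

# New check: walk both sides back to their true graph inputs first.
node_inputs = set(explicit_graph_inputs([a_identity]))
shape_inputs = set(explicit_graph_inputs([shape_expr]))
print(bool(node_inputs & shape_inputs))                    # True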

b_identity = b.copy()

c = tensor(shape=(None,))
d = tensor(shape=(None,))
return Apply(self, [a, b], [c, d])
return Apply(self, [a_identity, b_identity], [c, d])

def perform(self, node, inputs, outputs):
a, b = inputs
@@ -277,9 +281,12 @@ def perform(self, node, inputs, outputs):
def infer_shape(self, fgraph, node, input_shapes):
# First output shape depends only on input_shapes
# Second output shape depends on input values
x, y = node.inputs
[(x_shape,), (y_shape,)] = input_shapes
return (x_shape + y_shape,), (x.sum() + y.sum(),)
a_identity, b_identity = node.inputs
# Simulate shape depending on original inputs, not the ones that go directly into the node
a = a_identity.owner.inputs[0]
b = b_identity.owner.inputs[0]
[(a_shape,), (b_shape,)] = input_shapes
return (a_shape + b_shape,), (a.sum() + b.sum(),)

blockwise_op = Blockwise(
core_op=TestOpWithInferShape(), signature="(a),(b)->(c),(d)"