
Commit 65826e7

ricardoV94 authored and lucianopaz committed
Handle invalid BroadcastTo shape in C backend
1 parent 24b67a8 commit 65826e7

File tree

2 files changed: +62 −20


pytensor/tensor/extra_ops.py

+28 −6
@@ -1643,6 +1643,11 @@ def make_node(self, a, *shape):
 
         shape, static_shape = at.infer_static_shape(shape)
 
+        if len(shape) < a.ndim:
+            raise ValueError(
+                f"Broadcast target shape has {len(shape)} dims, which is shorter than input with {a.ndim} dims"
+            )
+
         out = TensorType(dtype=a.type.dtype, shape=static_shape)()
 
         # Attempt to prevent in-place operations on this view-based output
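The new make_node guard turns a rank-reducing broadcast into a build-time error. A minimal sketch of the resulting behavior (mirroring test_make_node_error_handling added below; at is the pytensor.tensor alias already used in this file):

    import pytensor.tensor as at
    from pytensor.tensor.extra_ops import broadcast_to

    x = at.zeros((3, 4))        # symbolic 2-d input
    broadcast_to(x, (2, 3, 4))  # fine: target rank >= input rank
    broadcast_to(x, (5,))       # ValueError: "Broadcast target shape has 1 dims,
                                #  which is shorter than input with 2 dims"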
@@ -1686,9 +1691,12 @@ def infer_shape(self, fgraph, node, ins_shapes):
         return [node.inputs[1:]]
 
     def c_code(self, node, name, inputs, outputs, sub):
+        inp_dims = node.inputs[0].ndim
+        out_dims = node.outputs[0].ndim
+        new_dims = out_dims - inp_dims
+
         (x, *shape) = inputs
         (out,) = outputs
-        ndims = len(shape)
         fail = sub["fail"]
 
         # TODO: Could just use `PyArray_Return`, no?
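new_dims counts the leading axes that broadcasting prepends to the input; shapes align on the right as in NumPy, so input axis i corresponds to output axis i + new_dims. A plain-Python sketch of that index arithmetic, with made-up shapes:

    inp_shape = (3, 1, 7)                        # inp_dims = 3
    out_shape = (2, 6, 3, 4, 7)                  # out_dims = 5
    new_dims = len(out_shape) - len(inp_shape)   # 2 new leading axes
    for i, d in enumerate(inp_shape):
        # the compatibility rule the generated C code enforces below
        assert d == 1 or d == out_shape[i + new_dims]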
@@ -1701,20 +1709,34 @@ def c_code(self, node, name, inputs, outputs, sub):
 
         src = (
             """
-            npy_intp itershape[%(ndims)s] = {%(dims_array)s};
+            npy_intp itershape[%(out_dims)s] = {%(dims_array)s};
 
+            NpyIter *iter;
             PyArrayObject *ops[1] = {%(x)s};
             npy_uint32 flags = NPY_ITER_MULTI_INDEX | NPY_ITER_REFS_OK | NPY_ITER_ZEROSIZE_OK;
             npy_uint32 op_flags[1] = {NPY_ITER_READONLY};
             PyArray_Descr *op_dtypes[1] = {NULL};
-            int oa_ndim = %(ndims)s;
+            int oa_ndim = %(out_dims)s;
             int* op_axes[1] = {NULL};
             npy_intp buffersize = 0;
 
-            NpyIter *iter = NpyIter_AdvancedNew(
+            for(int i = 0; i < %(inp_dims)s; i++)
+            {
+                if ((PyArray_DIMS(%(x)s)[i] != 1) && (PyArray_DIMS(%(x)s)[i] != itershape[i + %(new_dims)s]))
+                {
+                    PyErr_Format(PyExc_ValueError,
+                        "Shape mismatch in broadcast_to: target shape[%%i] = %%lld is incompatible with input shape = %%lld.",
+                        i,
+                        (long long int) itershape[i + %(new_dims)s],
+                        (long long int) PyArray_DIMS(%(x)s)[i]
+                    );
+                    %(fail)s
+                }
+            }
+
+            iter = NpyIter_AdvancedNew(
                 1, ops, flags, NPY_CORDER, NPY_NO_CASTING, op_flags, op_dtypes, oa_ndim, op_axes, itershape, buffersize
             );
-
             %(out)s = NpyIter_GetIterView(iter, 0);
 
             if(%(out)s == NULL){
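The inserted loop applies NumPy's per-dimension broadcasting rule at run time, before the iterator is created: each input dimension must be 1 or equal to the right-aligned target dimension, otherwise the op raises ValueError instead of handing an inconsistent itershape to NpyIter_AdvancedNew. This is the NumPy reference behavior it matches:

    import numpy as np

    x = np.ones((3, 4))
    np.broadcast_to(x, (2, 3, 4))  # ok: 3 and 4 match, one new leading axis
    np.broadcast_to(x, (2, 5, 4))  # ValueError: 3 is neither 1 nor 5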
@@ -1733,7 +1755,7 @@ def c_code(self, node, name, inputs, outputs, sub):
         return src
 
     def c_code_cache_version(self):
-        return (1,)
+        return (2,)
 
 
 broadcast_to_ = BroadcastTo()
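Note: bumping c_code_cache_version from (1,) to (2,) invalidates previously compiled modules for this op, so the new C code with the shape check is recompiled rather than served from PyTensor's C-code cache.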

tests/tensor/test_extra_ops.py

+34 −14
@@ -1253,41 +1253,52 @@ def test_avoid_useless_subtensors(self):
     @pytest.mark.parametrize("linker", ["cvm", "py"])
     def test_perform(self, linker):
 
-        a = pytensor.shared(5)
+        a = pytensor.shared(np.full((3, 1, 1), 5))
+        s_0 = iscalar("s_0")
         s_1 = iscalar("s_1")
-        shape = (s_1, 1)
+        shape = (s_0, s_1, 1)
 
         bcast_res = broadcast_to(a, shape)
-        assert bcast_res.broadcastable == (False, True)
+        assert bcast_res.broadcastable == (False, False, True)
 
         bcast_fn = pytensor.function(
-            [s_1], bcast_res, mode=Mode(optimizer=None, linker=linker)
+            [s_0, s_1], bcast_res, mode=Mode(optimizer=None, linker=linker)
         )
         bcast_fn.vm.allow_gc = False
 
-        bcast_at = bcast_fn(4)
-        bcast_np = np.broadcast_to(5, (4, 1))
+        bcast_at = bcast_fn(3, 4)
+        bcast_np = np.broadcast_to(5, (3, 4, 1))
 
         assert np.array_equal(bcast_at, bcast_np)
 
-        bcast_var = bcast_fn.maker.fgraph.outputs[0].owner.inputs[0]
-        bcast_in = bcast_fn.vm.storage_map[a]
-        bcast_out = bcast_fn.vm.storage_map[bcast_var]
+        with pytest.raises(ValueError):
+            bcast_fn(5, 4)
 
         if linker != "py":
+            bcast_var = bcast_fn.maker.fgraph.outputs[0].owner.inputs[0]
+            bcast_in = bcast_fn.vm.storage_map[a]
+            bcast_out = bcast_fn.vm.storage_map[bcast_var]
             assert np.shares_memory(bcast_out[0], bcast_in[0])
 
+    def test_make_node_error_handling(self):
+        with pytest.raises(
+            ValueError,
+            match="Broadcast target shape has 1 dims, which is shorter than input with 2 dims",
+        ):
+            broadcast_to(at.zeros((3, 4)), (5,))
+
     @pytest.mark.skipif(
         not config.cxx, reason="G++ not available, so we need to skip this test."
     )
-    def test_memory_leak(self):
+    @pytest.mark.parametrize("valid", (True, False))
+    def test_memory_leak(self, valid):
         import gc
         import tracemalloc
 
         from pytensor.link.c.cvm import CVM
 
         n = 100_000
-        x = pytensor.shared(np.ones(n, dtype=np.float64))
+        x = pytensor.shared(np.ones((1, n), dtype=np.float64))
         y = broadcast_to(x, (5, n))
 
         f = pytensor.function([], y, mode=Mode(optimizer=None, linker="cvm"))
13031314
blocks_last = None
13041315
block_diffs = []
13051316
for i in range(1, 50):
1306-
x.set_value(np.ones(n))
1307-
_ = f()
1317+
if valid:
1318+
x.set_value(np.ones((1, n)))
1319+
_ = f()
1320+
else:
1321+
x.set_value(np.ones((2, n)))
1322+
try:
1323+
_ = f()
1324+
except ValueError:
1325+
pass
1326+
else:
1327+
raise RuntimeError("Should have failed")
13081328
_ = gc.collect()
13091329
blocks_i, _ = tracemalloc.get_traced_memory()
13101330
if blocks_last is not None:
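The leak test now also drives the failing branch, so the error path in the generated C code is checked for leaks as well. The measurement is plain tracemalloc bookkeeping; a condensed sketch of the pattern, with run_once as a hypothetical stand-in for one valid or failing call of f:

    import gc
    import tracemalloc

    tracemalloc.start()
    blocks_last, block_diffs = None, []
    for _ in range(50):
        run_once()  # hypothetical: one call of f(), possibly raising ValueError
        gc.collect()
        blocks_i, _ = tracemalloc.get_traced_memory()
        if blocks_last is not None:
            block_diffs.append(blocks_i - blocks_last)
        blocks_last = blocks_i
    tracemalloc.stop()
    assert all(d <= 0 for d in block_diffs)  # traced memory never grows between iterations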
@@ -1313,7 +1333,7 @@ def test_memory_leak(self):
             blocks_last = blocks_i
 
         tracemalloc.stop()
-        assert np.allclose(np.mean(block_diffs), 0)
+        assert np.all(np.array(block_diffs) <= (0 + 1e-8))
 
     @pytest.mark.parametrize(
         "fn,input_dims",
