Open
Description
It's occurred to me that `@. cache.b₂′ = b₂ - A₂₁ * x₁`
lowers to this:
"""
    Base.Broadcast.materialize!(dest::FieldNameDict, vector_or_matrix::FieldNameDict)

Copy every entry of `vector_or_matrix` into the entry of `dest` stored under the
same key, issuing one in-place broadcast (`.=`) per key.

Throws an error if `is_lazy(dest)` is true, or if
`is_subset_that_covers_set(keys(vector_or_matrix), keys(dest))` is false.
A `UniformScaling` entry of `dest` is never written to: it must already equal the
corresponding source entry, otherwise an error is thrown.

Returns `nothing`.
"""
function Base.Broadcast.materialize!(
    dest::FieldNameDict,
    vector_or_matrix::FieldNameDict,
)
    if is_lazy(dest)
        error("Cannot materialize into a lazy FieldNameDict")
    end

    # The two key sets are not required to be identical; they only have to
    # satisfy the check below.
    if !is_subset_that_covers_set(keys(vector_or_matrix), keys(dest))
        error(
            "Broadcast result and destination keys are incompatible: \
             $(set_string(keys(vector_or_matrix))) vs. $(set_string(keys(dest)))",
        )
    end

    # One separate in-place broadcast is performed for each key.
    for name in keys(vector_or_matrix)
        source_entry = vector_or_matrix[name]
        if dest[name] isa UniformScaling
            # Nothing can be stored into a UniformScaling, so the only
            # acceptable case is that it already matches the source entry.
            if !(dest[name] == source_entry)
                error("UniformScaling is immutable")
            end
        elseif source_entry isa UniformScaling
            # Wrap in a 1-tuple so that the UniformScaling is broadcast as a
            # single value.
            dest[name] .= (source_entry,)
        else
            dest[name] .= source_entry
        end
    end
    return nothing
end
So, there are even more kernel launches than I realized in `run_field_matrix_solver!`.
We should fuse this loop into a single kernel launch, and parallelize if possible.