Skip to content

Fix performance regression in hvcat of simple matrices #57422

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
53 changes: 28 additions & 25 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2376,9 +2376,7 @@ end
# Concatenate the scalars `xs` along dimension `N` (for `N >= 1`), producing an
# `N`-dimensional array with element type `T` whose first `N - 1` dimensions have
# length 1 and whose last dimension has length `length(xs)`.
#
# Throws `ArgumentError` when `N` is negative.
function _typed_hvncat(T::Type, ::Val{N}, xs::Number...) where N
    N < 0 &&
        throw(ArgumentError("concatenation dimension must be non-negative"))
    # `T[xs...]` (rather than `[xs...]`) keeps the requested element type instead of the
    # promoted type of `xs`; `reshape` then adds the leading singleton dimensions.
    return reshape(T[xs...], (ntuple(Returns(1), Val(N - 1))..., length(xs)))
end

function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N}
Expand Down Expand Up @@ -2691,55 +2689,60 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::

# copy into final array
A = cat_similar(as[1], T, ntuple(i -> outdims[i], nd))
hvncat_fill!(A, currentdims, blockcounts, d1, d2, as)
if !any(==(0), outdims)
hvncat_fill!(A, currentdims, blockcounts, d1, d2, as)
end
return A
end

# Copy the blocks in `as` into the destination array `A`.
#
# Blocks are placed one after another; between consecutive blocks the write position
# advances along dimension `d1` first, then `d2`, then dimensions `3:N`, carrying into
# the next dimension whenever the current one is full.
#
# `scratch1` holds the running block offset into `A` for each dimension and `scratch2`
# the running element offset inside the current block. Both are used as found on entry
# and are mutated in place.
#
# Throws `ArgumentError` if `N < 2`, if either scratch vector does not have length `N`,
# or if `d1` and `d2` are not the two distinct values 1 and 2.
function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int},
                      d1::Int, d2::Int, as::Tuple) where {T, N}
    N > 1 || throw(ArgumentError("dimensions of the destination array must be at least 2"))
    length(scratch1) == length(scratch2) == N ||
        throw(ArgumentError("scratch vectors must have as many elements as the destination array has dimensions"))
    0 < d1 < 3 &&
        0 < d2 < 3 &&
        d1 != d2 ||
        throw(ArgumentError("d1 and d2 must be either 1 or 2, exclusive."))
    offsets = scratch1
    inneroffsets = scratch2
    for a ∈ as
        # first destination cell of the current block
        startindex = CartesianIndex(ntuple(i -> offsets[i] + 1, Val(N)))
        if isa(a, AbstractArray)
            if !isempty(a)
                if length(a) > 4
                    # larger blocks: assign the whole rectangular region at once
                    endindex = CartesianIndex(ntuple(i -> offsets[i] + cat_size(a, i), Val(N)))
                    @inbounds A[startindex:endindex] = a
                else
                    # small blocks: copy element by element
                    for ai ∈ a
                        # The destination index is computed inline so this method does
                        # not depend on the argument convention of `hvncat_calcindex`.
                        Ai = CartesianIndex(ntuple(i -> offsets[i] + inneroffsets[i] + 1, Val(N)))
                        @inbounds A[Ai] = ai
                        # step to the next element of `a`, first dimension fastest
                        @inbounds for j ∈ 1:N
                            inneroffsets[j] += 1
                            inneroffsets[j] < cat_size(a, j) && break
                            inneroffsets[j] = 0
                        end
                    end
                end
            end
        else
            @inbounds A[startindex] = a
        end

        # advance the block offset in the requested dimension order, with carry
        @inbounds for i ∈ (d1, d2, 3:N...)
            offsets[i] += cat_size(a, i)
            offsets[i] < cat_size(A, i) && break
            offsets[i] = 0
        end
    end
    return nothing
end

@propagate_inbounds function hvncat_calcindex(offsets::Vector{Int}, inneroffsets::Vector{Int},
outdims::Tuple{Vararg{Int}}, nd::Int)
outdimsprod::NTuple{N, Int}, nd::Int) where {N}
Ai = inneroffsets[1] + offsets[1] + 1
for j ∈ 2:nd
increment = inneroffsets[j] + offsets[j]
for k ∈ 1:j-1
increment *= outdims[k]
end
increment *= outdimsprod[j - 1]
Ai += increment
end
Ai
Expand Down
1 change: 1 addition & 0 deletions base/indices.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ end
# those are the permutations that preserve the order of the non-singleton
# dimensions.
function setindex_shape_check(X::AbstractArray, I::Integer...)
@inline
li = ndims(X)
lj = length(I)
i = j = 1
Expand Down