Skip to content

Commit 536d21b

Browse files
authored
Improve cache localization in default_blasmul (#130)
* improve cache localization in default_blasmul
* avoid code duplication
* remove redundant inbounds
* version bump to v1.0.3
1 parent 11b024f commit 536d21b

File tree

2 files changed

+20
-7
lines changed

2 files changed

+20
-7
lines changed

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "ArrayLayouts"
22
uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
33
authors = ["Sheehan Olver <solver@mac.com>"]
4-
version = "1.0.2"
4+
version = "1.0.3"
55

66
[deps]
77
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"

src/muladd.jl

+19-6
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,15 @@ function tiled_blasmul!(tile_size, α, A::AbstractMatrix{T}, B::AbstractMatrix{S
154154
C
155155
end
156156

157+
@inline function _default_blasmul_loop!(α, A, B, β, C, k, j)
158+
z2 = @inbounds zero(A[k, 1]*B[1, j] + A[k, 1]*B[1, j])
159+
Ctmp = convert(promote_type(eltype(C), typeof(z2)), z2)
160+
@simd for ν = rowsupport(A,k) ∩ colsupport(B,j)
161+
Ctmp = @inbounds muladd(A[k, ν],B[ν, j],Ctmp)
162+
end
163+
@inbounds C[k,j] = muladd(α,Ctmp, C[k,j])
164+
end
165+
157166
function default_blasmul!(α, A::AbstractMatrix, B::AbstractMatrix, β, C::AbstractMatrix)
158167
mA, nA = size(A)
159168
mB, nB = size(B)
@@ -165,13 +174,17 @@ function default_blasmul!(α, A::AbstractMatrix, B::AbstractMatrix, β, C::Abstr
165174
(iszero(mA) || iszero(nB)) && return C
166175
iszero(nA) && return C
167176

168-
@inbounds for k in colsupport(A), j in rowsupport(B,rowsupport(A,k))
169-
z2 = zero(A[k, 1]*B[1, j] + A[k, 1]*B[1, j])
170-
Ctmp = convert(promote_type(eltype(C), typeof(z2)), z2)
171-
@simd for ν = rowsupport(A,k) ∩ colsupport(B,j)
172-
Ctmp = muladd(A[k, ν],B[ν, j],Ctmp)
177+
r = rowsupport(B,rowsupport(A,first(colsupport(A))))
178+
jindsid = all(k -> rowsupport(B,rowsupport(A,k)) == r, colsupport(A))
179+
180+
if jindsid
181+
for j in rowsupport(B,rowsupport(A,1)), k in colsupport(A)
182+
_default_blasmul_loop!(α, A, B, β, C, k, j)
183+
end
184+
else
185+
for k in colsupport(A), j in rowsupport(B,rowsupport(A,k))
186+
_default_blasmul_loop!(α, A, B, β, C, k, j)
173187
end
174-
C[k,j] = muladd(α,Ctmp, C[k,j])
175188
end
176189
C
177190
end

0 commit comments

Comments
 (0)