@@ -154,6 +154,15 @@ function tiled_blasmul!(tile_size, α, A::AbstractMatrix{T}, B::AbstractMatrix{S
154
154
C
155
155
end
156
156
157
# Kernel for a single output entry: adds α times the sum over ν of
# A[k, ν] * B[ν, j] to C[k, j], where ν ranges over
# rowsupport(A, k) ∩ colsupport(B, j).
#
# `β` is part of the call signature but is not referenced in this body.
# NOTE(review): presumably the caller applies β to C beforehand — confirm.
@inline function _default_blasmul_loop!(α, A, B, β, C, k, j)
    # Only the *type* of a sum of entry products is needed to seed the
    # accumulator, so sample one entry from each operand.
    @inbounds a = A[k, 1]
    @inbounds b = B[1, j]
    z = zero(a * b + a * b)
    acc = convert(promote_type(eltype(C), typeof(z)), z)

    # Indices where both the k-th row of A and the j-th column of B have support.
    support = rowsupport(A, k) ∩ colsupport(B, j)
    @simd for ν in support
        @inbounds acc = muladd(A[k, ν], B[ν, j], acc)
    end

    # Fold the scaled dot product into the existing entry of C.
    @inbounds C[k, j] = muladd(α, acc, C[k, j])
end
165
+
157
166
function default_blasmul! (α, A:: AbstractMatrix , B:: AbstractMatrix , β, C:: AbstractMatrix )
158
167
mA, nA = size (A)
159
168
mB, nB = size (B)
@@ -165,13 +174,17 @@ function default_blasmul!(α, A::AbstractMatrix, B::AbstractMatrix, β, C::Abstr
165
174
(iszero (mA) || iszero (nB)) && return C
166
175
iszero (nA) && return C
167
176
168
- @inbounds for k in colsupport (A), j in rowsupport (B,rowsupport (A,k))
169
- z2 = zero (A[k, 1 ]* B[1 , j] + A[k, 1 ]* B[1 , j])
170
- Ctmp = convert (promote_type (eltype (C), typeof (z2)), z2)
171
- @simd for ν = rowsupport (A,k) ∩ colsupport (B,j)
172
- Ctmp = muladd (A[k, ν],B[ν, j],Ctmp)
177
+ r = rowsupport (B,rowsupport (A,first (colsupport (A))))
178
+ jindsid = all (k -> rowsupport (B,rowsupport (A,k)) == r, colsupport (A))
179
+
180
+ if jindsid
181
+ for j in rowsupport (B,rowsupport (A,1 )), k in colsupport (A)
182
+ _default_blasmul_loop! (α, A, B, β, C, k, j)
183
+ end
184
+ else
185
+ for k in colsupport (A), j in rowsupport (B,rowsupport (A,k))
186
+ _default_blasmul_loop! (α, A, B, β, C, k, j)
173
187
end
174
- C[k,j] = muladd (α,Ctmp, C[k,j])
175
188
end
176
189
C
177
190
end
0 commit comments