Skip to content

Whole-array reductions always use init to start each reduction chain #58241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ end
function reduce(::typeof(merge), items::Vector{<:Dict})
K = mapreduce(keytype, promote_type, items)
V = mapreduce(valtype, promote_type, items)
return reduce(merge!, items; init=Dict{K,V}())
return foldl(merge!, items; init=Dict{K,V}())
end

function map!(f, iter::ValueIterator{<:Dict})
Expand Down
36 changes: 17 additions & 19 deletions base/missing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,10 @@ end
# Optimized mapreduce implementation
# The generic method is faster when !(eltype(A) >: Missing) since it does not need
# additional loops to identify the first two non-missing values of each block
mapreduce(f, op, itr::SkipMissing{<:AbstractArray}) =
_mapreduce(f, op, IndexStyle(itr.x), eltype(itr.x) >: Missing ? itr : itr.x)
mapreduce(f, op, itr::SkipMissing{<:AbstractArray}; init=Base._InitialValue()) =
_mapreduce(f, op, IndexStyle(itr.x), eltype(itr.x) >: Missing ? itr : itr.x, init)

function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray}, init=_InitialValue())
A = itr.x
ai = missing
inds = LinearIndices(A)
Expand All @@ -283,37 +283,35 @@ function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
@inbounds ai = A[i]
!ismissing(ai) && break
end
ismissing(ai) && return mapreduce_empty(f, op, eltype(itr))
ismissing(ai) && return _mapreduce_start(f, op, itr, init)
a1::eltype(itr) = ai
i == typemax(typeof(i)) && return mapreduce_first(f, op, a1)
i == typemax(typeof(i)) && return _mapreduce_start(f, op, itr, init, a1)
i += 1
ai = missing
for outer i in i:ilast
@inbounds ai = A[i]
!ismissing(ai) && break
end
ismissing(ai) && return mapreduce_first(f, op, a1)
ismissing(ai) && return _mapreduce_start(f, op, itr, init, a1)
# We know A contains at least two non-missing entries: the result cannot be nothing
something(mapreduce_impl(f, op, itr, first(inds), last(inds)))
something(mapreduce_impl(f, op, itr, first(inds), last(inds), init))
end

_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing) = mapfoldl(f, op, itr)

mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))
_mapreduce(f, op, ::IndexCartesian, itr::SkipMissing, init=_InitialValue()) = mapfoldl(f, op, itr; init)

# Returns nothing when the input contains only missing values, and Some(x) otherwise
@noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray},
ifirst::Integer, ilast::Integer, blksize::Int)
ifirst::Integer, ilast::Integer, init=_InitialValue())
A = itr.x
blksize = pairwise_blocksize(f, op)
if ifirst > ilast
return nothing
elseif ifirst == ilast
@inbounds a1 = A[ifirst]
if ismissing(a1)
return nothing
else
return Some(mapreduce_first(f, op, a1))
return Some(_mapreduce_start(f, op, itr, init, a1))
end
elseif ilast - ifirst < blksize
# sequential portion
Expand All @@ -325,18 +323,18 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
end
ismissing(ai) && return nothing
a1 = ai::eltype(itr)
i == typemax(typeof(i)) && return Some(mapreduce_first(f, op, a1))
i == typemax(typeof(i)) && return Some(_mapreduce_start(f, op, itr, init, a1))
i += 1
ai = missing
for outer i in i:ilast
@inbounds ai = A[i]
!ismissing(ai) && break
end
ismissing(ai) && return Some(mapreduce_first(f, op, a1))
ismissing(ai) && return Some(_mapreduce_start(f, op, itr, init, a1))
a2 = ai::eltype(itr)
i == typemax(typeof(i)) && return Some(op(f(a1), f(a2)))
i == typemax(typeof(i)) && return Some(op(_mapreduce_start(f, op, itr, init, a1), f(a2)))
i += 1
v = op(f(a1), f(a2))
v = op(_mapreduce_start(f, op, itr, init, a1), f(a2))
@simd for i = i:ilast
@inbounds ai = A[i]
if !ismissing(ai)
Expand All @@ -347,8 +345,8 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
else
# pairwise portion
imid = ifirst + (ilast - ifirst) >> 1
v1 = mapreduce_impl(f, op, itr, ifirst, imid, blksize)
v2 = mapreduce_impl(f, op, itr, imid+1, ilast, blksize)
v1 = mapreduce_impl(f, op, itr, ifirst, imid, init)
v2 = mapreduce_impl(f, op, itr, imid+1, ilast, init)
if v1 === nothing && v2 === nothing
return nothing
elseif v1 === nothing
Expand Down
62 changes: 39 additions & 23 deletions base/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,22 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)

## reduce & mapreduce

# Element type passed to `mapreduce_empty` when a reduction over `x` has no elements.
# Dispatches on `IteratorEltype(x)`: returns `eltype(x)` when the trait is `HasEltype`,
# and otherwise calls `_empty_reduce_error()` (defined elsewhere; presumably throws —
# TODO confirm).
_empty_eltype(x) = _empty_eltype(x, IteratorEltype(x))
_empty_eltype(x, ::HasEltype) = eltype(x)
_empty_eltype(_, _) = _empty_reduce_error()
"""
    _mapreduce_start(f, op, A, init, [a1])

Compute the starting value of a mapped reduction over `A`.

Without `a1` (the reduction has no elements), return `init` if one was supplied, or
`mapreduce_empty(f, op, _empty_eltype(A))` if `init` is an `_InitialValue`.

With `a1` (the first element of a chain of `op` calls), return
`op(init, mapreduce_first(f, op, a1))` if `init` was supplied, or
`mapreduce_first(f, op, a1)` if `init` is an `_InitialValue`.

The method taking `a1` may be called multiple times within a single reduction, once at
the start of each new chain of `op` calls, so a supplied `init` may be combined into
the result more than once.
"""
_mapreduce_start(f, op, A, ::_InitialValue) = mapreduce_empty(f, op, _empty_eltype(A))
_mapreduce_start(f, op, A, ::_InitialValue, a1) = mapreduce_first(f, op, a1)
_mapreduce_start(f, op, A, init) = init
_mapreduce_start(f, op, A, init, a1) = op(init, mapreduce_first(f, op, a1))


# `mapreduce_impl()` is called by `mapreduce()` (via `_mapreduce()`, when `A`
# supports linear indexing) and does the actual calculation (for the `A[ifirst:ilast]` subset).
# For efficiency, no parameter validity checks are done; that is the caller's responsibility.
Expand All @@ -243,15 +259,15 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
# This is a generic implementation of `mapreduce_impl()`;
# certain `op` (e.g. `min` and `max`) may have their own specialized versions.
@noinline function mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted,
ifirst::Integer, ilast::Integer, blksize::Int)
ifirst::Integer, ilast::Integer, init=_InitialValue())
blksize = pairwise_blocksize(f, op)
if ifirst == ilast
@inbounds a1 = A[ifirst]
return mapreduce_first(f, op, a1)
return _mapreduce_start(f, op, A, init, @inbounds(A[ifirst]))
elseif ilast - ifirst < blksize
# sequential portion
@inbounds a1 = A[ifirst]
@inbounds a2 = A[ifirst+1]
v = op(f(a1), f(a2))
v = op(_mapreduce_start(f, op, A, init, a1), f(a2))
@simd for i = ifirst + 2 : ilast
@inbounds ai = A[i]
v = op(v, f(ai))
Expand All @@ -260,15 +276,12 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
else
# pairwise portion
imid = ifirst + (ilast - ifirst) >> 1
v1 = mapreduce_impl(f, op, A, ifirst, imid, blksize)
v2 = mapreduce_impl(f, op, A, imid+1, ilast, blksize)
v1 = mapreduce_impl(f, op, A, ifirst, imid, init)
v2 = mapreduce_impl(f, op, A, imid+1, ilast, init)
return op(v1, v2)
end
end

mapreduce_impl(f, op, A::AbstractArrayOrBroadcasted, ifirst::Integer, ilast::Integer) =
mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))

"""
mapreduce(f, op, itrs...; [init])

Expand Down Expand Up @@ -412,34 +425,34 @@ The default is `reduce_first(op, f(x))`.
"""
mapreduce_first(f, op, x) = reduce_first(op, f(x))

_mapreduce(f, op, A::AbstractArrayOrBroadcasted) = _mapreduce(f, op, IndexStyle(A), A)
_mapreduce(f, op, A::AbstractArrayOrBroadcasted, init=_InitialValue()) = _mapreduce(f, op, IndexStyle(A), A, init)

function _mapreduce(f, op, ::IndexLinear, A::AbstractArrayOrBroadcasted)
function _mapreduce(f, op, ::IndexLinear, A::AbstractArrayOrBroadcasted, init=_InitialValue())
inds = LinearIndices(A)
n = length(inds)
if n == 0
return mapreduce_empty_iter(f, op, A, IteratorEltype(A))
return _mapreduce_start(f, op, A, init)
elseif n == 1
@inbounds a1 = A[first(inds)]
return mapreduce_first(f, op, a1)
return _mapreduce_start(f, op, A, init, a1)
elseif n < 16 # process short array here, avoid mapreduce_impl() compilation
@inbounds i = first(inds)
@inbounds a1 = A[i]
@inbounds a2 = A[i+=1]
s = op(f(a1), f(a2))
s = op(_mapreduce_start(f, op, A, init, a1), f(a2))
while i < last(inds)
@inbounds Ai = A[i+=1]
s = op(s, f(Ai))
end
return s
else
return mapreduce_impl(f, op, A, first(inds), last(inds))
return mapreduce_impl(f, op, A, first(inds), last(inds), init)
end
end

mapreduce(f, op, a::Number) = mapreduce_first(f, op, a)
mapreduce(f, op, a::Number; init=Base._InitialValue()) = _mapreduce_start(f, op, a, init, a)

_mapreduce(f, op, ::IndexCartesian, A::AbstractArrayOrBroadcasted) = mapfoldl(f, op, A)
_mapreduce(f, op, ::IndexCartesian, A::AbstractArrayOrBroadcasted, init=_InitialValue()) = mapfoldl(f, op, A; init)

"""
reduce(op, itr; [init])
Expand Down Expand Up @@ -630,9 +643,10 @@ isgoodzero(::typeof(max), x) = isbadzero(min, x)
isgoodzero(::typeof(min), x) = isbadzero(max, x)

function mapreduce_impl(f, op::Union{typeof(max), typeof(min)},
A::AbstractArrayOrBroadcasted, first::Int, last::Int)
A::AbstractArrayOrBroadcasted, first::Integer, last::Integer, init)
a1 = @inbounds A[first]
v1 = mapreduce_first(f, op, a1)
v1 = _mapreduce_start(f, op, A, init, a1)
last == first && return v1
v2 = v3 = v4 = v1
chunk_len = 256
start = first + 1
Expand Down Expand Up @@ -1130,8 +1144,8 @@ julia> count(i->(4<=i<=6), [2,3,4,5,6])
julia> count([true, false, true, true])
3

julia> count(>(3), 1:7, init=0x03)
0x07
julia> count(>(3), 1:7, init=UInt(0))
0x0000000000000004
```
"""
count(itr; init=0) = count(identity, itr; init)
Expand All @@ -1140,8 +1154,10 @@ count(f, itr; init=0) = _simple_count(f, itr, init)

_simple_count(pred, itr, init) = sum(_bool(pred), itr; init)

function _simple_count(::typeof(identity), x::Array{Bool}, init::T=0) where {T}
n::T = init
function _simple_count(::typeof(identity), x::Array{Bool}, init=0)
v0 = _mapreduce_start(identity, Base.add_sum, x, init, false)
T = typeof(v0)
n::T = v0
chunks = length(x) ÷ sizeof(UInt)
mask = 0x0101010101010101 % UInt
GC.@preserve x begin
Expand Down
6 changes: 3 additions & 3 deletions base/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
nslices = div(length(A), lsiz)
ibase = first(LinearIndices(A))-1
for i in eachindex(R)
r = op(@inbounds(R[i]), mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
r = op(@inbounds(R[i]), mapreduce_impl(f, op, A, ibase+1, ibase+lsiz, _InitialValue()))
@inbounds R[i] = r
ibase += lsiz
end
Expand Down Expand Up @@ -329,9 +329,9 @@ mapreduce(f, op, A::AbstractArrayOrBroadcasted, B::AbstractArrayOrBroadcasted...
reduce(op, map(f, A, B...); kw...)

_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, ::Colon) =
mapfoldl_impl(f, op, nt, A)
_mapreduce(f, op, IndexStyle(A), A, nt)

_mapreduce_dim(f, op, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) =
_mapreduce_dim(f, op, nt::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) =
_mapreduce(f, op, IndexStyle(A), A)

_mapreduce_dim(f, op, nt, A::AbstractArrayOrBroadcasted, dims) =
Expand Down
18 changes: 9 additions & 9 deletions base/reinterpretarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -905,18 +905,21 @@ end

# Reductions with IndexSCartesian2

function _mapreduce(f::F, op::OP, style::IndexSCartesian2{K}, A::AbstractArrayOrBroadcasted) where {F,OP,K}
function _mapreduce(f::F, op::OP, style::IndexSCartesian2{K}, A::AbstractArrayOrBroadcasted, init=_InitialValue()) where {F,OP,K}
inds = eachindex(style, A)
n = size(inds)[2]
if n == 0
return mapreduce_empty_iter(f, op, A, IteratorEltype(A))
return _mapreduce_start(f, op, A, init)
elseif n == 1
return _mapreduce_start(f, op, A, init, first(A))
else
return mapreduce_impl(f, op, A, first(inds), last(inds))
return mapreduce_impl(f, op, A, first(inds), last(inds), init)
end
end

@noinline function mapreduce_impl(f::F, op::OP, A::AbstractArrayOrBroadcasted,
ifirst::SCI, ilast::SCI, blksize::Int) where {F,OP,SCI<:SCartesianIndex2{K}} where K
ifirst::SCI, ilast::SCI, init=_InitialValue()) where {F,OP,SCI<:SCartesianIndex2{K}} where K
blksize = pairwise_blocksize(f, op)
if ilast.j - ifirst.j < blksize
# sequential portion
@inbounds a1 = A[ifirst]
Expand All @@ -937,11 +940,8 @@ end
else
# pairwise portion
jmid = ifirst.j + (ilast.j - ifirst.j) >> 1
v1 = mapreduce_impl(f, op, A, ifirst, SCI(K,jmid), blksize)
v2 = mapreduce_impl(f, op, A, SCI(1,jmid+1), ilast, blksize)
v1 = mapreduce_impl(f, op, A, ifirst, SCI(K,jmid), init)
v2 = mapreduce_impl(f, op, A, SCI(1,jmid+1), ilast, init)
return op(v1, v2)
end
end

mapreduce_impl(f::F, op::OP, A::AbstractArrayOrBroadcasted, ifirst::SCartesianIndex2, ilast::SCartesianIndex2) where {F,OP} =
mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))
8 changes: 4 additions & 4 deletions contrib/juliac-buildscript.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,10 @@ end
reduce(op, map(f, A...); kw...)

_mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
mapfoldl_impl(f, op, nt, A)
_mapreduce(f, op, IndexStyle(A), A, nt)

_mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
_mapreduce(f, op, IndexStyle(A), A)
_mapreduce_dim(f::F, op::F2, nt::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
_mapreduce(f, op, IndexStyle(A), A, nt)

_mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, dims) where {F, F2} =
mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)
Expand All @@ -121,7 +121,7 @@ end
reduce_empty_iter(MappingRF(f, op), itr, ItrEltype)
mapreduce_first(f::F, op::F2, x) where {F,F2} = reduce_first(op, f(x))

_mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted) where {F,F2} = _mapreduce(f, op, IndexStyle(A), A)
_mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted, init) where {F,F2} = _mapreduce(f, op, IndexStyle(A), A, init)
mapreduce_empty(::typeof(identity), op::F, T) where {F} = reduce_empty(op, T)
mapreduce_empty(::typeof(abs), op::F, T) where {F} = abs(reduce_empty(op, T))
mapreduce_empty(::typeof(abs2), op::F, T) where {F} = abs2(reduce_empty(op, T))
Expand Down
5 changes: 3 additions & 2 deletions test/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -570,8 +570,9 @@ struct NonFunctionIsZero end

@test count(Iterators.repeated(true, 3), init=0x04) === 0x07
@test count(!=(2), Iterators.take(1:7, 3), init=Int32(0)) === Int32(2)
@test count(identity, [true, false], init=Int8(5)) === Int8(6)
@test count(!, [true false; false true], dims=:, init=Int16(0)) === Int16(2)
@test count(identity, [true, false], init=Int8(5)) === 6
@test @inferred(count(identity, [true, false], init=Int8(5))) === 6
@test count(!, [true false; false true], dims=:, init=Int16(0)) === 2
@test isequal(count(identity, [true false; false true], dims=2, init=UInt(4)), reshape(UInt[5, 5], 2, 1))

## cumsum, cummin, cummax
Expand Down
3 changes: 2 additions & 1 deletion test/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -693,7 +693,8 @@ end
@test_throws TypeError count!([1], [1])
end

@test @inferred(count(false:true, dims=:, init=0x0004)) === 0x0005
@test count(false:true, dims=:, init=0x0004) === 5
@test_broken @inferred(count(false:true, dims=:, init=0x0004)) === 5
@test @inferred(count(isodd, reshape(1:9, 3, 3), dims=:, init=Int128(0))) === Int128(5)

@testset "reduced_index for BigInt (issue #39995)" begin
Expand Down