Skip to content

Commit c5db6f6

Browse files
committed
adding atomics via CUDA and Core.Intrinsics
1 parent d52a6f3 commit c5db6f6

File tree

6 files changed

+475
-0
lines changed

6 files changed

+475
-0
lines changed

lib/CUDAKernels/src/CUDAKernels.jl

+26
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ else
359359
end
360360

361361
import KernelAbstractions: ConstAdaptor, SharedMemory, Scratchpad, __synchronize, __size
362+
import KernelAbstractions: atomic_add!, atomic_and!, atomic_cas!, atomic_dec!, atomic_inc!, atomic_max!, atomic_min!, atomic_op!, atomic_or!, atomic_sub!, atomic_xchg!, atomic_xor!
362363

363364
###
364365
# GPU implementation of shared memory
@@ -395,4 +396,29 @@ Adapt.adapt_storage(to::ConstAdaptor, a::CUDA.CuDeviceArray) = Base.Experimental
395396
# Argument conversion
396397
KernelAbstractions.argconvert(k::Kernel{CUDADevice}, arg) = CUDA.cudaconvert(arg)
397398

399+
400+
###
401+
# GPU implementation of atomics
402+
###
403+
404+
# Table pairing every KernelAbstractions atomic function (imported above) with
# the CUDA function that the device override forwards to.
afxs = Dict(
    atomic_add! => CUDA.atomic_add!,
    atomic_and! => CUDA.atomic_and!,
    atomic_cas! => CUDA.atomic_cas!,
    atomic_dec! => CUDA.atomic_dec!,
    atomic_inc! => CUDA.atomic_inc!,
    atomic_max! => CUDA.atomic_max!,
    atomic_min! => CUDA.atomic_min!,
    atomic_op! => CUDA.atomic_op!,
    atomic_or! => CUDA.atomic_or!,
    atomic_sub! => CUDA.atomic_sub!,
    atomic_xchg! => CUDA.atomic_xchg!,
    atomic_xor! => CUDA.atomic_xor!,
)

# Generate one device override per table entry.
# `@eval` with interpolation is required here: a plain `function afx(args...)`
# inside the loop declares a function literally named `afx` instead of adding
# a method to the atomic function held by the loop variable.
for (afx, cfx) in afxs
    @eval @device_override @inline function $(nameof(afx))(args...)
        return $cfx(args...)
    end
end
398424
end

src/KernelAbstractions.jl

+4
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,10 @@ include("extras/extras.jl")
496496

497497
include("reflection.jl")
498498

499+
# Atomics
500+
501+
include("atomics.jl")
502+
499503
# CPU backend
500504

501505
include("cpu.jl")

src/atomics.jl

+203
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
###
2+
# Atomics
3+
###
4+
5+
export atomic_add!, atomic_sub!, atomic_and!, atomic_or!, atomic_xor!,
6+
atomic_min!, atomic_max!, atomic_inc!, atomic_dec!, atomic_xchg!,
7+
atomic_op!, atomic_cas!
8+
9+
# Helper functions for inc(rement) and dec(rement).

"""
    dec(a::T, b::T) where T

Return `b` when `a` is zero or larger than `b`; otherwise return `a - 1`.
This is the update rule applied by `atomic_dec!`.
"""
function dec(a::T, b::T) where T
    return ((a == 0) | (a > b)) ? b : (a - one(T))
end

"""
    inc(a::T, b::T) where T

Return zero when `a` is greater than or equal to `b`; otherwise return `a + 1`.
This is the update rule applied by `atomic_inc!`.
"""
function inc(a::T, b::T) where T
    return (a >= b) ? zero(T) : (a + one(T))
end

# Arithmetic, bitwise, min/max, and inc/dec operations: each key is the name of
# the atomic function to generate, each value the binary operation it applies.
const ops = Dict(
    :atomic_add! => +,
    :atomic_sub! => -,
    :atomic_and! => &,
    :atomic_or! => |,
    :atomic_xor! => xor,  # `xor` is the ASCII spelling of the `⊻` operator
    :atomic_min! => min,
    :atomic_max! => max,
    :atomic_inc! => inc,
    :atomic_dec! => dec,
)

# Note: `ptr` and `b` share the type parameter `T`, so no type conversion takes
# place (for example, Float32 -> Float64). A `b` of a different type, even a
# compatible one, does not match these methods.
# The generated functions return whatever `Core.Intrinsics.atomic_pointermodify`
# returns. NOTE(review): confirm that this matches the "returns `old`" wording
# used in the docstrings of these functions.
for (name, op) in ops
    @eval @inline function $name(ptr::Ptr{T}, b::T) where T
        return Core.Intrinsics.atomic_pointermodify(ptr, $op, b, :monotonic)
    end
end
38+
"""
    atomic_cas!(ptr::Ptr{T}, old::T, new::T)

Atomic Compare And Swap (CAS) on the value stored at address `ptr`.

The stored value is compared with `old`. If the two are equal, `new` is written to
the same address; otherwise the stored value is left unchanged.
The comparison and the store are performed in one atomic transaction through
`Core.Intrinsics.atomic_pointerreplace`, with `:acquire_release` ordering on success
and `:monotonic` ordering on failure.

The result of the intrinsic is returned unchanged.
NOTE(review): this function was previously documented as returning the value that was
stored at `ptr` before the call; confirm that against the intrinsic's return value.

This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Additionally, on GPU hardware with compute capability 7.0+, values of type UInt16 are supported.
Also: atomic operations on the CPU require Julia 1.7.0 or above.
"""
atomic_cas!(ptr::Ptr{T}, old::T, new::T) where {T} =
    Core.Intrinsics.atomic_pointerreplace(ptr, old, new, :acquire_release, :monotonic)
56+
"""
    atomic_xchg!(ptr::Ptr{T}, b::T)

Atomic exchange on the value stored at address `ptr`.

The value `old` currently stored at `ptr` is read and `b` is stored at the same
address, both in one atomic transaction with `:monotonic` ordering.
The function returns `old`.

This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
Also: atomic operations on the CPU require Julia 1.7.0 or above.
"""
function atomic_xchg!(ptr::Ptr{T}, b::T) where {T}
    # The method signature already fixes the argument types, so the arguments
    # are forwarded to the intrinsic without further type assertions.
    previous = Core.Intrinsics.atomic_pointerswap(ptr, b, :monotonic)
    return previous
end
70+
"""
    atomic_op!(ptr::Ptr{T}, op, b::T)

Arbitrary atomic read-modify-write on the value stored at address `ptr`.

The value `old` stored at `ptr` and the operand `b` are handed, together with the
caller-supplied operation `op`, to `Core.Intrinsics.atomic_pointermodify`, which
performs the update in one atomic transaction with `:monotonic` ordering.

The result of the intrinsic is returned unchanged.
NOTE(review): this function was previously documented as returning `old`; confirm
that against the intrinsic's return value.

This function is somewhat experimental.
Also: atomic operations on the CPU require Julia 1.7.0 or above.
"""
function atomic_op!(ptr::Ptr{T}, op, b::T) where {T}
    # The method signature already fixes the types of `ptr` and `b`.
    outcome = Core.Intrinsics.atomic_pointermodify(ptr, op, b, :monotonic)
    return outcome
end
84+
85+
# Other Documentation
86+
87+
"""
88+
atomic_add!(ptr::Ptr{T}, val::T)
89+
90+
This is an atomic addition.
91+
It reads the value `old` located at address `ptr`, computes `old + val`, and stores the result back to memory at the same address.
92+
These operations are performed in one atomic transaction.
93+
The function returns `old`.
94+
95+
This operation is supported for values of type Int32, Int64, UInt32, UInt64, and Float32.
96+
Additionally, on GPU hardware with compute capability 6.0+, values of type Float64 are supported.
97+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
98+
"""
99+
atomic_add!
100+
101+
"""
102+
atomic_sub!(ptr::Ptr{T}, val::T)
103+
104+
This is an atomic subtraction.
105+
It reads the value `old` located at address `ptr`, computes `old - val`, and stores the result back to memory at the same address.
106+
These operations are performed in one atomic transaction.
107+
The function returns `old`.
108+
109+
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
110+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
111+
"""
112+
atomic_sub!
113+
114+
"""
115+
atomic_and!(ptr::Ptr{T}, val::T)
116+
117+
This is an atomic and.
118+
It reads the value `old` located at address `ptr`, computes `old & val`, and stores the result back to memory at the same address.
119+
These operations are performed in one atomic transaction.
120+
The function returns `old`.
121+
122+
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
123+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
124+
"""
125+
atomic_and!
126+
127+
"""
128+
atomic_or!(ptr::Ptr{T}, val::T)
129+
130+
This is an atomic or.
131+
It reads the value `old` located at address `ptr`, computes `old | val`, and stores the result back to memory at the same address.
132+
These operations are performed in one atomic transaction.
133+
The function returns `old`.
134+
135+
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
136+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
137+
"""
138+
atomic_or!
139+
140+
"""
141+
atomic_xor!(ptr::Ptr{T}, val::T)
142+
143+
This is an atomic xor.
144+
It reads the value `old` located at address `ptr`, computes `old ⊻ val`, and stores the result back to memory at the same address.
145+
These operations are performed in one atomic transaction.
146+
The function returns `old`.
147+
148+
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
149+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
150+
"""
151+
atomic_xor!
152+
153+
"""
154+
atomic_min!(ptr::Ptr{T}, val::T)
155+
156+
This is an atomic min.
157+
It reads the value `old` located at address `ptr`, computes `min(old, val)`, and stores the result back to memory at the same address.
158+
These operations are performed in one atomic transaction.
159+
The function returns `old`.
160+
161+
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
162+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
163+
"""
164+
atomic_min!
165+
166+
"""
167+
atomic_max!(ptr::Ptr{T}, val::T)
168+
169+
This is an atomic max.
170+
It reads the value `old` located at address `ptr`, computes `max(old, val)`, and stores the result back to memory at the same address.
171+
These operations are performed in one atomic transaction.
172+
The function returns `old`.
173+
174+
This operation is supported for values of type Int32, Int64, UInt32 and UInt64.
175+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
176+
"""
177+
atomic_max!
178+
179+
"""
180+
atomic_inc!(ptr::Ptr{T}, val::T)
181+
182+
This is an atomic increment function that counts up to a certain number before starting again at 0.
183+
It reads the value `old` located at address `ptr`, computes `((old >= val) ? 0 : (old+1))`, and stores the result back to memory at the same address.
184+
These three operations are performed in one atomic transaction.
185+
The function returns `old`.
186+
187+
This operation is only supported for values of type Int32.
188+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
189+
"""
190+
atomic_inc!
191+
192+
"""
193+
atomic_dec!(ptr::Ptr{T}, val::T)
194+
195+
This is an atomic decrement function that counts down to 0 from a defined value `val`.
196+
It reads the value `old` located at address `ptr`, computes `(((old == 0) | (old > val)) ? val : (old-1))`, and stores the result back to memory at the same address.
197+
These three operations are performed in one atomic transaction.
198+
The function returns `old`.
199+
200+
This operation is only supported for values of type Int32.
201+
Also: atomic operations for the CPU requires a Julia version of 1.7.0 or above.
202+
"""
203+
atomic_dec!

src/cpu.jl

+27
Original file line numberDiff line numberDiff line change
@@ -234,3 +234,30 @@ end
234234

235235
# Argument conversion
236236
KernelAbstractions.argconvert(k::Kernel{CPU}, arg) = arg
237+
238+
###
239+
# CPU error handling if under 1.7
240+
###
241+
242+
if Base.VERSION < v"1.7.0"

    import KernelAbstractions: atomic_add!, atomic_and!, atomic_cas!,
                               atomic_dec!, atomic_inc!, atomic_max!,
                               atomic_min!, atomic_op!, atomic_or!,
                               atomic_sub!, atomic_xchg!, atomic_xor!

    # Throw a descriptive error for any atomic call made on a Julia version
    # older than 1.7; the arguments are accepted only so that every atomic
    # function can forward to this helper.
    function atomic_error(args...)
        error("CPU Atomics are not allowed for julia version under 1.7!")
    end

    afxs = [atomic_add!, atomic_and!, atomic_cas!, atomic_dec!,
            atomic_inc!, atomic_max!, atomic_min!, atomic_op!,
            atomic_or!, atomic_sub!, atomic_xchg!, atomic_xor!]

    # Add a catch-all method to every atomic function that raises the error.
    # `@eval` with the interpolated name is required: a plain
    # `function afx(...)` inside the loop targets the loop variable itself, and
    # the method body must accept `args...` in order to forward them.
    # NOTE(review): the `Ptr{T}` methods defined in atomics.jl are more specific
    # than this catch-all, so calls with pointer arguments still dispatch to
    # them; confirm whether atomics.jl should be skipped on these versions.
    for afx in afxs
        @eval @inline function $(nameof(afx))(args...)
            return atomic_error(args...)
        end
    end
end
263+

0 commit comments

Comments
 (0)