Skip to content

Commit d7eefa7

Browse files
example of hacking spirv64-amd into this
1 parent d91671f commit d7eefa7

File tree

2 files changed

+24
-6
lines changed

2 files changed

+24
-6
lines changed

clang/lib/Headers/gpuintrin.h

+11-4
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,21 @@ __gpu_match_all_u64_impl(uint64_t __lane_mask, uint64_t __x);
5656
_Pragma("omp end declare variant");
5757
_Pragma("omp end declare target");
5858

59+
60+
5961
#if defined(__NVPTX__)
60-
#include <nvptxintrin.h>
62+
#include <nvptxintrin.h>
6163
#elif defined(__AMDGPU__)
62-
#include <amdgpuintrin.h>
64+
#if defined(__SPIRV64__)
65+
// the spirv64-amd-amdhsa triple claims to be amdgpu and spirv
66+
#include <spirvintrin.h>
67+
#else
68+
#include <amdgpuintrin.h>
69+
#endif
6370
#elif defined(__SPIRV64__)
64-
#include <spirvintrin.h>
71+
#include <spirvintrin.h>
6572
#elif !defined(_OPENMP)
66-
#error "This header is only meant to be used on GPU architectures."
73+
#error "This header is only meant to be used on GPU architectures."
6774
#endif
6875

6976
_Pragma("omp begin declare target device_type(nohost)");

clang/lib/Headers/spirvintrin.h

+13-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,13 @@
3737
#define __gpu_kernel
3838

3939
// Returns the number of workgroups in the 'x' dimension of the grid.
40-
_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_x(void);
40+
_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_x(void) {
41+
#if defined(__AMDGPU__)
42+
return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
43+
#else
44+
#error "Not yet implemented"
45+
#endif
46+
}
4147

4248
// Returns the number of workgroups in the 'y' dimension of the grid.
4349
_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_y(void);
@@ -83,7 +89,12 @@ _DEFAULT_FN_ATTRS uint64_t __gpu_lane_mask(void);
8389

8490
// Copies the value from the first active thread in the wave to the rest.
8591
_DEFAULT_FN_ATTRS uint32_t __gpu_read_first_lane_u32(uint64_t __lane_mask,
86-
uint32_t __x);
92+
uint32_t __x) {
93+
94+
uint64_t __gpu_read_first_lane_u32_impl(
95+
uint64_t __lane_mask, uint32_t __x) asm("llvm.spv.wave.readlane.i64");
96+
return __gpu_read_first_lane_u32_impl(__lane_mask, __x);
97+
}
8798

8899
// Returns a bitmask of threads in the current lane for which \p x is true.
89100
_DEFAULT_FN_ATTRS uint64_t __gpu_ballot(uint64_t __lane_mask, bool __x);

0 commit comments

Comments
 (0)