Skip to content

Commit 8fd3b0c

Browse files
author
anikelal
committed
add alwaysinline attribute to stubs
1 parent 15579a8 commit 8fd3b0c

File tree

6 files changed

+32
-35
lines changed

6 files changed

+32
-35
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -6174,6 +6174,19 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
61746174
CodeGenFunction(*this).GenerateCode(GD, Fn, FI);
61756175

61766176
setNonAliasAttributes(GD, Fn);
6177+
6178+
bool ShouldAddOptNone = !CodeGenOpts.DisableO0ImplyOptNone &&
6179+
(CodeGenOpts.OptimizationLevel == 0) &&
6180+
!D->hasAttr<MinSizeAttr>();
6181+
6182+
if (D->hasAttr<OpenCLKernelAttr>())
6183+
if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub &&
6184+
!D->hasAttr<NoInlineAttr>() &&
6185+
!Fn->hasFnAttribute(llvm::Attribute::NoInline) &&
6186+
!D->hasAttr<OptimizeNoneAttr>() &&
6187+
!Fn->hasFnAttribute(llvm::Attribute::OptimizeNone) && !ShouldAddOptNone)
6188+
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
6189+
61776190
SetLLVMFunctionAttributesForDefinition(D, Fn);
61786191

61796192
if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

+3-3
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ kernel void test_target_features_kernel(global int *i) {
492492
// GFX900-NEXT: ret void
493493
//
494494
//
495-
// GFX900: Function Attrs: convergent norecurse nounwind
495+
// GFX900: Function Attrs: alwaysinline convergent norecurse nounwind
496496
// GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
497497
// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] {
498498
// GFX900-NEXT: [[ENTRY:.*:]]
@@ -640,7 +640,7 @@ kernel void test_target_features_kernel(global int *i) {
640640
// GFX900-NEXT: ret void
641641
//
642642
//
643-
// GFX900: Function Attrs: convergent norecurse nounwind
643+
// GFX900: Function Attrs: alwaysinline convergent norecurse nounwind
644644
// GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel(
645645
// GFX900-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
646646
// GFX900-NEXT: [[ENTRY:.*:]]
@@ -832,7 +832,7 @@ kernel void test_target_features_kernel(global int *i) {
832832
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
833833
// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
834834
// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
835-
// GFX900: attributes #[[ATTR3]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
835+
// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
836836
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
837837
// GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
838838
// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }

clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl

-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
kernel void ker() {};
77
// CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]]
8-
// CHECK: call void @__clang_ocl_kern_imp_ker() #[[ATTR2:[0-9]+]]
98

109
// CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]]
1110

@@ -18,6 +17,3 @@ void foo() {};
1817

1918
// CHECK: attributes #[[ATTR1]]
2019
// CHECK-NOT: uniform-work-group-size
21-
22-
// CHECK: attributes #[[ATTR2]]
23-
// CHECK-NOT: uniform-work-group-size

clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

+9-15
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR
2-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR
3-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR
4-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR
5-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR
6-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR
7-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86
8-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86
9-
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLEX86
1+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
2+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
3+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
4+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
5+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
6+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
7+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
8+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
9+
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefix=CHECK-LIFETIMES
1010

1111
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
1212

@@ -39,12 +39,6 @@ void callee(int id, __global int *out) {
3939
out[id] = id;
4040
}
4141

42-
// TRIPLESPIR: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
43-
// TRIPLESPIR: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue({{.*}})
44-
45-
// TRIPLEX86: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
46-
// TRIPLEX86: call void @__clang_ocl_kern_imp_device_side_enqueue({{.*}})
47-
4842
// COMMON-LABEL: define{{.*}} void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
4943
kernel void device_side_enqueue(global int *a, global int *b, int i) {
5044
// SPIR: %default_queue = alloca target("spirv.Queue")

clang/test/CodeGenOpenCL/convergent.cl

+5-4
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ void test_not_unroll() {
127127
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
128128

129129
// CHECK-LABEL: @assume_convergent_asm
130-
// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
130+
// CHECK: tail call void asm sideeffect "s_barrier", ""() #6
131131
kernel void assume_convergent_asm()
132132
{
133133
__asm__ volatile("s_barrier");
@@ -138,6 +138,7 @@ kernel void assume_convergent_asm()
138138
// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
139139
// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
140140
// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
141-
// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
142-
// CHECK: attributes #6 = { {{[^}]*}}nounwind{{[^}]*}} }
143-
// CHECK: attributes #7 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} }
141+
// CHECK: attributes #5 = { {{[^}]*}}alwaysinline convergent{{[^}]*}} }
142+
// CHECK: attributes #6 = { {{[^}]*}}convergent{{[^}]*}} }
143+
// CHECK: attributes #7 = { {{[^}]*}}nounwind{{[^}]*}} }
144+
// CHECK: attributes #8 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} }

clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl

+2-9
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,8 @@
99
typedef struct {int a;} ndrange_t;
1010

1111
kernel void test(int i) {
12-
1312
// AMDGPU-LABEL: define {{.*}} amdgpu_kernel void @test
14-
// AMDGPU-LABEL: call void @__clang_ocl_kern_imp_test(i32 noundef %0)
15-
1613
// SPIR-LABEL: define {{.*}} spir_kernel void @test
17-
// SPIR-LABEL: call spir_func void @__clang_ocl_kern_imp_test(i32 noundef %0)
18-
19-
// AMDGPU-LABEL: define {{.*}} void @__clang_ocl_kern_imp_test
20-
// SPIR-LABEL: define {{.*}} spir_func void @__clang_ocl_kern_imp_test
2114

2215
// COMMON-LABEL: entry:
2316
// AMDGPU: %block_sizes = alloca [1 x i64]
@@ -44,5 +37,5 @@ kernel void test(int i) {
4437

4538
// CHECK-DEBUG: ![[TESTFILE:[0-9]+]] = !DIFile(filename: "<stdin>"
4639
// CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", linkageName: "__clang_ocl_kern_imp_test", {{.*}} file: ![[TESTFILE]]
47-
// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 33)
48-
// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 34, scope: ![[IFSCOPE]])
40+
// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 26)
41+
// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 27, scope: ![[IFSCOPE]])

0 commit comments

Comments
 (0)