Skip to content

Commit 41ea0f3

Browse files
author
anikelal
committed
add alwaysinline attribute to stubs
1 parent 15579a8 commit 41ea0f3

11 files changed

+976
-270
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

+10
Original file line number | Diff line number | Diff line change
@@ -6174,6 +6174,16 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
61746174
CodeGenFunction(*this).GenerateCode(GD, Fn, FI);
61756175

61766176
setNonAliasAttributes(GD, Fn);
6177+
6178+
if (D->hasAttr<OpenCLKernelAttr>()) {
6179+
if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
6180+
if (!Fn->hasFnAttribute(llvm::Attribute::NoInline) &&
6181+
!Fn->hasFnAttribute(llvm::Attribute::InlineHint) &&
6182+
!Fn->hasFnAttribute(llvm::Attribute::OptimizeNone))
6183+
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
6184+
}
6185+
}
6186+
61776187
SetLLVMFunctionAttributesForDefinition(D, Fn);
61786188

61796189
if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())

clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

+349-82
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl

+66-10
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

+7-7
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ kernel void test_target_features_kernel(global int *i) {
111111
// NOCPU-NEXT: ret void
112112
//
113113
//
114-
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
114+
// NOCPU: Function Attrs: alwaysinline convergent norecurse nounwind
115115
// NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
116116
// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] {
117117
// NOCPU-NEXT: [[ENTRY:.*:]]
@@ -249,7 +249,7 @@ kernel void test_target_features_kernel(global int *i) {
249249
// NOCPU-NEXT: ret void
250250
//
251251
//
252-
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
252+
// NOCPU: Function Attrs: alwaysinline convergent norecurse nounwind
253253
// NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel(
254254
// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META10]] {
255255
// NOCPU-NEXT: [[ENTRY:.*:]]
@@ -492,7 +492,7 @@ kernel void test_target_features_kernel(global int *i) {
492492
// GFX900-NEXT: ret void
493493
//
494494
//
495-
// GFX900: Function Attrs: convergent norecurse nounwind
495+
// GFX900: Function Attrs: alwaysinline convergent norecurse nounwind
496496
// GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
497497
// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] {
498498
// GFX900-NEXT: [[ENTRY:.*:]]
@@ -640,7 +640,7 @@ kernel void test_target_features_kernel(global int *i) {
640640
// GFX900-NEXT: ret void
641641
//
642642
//
643-
// GFX900: Function Attrs: convergent norecurse nounwind
643+
// GFX900: Function Attrs: alwaysinline convergent norecurse nounwind
644644
// GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel(
645645
// GFX900-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] {
646646
// GFX900-NEXT: [[ENTRY:.*:]]
@@ -820,9 +820,9 @@ kernel void test_target_features_kernel(global int *i) {
820820
// NOCPU: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
821821
// NOCPU: attributes #[[ATTR1]] = { convergent noinline norecurse nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
822822
// NOCPU: attributes #[[ATTR2]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
823-
// NOCPU: attributes #[[ATTR3]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
823+
// NOCPU: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
824824
// NOCPU: attributes #[[ATTR4]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" "uniform-work-group-size"="false" }
825-
// NOCPU: attributes #[[ATTR5]] = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" }
825+
// NOCPU: attributes #[[ATTR5]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+s-memtime-inst" }
826826
// NOCPU: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
827827
// NOCPU: attributes #[[ATTR7]] = { convergent noinline nounwind optnone "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
828828
// NOCPU: attributes #[[ATTR8]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
@@ -832,7 +832,7 @@ kernel void test_target_features_kernel(global int *i) {
832832
// GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
833833
// GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
834834
// GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" }
835-
// GFX900: attributes #[[ATTR3]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
835+
// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }
836836
// GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
837837
// GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
838838
// GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" }

clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl

-4
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55

66
kernel void ker() {};
77
// CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]]
8-
// CHECK: call void @__clang_ocl_kern_imp_ker() #[[ATTR2:[0-9]+]]
98

109
// CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]]
1110

@@ -18,6 +17,3 @@ void foo() {};
1817

1918
// CHECK: attributes #[[ATTR1]]
2019
// CHECK-NOT: uniform-work-group-size
21-
22-
// CHECK: attributes #[[ATTR2]]
23-
// CHECK-NOT: uniform-work-group-size

0 commit comments

Comments
 (0)