From 546fed81109e575b5b44693c3940e08ea0231ebc Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 10 Feb 2025 19:21:38 +0000 Subject: [PATCH 1/5] [AArch64] fix trampoline implementation: use X15 AAPCS64 reserves any of X9-X15 for this purpose, and says not to use X16 or X18 like GCC did. Simply choosing a different register fixes the problem of this being broken on any platform that actually follows the platform ABI. As a side benefit, also generate slightly better code by following the XCore implementation instead of PPC (although following the RISCV might have been slightly more readable in hindsight). --- compiler-rt/lib/builtins/README.txt | 5 - compiler-rt/lib/builtins/trampoline_setup.c | 42 --- .../builtins/Unit/trampoline_setup_test.c | 2 +- .../lib/Optimizer/CodeGen/BoxedProcedure.cpp | 4 +- .../AArch64/AArch64CallingConvention.td | 36 ++- .../Target/AArch64/AArch64FrameLowering.cpp | 26 ++ .../Target/AArch64/AArch64ISelLowering.cpp | 85 +++--- llvm/test/CodeGen/AArch64/nest-register.ll | 16 +- .../CodeGen/AArch64/preserve_nonecc_call.ll | 116 ++++---- .../AArch64/statepoint-call-lowering.ll | 2 +- llvm/test/CodeGen/AArch64/trampoline.ll | 257 +++++++++++++++++- llvm/test/CodeGen/AArch64/win64cc-x18.ll | 27 +- .../CodeGen/AArch64/zero-call-used-regs.ll | 16 +- 13 files changed, 435 insertions(+), 199 deletions(-) diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt index 19f26c92a0f94..2d213d95f333a 100644 --- a/compiler-rt/lib/builtins/README.txt +++ b/compiler-rt/lib/builtins/README.txt @@ -272,11 +272,6 @@ switch32 switch8 switchu8 -// This function generates a custom trampoline function with the specific -// realFunc and localsPtr values. -void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, - const void* realFunc, void* localsPtr); - // There is no C interface to the *_vfp_d8_d15_regs functions. There are // called in the prolog and epilog of Thumb1 functions. 
When the C++ ABI use // SJLJ for exceptions, each function with a catch clause or destructors needs diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c index 830e25e4c0303..844eb27944142 100644 --- a/compiler-rt/lib/builtins/trampoline_setup.c +++ b/compiler-rt/lib/builtins/trampoline_setup.c @@ -41,45 +41,3 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, __clear_cache(trampOnStack, &trampOnStack[10]); } #endif // __powerpc__ && !defined(__powerpc64__) - -// The AArch64 compiler generates calls to __trampoline_setup() when creating -// trampoline functions on the stack for use with nested functions. -// This function creates a custom 36-byte trampoline function on the stack -// which loads x18 with a pointer to the outer function's locals -// and then jumps to the target nested function. -// Note: x18 is a reserved platform register on Windows and macOS. - -#if defined(__aarch64__) && defined(__ELF__) -COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, - int trampSizeAllocated, - const void *realFunc, void *localsPtr) { - // This should never happen, but if compiler did not allocate - // enough space on stack for the trampoline, abort. - if (trampSizeAllocated < 36) - compilerrt_abort(); - - // create trampoline - // Load realFunc into x17. mov/movk 16 bits at a time. 
- trampOnStack[0] = - 0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11; - trampOnStack[1] = - 0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11; - trampOnStack[2] = - 0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11; - trampOnStack[3] = - 0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11; - // Load localsPtr into x18 - trampOnStack[4] = - 0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12; - trampOnStack[5] = - 0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12; - trampOnStack[6] = - 0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12; - trampOnStack[7] = - 0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12; - trampOnStack[8] = 0xd61f0220; // br x17 - - // Clear instruction cache. - __clear_cache(trampOnStack, &trampOnStack[9]); -} -#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64) diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c index d51d35acaa02f..da115fe764271 100644 --- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c +++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c @@ -7,7 +7,7 @@ /* * Tests nested functions - * The ppc and aarch64 compilers generates a call to __trampoline_setup + * The ppc compiler generates a call to __trampoline_setup * The i386 and x86_64 compilers generate a call to ___enable_execute_stack */ diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index 26f4aee21d8bd..f402404121da0 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -274,10 +274,10 @@ class BoxedProcedurePass auto loc = embox.getLoc(); mlir::Type i8Ty = builder.getI8Type(); mlir::Type i8Ptr = builder.getRefType(i8Ty); - // For AArch64, PPC32 and PPC64, the thunk is populated by a call to + // For PPC32 
and PPC64, the thunk is populated by a call to // __trampoline_setup, which is defined in // compiler-rt/lib/builtins/trampoline_setup.c and requires the - // thunk size greater than 32 bytes. For RISCV and x86_64, the + // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64, the // thunk setup doesn't go through __trampoline_setup and fits in 32 // bytes. fir::SequenceType::Extent thunkSize = triple.getTrampolineSize(); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 7cca6d9bc6b9c..8355463dea94e 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -28,6 +28,12 @@ class CCIfSubtarget //===----------------------------------------------------------------------===// defvar AArch64_Common = [ + // The 'nest' parameter, if any, is passed in X15. + // The previous register used here (X18) is also defined to be unavailable + // for this purpose, while all of X9-X15 were defined to be free for LLVM to + // use for this, so use X15 (which LLVM often already clobbers anyways). + CCIfNest>, + CCIfType<[iPTR], CCBitConvertToType>, CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, @@ -117,16 +123,12 @@ defvar AArch64_Common = [ ]; let Entry = 1 in -def CC_AArch64_AAPCS : CallingConv>], - AArch64_Common -)>; +def CC_AArch64_AAPCS : CallingConv; let Entry = 1 in def RetCC_AArch64_AAPCS : CallingConv<[ + CCIfNest>, + CCIfType<[iPTR], CCBitConvertToType>, CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, @@ -177,6 +179,8 @@ def CC_AArch64_Win64_VarArg : CallingConv<[ // a stack layout compatible with the x64 calling convention. let Entry = 1 in def CC_AArch64_Arm64EC_VarArg : CallingConv<[ + CCIfNest>, + // Convert small floating-point values to integer. 
CCIfType<[f16, bf16], CCBitConvertToType>, CCIfType<[f32], CCBitConvertToType>, @@ -295,6 +299,8 @@ def CC_AArch64_Arm64EC_Thunk_Native : CallingConv<[ let Entry = 1 in def RetCC_AArch64_Arm64EC_Thunk : CallingConv<[ + CCIfNest>, + // The X86-Win64 calling convention always returns __m64 values in RAX. CCIfType<[x86mmx], CCBitConvertToType>, @@ -353,6 +359,8 @@ def RetCC_AArch64_Arm64EC_CFGuard_Check : CallingConv<[ // + Stack slots are sized as needed rather than being at least 64-bit. let Entry = 1 in def CC_AArch64_DarwinPCS : CallingConv<[ + CCIfNest>, + CCIfType<[iPTR], CCBitConvertToType>, CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, @@ -427,6 +435,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ let Entry = 1 in def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ + CCIfNest>, + CCIfType<[iPTR], CCBitConvertToType>, CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, @@ -450,6 +460,8 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ // same as the normal Darwin VarArgs handling. let Entry = 1 in def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ + CCIfNest>, + CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, @@ -494,6 +506,8 @@ def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ let Entry = 1 in def CC_AArch64_GHC : CallingConv<[ + CCIfNest>, + CCIfType<[iPTR], CCBitConvertToType>, // Handle all vector types as either f64 or v2f64. @@ -523,6 +537,7 @@ def CC_AArch64_Preserve_None : CallingConv<[ // We can pass arguments in all general registers, except: // - X8, used for sret // - X16/X17, used by the linker as IP0/IP1 + // - X15, the nest register and used by Windows for stack allocation // - X18, the platform register // - X19, the base pointer // - X29, the frame pointer @@ -533,6 +548,7 @@ def CC_AArch64_Preserve_None : CallingConv<[ // normal functions without saving and reloading arguments. 
// X9 is assigned last as it is used in FrameLowering as the first // choice for a scratch register. + CCIfNest>, CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23, W24, W25, W26, W27, W28, W0, W1, W2, W3, W4, W5, @@ -544,12 +560,6 @@ def CC_AArch64_Preserve_None : CallingConv<[ X6, X7, X10, X11, X12, X13, X14, X9]>>, - // Windows uses X15 for stack allocation - CCIf<"!State.getMachineFunction().getSubtarget().isTargetWindows()", - CCIfType<[i32], CCAssignToReg<[W15]>>>, - CCIf<"!State.getMachineFunction().getSubtarget().isTargetWindows()", - CCIfType<[i64], CCAssignToReg<[X15]>>>, - CCDelegateTo ]>; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index d3abd79b85a75..ced3ff7b742ad 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2044,6 +2044,25 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, : 0; if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) { + // Find an available register to save the value of X15 to while the stack probe clobbers it. 
+ unsigned X15Scratch = AArch64::NoRegister; + if (LiveRegs.contains(AArch64::X15)) { + // if (llvm::any_of( + // MBB.liveins(), + // [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { + // return STI.getRegisterInfo()->isSuperOrSubRegisterEq( + // AArch64::X15, LiveIn.PhysReg); + // })) + X15Scratch = findScratchNonCalleeSaveRegister(&MBB); + assert(X15Scratch != AArch64::NoRegister); + LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch) + .addReg(AArch64::XZR) + .addReg(AArch64::X15, RegState::Undef) + .addReg(AArch64::X15, RegState::Implicit) + .setMIFlag(MachineInstr::FrameSetup); + } + uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4; if (NeedsWinCFI) { HasWinCFI = true; @@ -2166,6 +2185,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // we've set a frame pointer and already finished the SEH prologue. assert(!NeedsWinCFI); } + if (X15Scratch != AArch64::NoRegister) { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15) + .addReg(AArch64::XZR) + .addReg(X15Scratch, RegState::Undef) + .addReg(X15Scratch, RegState::Implicit) + .setMIFlag(MachineInstr::FrameSetup); + } } StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0d1608a97bfd3..1404077446420 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7290,59 +7290,66 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { - // Note: x18 cannot be used for the Nest parameter on Windows and macOS. 
- if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) - report_fatal_error( - "ADJUST_TRAMPOLINE operation is only supported on Linux."); - return Op.getOperand(0); } SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { - - // Note: x18 cannot be used for the Nest parameter on Windows and macOS. - if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) - report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux."); - SDValue Chain = Op.getOperand(0); - SDValue Trmp = Op.getOperand(1); // trampoline + SDValue Trmp = Op.getOperand(1); // trampoline, 36 bytes SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value - SDLoc dl(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; + const Value *TrmpAddr = cast(Op.getOperand(4))->getValue(); - Entry.Ty = IntPtrTy; - Entry.Node = Trmp; - Args.push_back(Entry); + // ldr x15, .+16 + // ldr x17, .+20 + // br x17 + // <4 bytes of padding, left unwritten> + // .nest: .qword nest + // .fptr: .qword fptr + SDValue OutChains[5]; - if (auto *FI = dyn_cast(Trmp.getNode())) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo &MFI = MF.getFrameInfo(); - Entry.Node = - DAG.getConstant(MFI.getObjectSize(FI->getIndex()), dl, MVT::i64); - } else - Entry.Node = DAG.getConstant(36, dl, MVT::i64); + const char X15 = 0x0f; + const char X17 = 0x11; - Args.push_back(Entry); - Entry.Node = FPtr; - Args.push_back(Entry); - Entry.Node = Nest; - Args.push_back(Entry); + SDValue Addr = Trmp; - // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( - CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); + SDLoc 
dl(Op); + OutChains[0] = + DAG.getStore(Chain, dl, DAG.getConstant(0x58000080u | X15, dl, MVT::i32), Addr, + MachinePointerInfo(TrmpAddr)); - std::pair CallResult = LowerCallTo(CLI); - return CallResult.second; + Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, + DAG.getConstant(4, dl, MVT::i64)); + OutChains[1] = + DAG.getStore(Chain, dl, DAG.getConstant(0x580000b0u | X17, dl, MVT::i32), Addr, + MachinePointerInfo(TrmpAddr, 4)); + + Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, + DAG.getConstant(8, dl, MVT::i64)); + OutChains[2] = + DAG.getStore(Chain, dl, DAG.getConstant(0xd61f0220u, dl, MVT::i32), Addr, + MachinePointerInfo(TrmpAddr, 8)); + + Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, + DAG.getConstant(16, dl, MVT::i64)); + OutChains[3] = + DAG.getStore(Chain, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 16)); + + Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, + DAG.getConstant(24, dl, MVT::i64)); + OutChains[4] = + DAG.getStore(Chain, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24)); + + SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + + SDValue EndOfTrmp = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, + DAG.getConstant(12, dl, MVT::i64)); + + // Call clear cache on the trampoline instructions. 
+ return DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken, + Trmp, EndOfTrmp); } SDValue AArch64TargetLowering::LowerOperation(SDValue Op, diff --git a/llvm/test/CodeGen/AArch64/nest-register.ll b/llvm/test/CodeGen/AArch64/nest-register.ll index 1e1c1b044bab6..2e94dfba1fa52 100644 --- a/llvm/test/CodeGen/AArch64/nest-register.ll +++ b/llvm/test/CodeGen/AArch64/nest-register.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -disable-post-ra -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; Tests that the 'nest' parameter attribute causes the relevant parameter to be @@ -5,18 +6,21 @@ define ptr @nest_receiver(ptr nest %arg) nounwind { ; CHECK-LABEL: nest_receiver: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov x0, x18 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, x15 +; CHECK-NEXT: ret ret ptr %arg } define ptr @nest_caller(ptr %arg) nounwind { ; CHECK-LABEL: nest_caller: -; CHECK: mov x18, x0 -; CHECK-NEXT: bl nest_receiver -; CHECK: ret +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: mov x15, x0 +; CHECK-NEXT: bl nest_receiver +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %result = call ptr @nest_receiver(ptr nest %arg) ret ptr %result diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll index 9b9717c19321e..e0d7b5abe7bea 100644 --- a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll @@ -184,10 +184,11 @@ declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24) { ; CHECK-LABEL: callee_with_many_param: ; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov x8, x15 +; CHECK-NEXT: ldr x8, [sp, #32] ; CHECK-NEXT: mov x15, x20 ; CHECK-NEXT: mov x20, x21 ; CHECK-NEXT: mov x21, x22 @@ -212,17 +213,20 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6 ; CHECK-NEXT: mov x13, x14 ; CHECK-NEXT: mov x14, x9 ; CHECK-NEXT: mov x9, x8 +; CHECK-NEXT: str x15, [sp] ; CHECK-NEXT: bl callee_with_many_param2 -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret ; ; DARWIN-LABEL: callee_with_many_param: ; DARWIN: ; %bb.0: -; DARWIN-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill -; DARWIN-NEXT: .cfi_def_cfa_offset 16 +; DARWIN-NEXT: sub sp, sp, #32 +; DARWIN-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DARWIN-NEXT: .cfi_def_cfa_offset 32 ; DARWIN-NEXT: .cfi_offset w30, -8 ; DARWIN-NEXT: .cfi_offset w29, -16 -; DARWIN-NEXT: mov x8, x15 +; DARWIN-NEXT: ldr x8, [sp, #32] ; DARWIN-NEXT: mov x15, x20 ; DARWIN-NEXT: mov x20, x21 ; DARWIN-NEXT: mov x21, x22 @@ -247,8 +251,10 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6 ; DARWIN-NEXT: mov x13, x14 ; DARWIN-NEXT: mov x14, x9 ; DARWIN-NEXT: mov x9, x8 +; DARWIN-NEXT: str x15, [sp] ; DARWIN-NEXT: bl _callee_with_many_param2 -; DARWIN-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; DARWIN-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DARWIN-NEXT: add sp, sp, #32 ; DARWIN-NEXT: ret ; ; WIN-LABEL: callee_with_many_param: @@ -302,17 +308,18 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6 define i64 @caller3() { ; CHECK-LABEL: caller3: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-160]! 
// 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 160 +; CHECK-NEXT: sub sp, sp, #176 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 176 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -332,6 +339,7 @@ define i64 @caller3() { ; CHECK-NEXT: .cfi_offset b13, -144 ; CHECK-NEXT: .cfi_offset b14, -152 ; CHECK-NEXT: .cfi_offset b15, -160 +; CHECK-NEXT: mov w8, #24 // =0x18 ; CHECK-NEXT: mov w20, #1 // =0x1 ; CHECK-NEXT: mov w21, #2 // =0x2 ; CHECK-NEXT: mov w22, #3 // =0x3 @@ -355,33 +363,35 @@ define i64 @caller3() { ; CHECK-NEXT: mov w13, #21 // =0x15 ; CHECK-NEXT: mov w14, #22 // =0x16 ; CHECK-NEXT: mov w9, #23 // =0x17 -; CHECK-NEXT: mov w15, #24 // =0x18 +; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee_with_many_param -; CHECK-NEXT: ldp x20, 
x19, [sp, #144] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload -; CHECK-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload -; CHECK-NEXT: ldp x28, x27, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #160 // 16-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #176 ; CHECK-NEXT: ret ; ; DARWIN-LABEL: caller3: ; DARWIN: ; %bb.0: -; DARWIN-NEXT: stp d15, d14, [sp, #-160]! 
; 16-byte Folded Spill -; DARWIN-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill -; DARWIN-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill -; DARWIN-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill -; DARWIN-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill -; DARWIN-NEXT: stp x26, x25, [sp, #80] ; 16-byte Folded Spill -; DARWIN-NEXT: stp x24, x23, [sp, #96] ; 16-byte Folded Spill -; DARWIN-NEXT: stp x22, x21, [sp, #112] ; 16-byte Folded Spill -; DARWIN-NEXT: stp x20, x19, [sp, #128] ; 16-byte Folded Spill -; DARWIN-NEXT: stp x29, x30, [sp, #144] ; 16-byte Folded Spill -; DARWIN-NEXT: .cfi_def_cfa_offset 160 +; DARWIN-NEXT: sub sp, sp, #176 +; DARWIN-NEXT: stp d15, d14, [sp, #16] ; 16-byte Folded Spill +; DARWIN-NEXT: stp d13, d12, [sp, #32] ; 16-byte Folded Spill +; DARWIN-NEXT: stp d11, d10, [sp, #48] ; 16-byte Folded Spill +; DARWIN-NEXT: stp d9, d8, [sp, #64] ; 16-byte Folded Spill +; DARWIN-NEXT: stp x28, x27, [sp, #80] ; 16-byte Folded Spill +; DARWIN-NEXT: stp x26, x25, [sp, #96] ; 16-byte Folded Spill +; DARWIN-NEXT: stp x24, x23, [sp, #112] ; 16-byte Folded Spill +; DARWIN-NEXT: stp x22, x21, [sp, #128] ; 16-byte Folded Spill +; DARWIN-NEXT: stp x20, x19, [sp, #144] ; 16-byte Folded Spill +; DARWIN-NEXT: stp x29, x30, [sp, #160] ; 16-byte Folded Spill +; DARWIN-NEXT: .cfi_def_cfa_offset 176 ; DARWIN-NEXT: .cfi_offset w30, -8 ; DARWIN-NEXT: .cfi_offset w29, -16 ; DARWIN-NEXT: .cfi_offset w19, -24 @@ -402,6 +412,7 @@ define i64 @caller3() { ; DARWIN-NEXT: .cfi_offset b13, -144 ; DARWIN-NEXT: .cfi_offset b14, -152 ; DARWIN-NEXT: .cfi_offset b15, -160 +; DARWIN-NEXT: mov w8, #24 ; =0x18 ; DARWIN-NEXT: mov w20, #1 ; =0x1 ; DARWIN-NEXT: mov w21, #2 ; =0x2 ; DARWIN-NEXT: mov w22, #3 ; =0x3 @@ -425,18 +436,19 @@ define i64 @caller3() { ; DARWIN-NEXT: mov w13, #21 ; =0x15 ; DARWIN-NEXT: mov w14, #22 ; =0x16 ; DARWIN-NEXT: mov w9, #23 ; =0x17 -; DARWIN-NEXT: mov w15, #24 ; =0x18 +; DARWIN-NEXT: str x8, [sp] ; DARWIN-NEXT: bl _callee_with_many_param -; 
DARWIN-NEXT: ldp x29, x30, [sp, #144] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp x20, x19, [sp, #128] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp x22, x21, [sp, #112] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp x24, x23, [sp, #96] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp x26, x25, [sp, #80] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp x28, x27, [sp, #64] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload -; DARWIN-NEXT: ldp d15, d14, [sp], #160 ; 16-byte Folded Reload +; DARWIN-NEXT: ldp x29, x30, [sp, #160] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp x20, x19, [sp, #144] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp x22, x21, [sp, #128] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp x24, x23, [sp, #112] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp x26, x25, [sp, #96] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp x28, x27, [sp, #80] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp d9, d8, [sp, #64] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp d11, d10, [sp, #48] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp d13, d12, [sp, #32] ; 16-byte Folded Reload +; DARWIN-NEXT: ldp d15, d14, [sp, #16] ; 16-byte Folded Reload +; DARWIN-NEXT: add sp, sp, #176 ; DARWIN-NEXT: ret ; ; WIN-LABEL: caller3: diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll index 9619895c450ca..32c3eaeb9c876 100644 --- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -207,7 +207,7 @@ define void @test_attributes(ptr byval(%struct2) %s) gc "statepoint-example" { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr x8, [sp, #64] ; CHECK-NEXT: ldr q0, [sp, #48] -; CHECK-NEXT: mov x18, xzr +; CHECK-NEXT: mov x15, xzr ; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: mov w1, #17 // =0x11 ; CHECK-NEXT: str x8, [sp, #16] 
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll index 30ac2aa283b3e..0e682704afbf8 100644 --- a/llvm/test/CodeGen/AArch64/trampoline.ll +++ b/llvm/test/CodeGen/AArch64/trampoline.ll @@ -1,32 +1,265 @@ -; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK-LINUX +; RUN: llc -mtriple=aarch64-none-eabi < %s | FileCheck %s --check-prefixes=CHECK-LINUX +; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-PC +; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefixes=CHECK-APPLE @trampg = internal global [36 x i8] zeroinitializer, align 8 declare void @llvm.init.trampoline(ptr, ptr, ptr); declare ptr @llvm.adjust.trampoline(ptr); -define i64 @f(ptr nest %c, i64 %x, i64 %y) { - %sum = add i64 %x, %y - ret i64 %sum +define ptr @f(ptr nest %x, i64 %y) { +; CHECK-LINUX-LABEL: f: +; CHECK-LINUX: // %bb.0: +; CHECK-LINUX-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-LINUX-NEXT: sub sp, sp, #237, lsl #12 // =970752 +; CHECK-LINUX-NEXT: sub sp, sp, #3264 +; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 974032 +; CHECK-LINUX-NEXT: .cfi_offset w29, -16 +; CHECK-LINUX-NEXT: add x0, x15, x0 +; CHECK-LINUX-NEXT: add sp, sp, #237, lsl #12 // =970752 +; CHECK-LINUX-NEXT: add sp, sp, #3264 +; CHECK-LINUX-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-LINUX-NEXT: ret +; +; CHECK-PC-LABEL: f: +; CHECK-PC: .seh_proc f +; CHECK-PC-NEXT: // %bb.0: +; CHECK-PC-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-PC-NEXT: .seh_save_fplr_x 16 +; CHECK-PC-NEXT: mov x9, x15 +; CHECK-PC-NEXT: mov x15, #60876 // =0xedcc +; CHECK-PC-NEXT: .seh_nop +; CHECK-PC-NEXT: bl __chkstk +; CHECK-PC-NEXT: .seh_nop +; CHECK-PC-NEXT: sub sp, sp, x15, lsl #4 +; CHECK-PC-NEXT: .seh_stackalloc 974016 +; CHECK-PC-NEXT: mov x15, x9 +; CHECK-PC-NEXT: .seh_endprologue +; CHECK-PC-NEXT: add x0, x15, x0 +; CHECK-PC-NEXT: .seh_startepilogue +; CHECK-PC-NEXT: add sp, sp, #237, lsl #12 // =970752 +; CHECK-PC-NEXT: .seh_stackalloc 970752 +; CHECK-PC-NEXT: add sp, sp, #3264 +; CHECK-PC-NEXT: .seh_stackalloc 3264 +; CHECK-PC-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-PC-NEXT: .seh_save_fplr_x 16 +; CHECK-PC-NEXT: .seh_endepilogue +; CHECK-PC-NEXT: ret +; CHECK-PC-NEXT: .seh_endfunclet +; CHECK-PC-NEXT: .seh_endproc +; +; CHECK-APPLE-LABEL: f: +; CHECK-APPLE: ; %bb.0: +; CHECK-APPLE-NEXT: stp x28, x27, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-APPLE-NEXT: sub sp, sp, #237, lsl #12 ; =970752 +; CHECK-APPLE-NEXT: sub sp, sp, #3264 +; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 974032 +; CHECK-APPLE-NEXT: .cfi_offset w27, -8 +; CHECK-APPLE-NEXT: .cfi_offset w28, -16 +; CHECK-APPLE-NEXT: add x0, x15, x0 +; CHECK-APPLE-NEXT: add sp, sp, #237, lsl #12 ; =970752 +; CHECK-APPLE-NEXT: add sp, sp, #3264 +; CHECK-APPLE-NEXT: ldp x28, x27, [sp], #16 ; 16-byte Folded Reload +; CHECK-APPLE-NEXT: ret + %chkstack = alloca [u0xedcba x i8] + %sum = getelementptr i8, ptr %x, i64 %y + ret ptr %sum } define i64 @func1() { +; CHECK-LINUX-LABEL: func1: +; CHECK-LINUX: // %bb.0: +; CHECK-LINUX-NEXT: sub sp, sp, #64 +; CHECK-LINUX-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 64 +; CHECK-LINUX-NEXT: .cfi_offset w30, -16 +; CHECK-LINUX-NEXT: adrp x8, :got:f +; CHECK-LINUX-NEXT: mov w9, #544 // =0x220 +; CHECK-LINUX-NEXT: add x0, sp, #8 +; CHECK-LINUX-NEXT: ldr x8, [x8, :got_lo12:f] +; CHECK-LINUX-NEXT: movk w9, #54815, lsl #16 +; 
CHECK-LINUX-NEXT: str w9, [sp, #16] +; CHECK-LINUX-NEXT: add x9, sp, #56 +; CHECK-LINUX-NEXT: stp x9, x8, [sp, #24] +; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f +; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16 +; CHECK-LINUX-NEXT: movk x8, #177, lsl #32 +; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48 +; CHECK-LINUX-NEXT: str x8, [sp, #8] +; CHECK-LINUX-NEXT: add x8, sp, #8 +; CHECK-LINUX-NEXT: add x1, x8, #12 +; CHECK-LINUX-NEXT: bl __clear_cache +; CHECK-LINUX-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-LINUX-NEXT: mov x0, xzr +; CHECK-LINUX-NEXT: add sp, sp, #64 +; CHECK-LINUX-NEXT: ret +; +; CHECK-PC-LABEL: func1: +; CHECK-PC: .seh_proc func1 +; CHECK-PC-NEXT: // %bb.0: +; CHECK-PC-NEXT: sub sp, sp, #64 +; CHECK-PC-NEXT: .seh_stackalloc 64 +; CHECK-PC-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-PC-NEXT: .seh_save_reg x30, 48 +; CHECK-PC-NEXT: .seh_endprologue +; CHECK-PC-NEXT: adrp x8, f +; CHECK-PC-NEXT: add x8, x8, :lo12:f +; CHECK-PC-NEXT: add x9, sp, #56 +; CHECK-PC-NEXT: stp x9, x8, [sp, #24] +; CHECK-PC-NEXT: mov w8, #544 // =0x220 +; CHECK-PC-NEXT: add x0, sp, #8 +; CHECK-PC-NEXT: movk w8, #54815, lsl #16 +; CHECK-PC-NEXT: str w8, [sp, #16] +; CHECK-PC-NEXT: mov x8, #143 // =0x8f +; CHECK-PC-NEXT: movk x8, #22528, lsl #16 +; CHECK-PC-NEXT: movk x8, #177, lsl #32 +; CHECK-PC-NEXT: movk x8, #22528, lsl #48 +; CHECK-PC-NEXT: str x8, [sp, #8] +; CHECK-PC-NEXT: add x8, sp, #8 +; CHECK-PC-NEXT: add x1, x8, #12 +; CHECK-PC-NEXT: bl __clear_cache +; CHECK-PC-NEXT: mov x0, xzr +; CHECK-PC-NEXT: .seh_startepilogue +; CHECK-PC-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-PC-NEXT: .seh_save_reg x30, 48 +; CHECK-PC-NEXT: add sp, sp, #64 +; CHECK-PC-NEXT: .seh_stackalloc 64 +; CHECK-PC-NEXT: .seh_endepilogue +; CHECK-PC-NEXT: ret +; CHECK-PC-NEXT: .seh_endfunclet +; CHECK-PC-NEXT: .seh_endproc +; +; CHECK-APPLE-LABEL: func1: +; CHECK-APPLE: ; %bb.0: +; CHECK-APPLE-NEXT: sub sp, sp, #64 +; CHECK-APPLE-NEXT: stp x29, x30, [sp, #48] ; 
16-byte Folded Spill +; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 64 +; CHECK-APPLE-NEXT: .cfi_offset w30, -8 +; CHECK-APPLE-NEXT: .cfi_offset w29, -16 +; CHECK-APPLE-NEXT: Lloh0: +; CHECK-APPLE-NEXT: adrp x8, _f@PAGE +; CHECK-APPLE-NEXT: Lloh1: +; CHECK-APPLE-NEXT: add x8, x8, _f@PAGEOFF +; CHECK-APPLE-NEXT: add x9, sp, #40 +; CHECK-APPLE-NEXT: stp x9, x8, [sp, #16] +; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220 +; CHECK-APPLE-NEXT: mov x0, sp +; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16 +; CHECK-APPLE-NEXT: str w8, [sp, #8] +; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f +; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16 +; CHECK-APPLE-NEXT: movk x8, #177, lsl #32 +; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48 +; CHECK-APPLE-NEXT: str x8, [sp] +; CHECK-APPLE-NEXT: mov x8, sp +; CHECK-APPLE-NEXT: add x1, x8, #12 +; CHECK-APPLE-NEXT: bl ___clear_cache +; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; CHECK-APPLE-NEXT: mov x0, xzr +; CHECK-APPLE-NEXT: add sp, sp, #64 +; CHECK-APPLE-NEXT: ret +; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh0, Lloh1 %val = alloca i64 - %nval = bitcast ptr %val to ptr %tramp = alloca [36 x i8], align 8 - ; CHECK: mov w1, #36 - ; CHECK: bl __trampoline_setup - call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval) + call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %val) %fp = call ptr @llvm.adjust.trampoline(ptr %tramp) ret i64 0 } define i64 @func2() { +; CHECK-LINUX-LABEL: func2: +; CHECK-LINUX: // %bb.0: +; CHECK-LINUX-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-LINUX-NEXT: .cfi_offset w30, -16 +; CHECK-LINUX-NEXT: adrp x8, :got:f +; CHECK-LINUX-NEXT: mov w9, #544 // =0x220 +; CHECK-LINUX-NEXT: adrp x0, trampg +; CHECK-LINUX-NEXT: add x0, x0, :lo12:trampg +; CHECK-LINUX-NEXT: ldr x8, [x8, :got_lo12:f] +; CHECK-LINUX-NEXT: movk w9, #54815, lsl #16 +; CHECK-LINUX-NEXT: str w9, [x0, #8] +; CHECK-LINUX-NEXT: add x9, sp, #8 +; CHECK-LINUX-NEXT: add x1, x0, #12 +; CHECK-LINUX-NEXT: stp x9, x8, [x0, #16] +; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f +; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16 +; CHECK-LINUX-NEXT: movk x8, #177, lsl #32 +; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48 +; CHECK-LINUX-NEXT: str x8, [x0] +; CHECK-LINUX-NEXT: bl __clear_cache +; CHECK-LINUX-NEXT: mov x0, xzr +; CHECK-LINUX-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-LINUX-NEXT: ret +; +; CHECK-PC-LABEL: func2: +; CHECK-PC: .seh_proc func2 +; CHECK-PC-NEXT: // %bb.0: +; CHECK-PC-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-PC-NEXT: .seh_save_reg_x x30, 16 +; CHECK-PC-NEXT: .seh_endprologue +; CHECK-PC-NEXT: adrp x0, trampg +; CHECK-PC-NEXT: add x0, x0, :lo12:trampg +; CHECK-PC-NEXT: adrp x8, f +; CHECK-PC-NEXT: add x8, x8, :lo12:f +; CHECK-PC-NEXT: add x9, sp, #8 +; CHECK-PC-NEXT: add x1, x0, #12 +; CHECK-PC-NEXT: stp x9, x8, [x0, #16] +; CHECK-PC-NEXT: mov w8, #544 // =0x220 +; CHECK-PC-NEXT: movk w8, #54815, lsl #16 +; CHECK-PC-NEXT: str w8, [x0, #8] +; CHECK-PC-NEXT: mov x8, #143 // =0x8f +; CHECK-PC-NEXT: movk x8, #22528, lsl #16 +; CHECK-PC-NEXT: movk x8, #177, lsl #32 +; CHECK-PC-NEXT: movk x8, #22528, lsl #48 +; CHECK-PC-NEXT: str x8, [x0] +; CHECK-PC-NEXT: bl __clear_cache +; CHECK-PC-NEXT: mov x0, xzr +; CHECK-PC-NEXT: .seh_startepilogue +; CHECK-PC-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-PC-NEXT: .seh_save_reg_x x30, 16 +; CHECK-PC-NEXT: .seh_endepilogue +; CHECK-PC-NEXT: ret +; CHECK-PC-NEXT: .seh_endfunclet +; CHECK-PC-NEXT: .seh_endproc +; +; CHECK-APPLE-LABEL: func2: +; CHECK-APPLE: ; %bb.0: +; CHECK-APPLE-NEXT: sub sp, sp, #32 +; CHECK-APPLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 32 +; CHECK-APPLE-NEXT: .cfi_offset w30, -8 +; CHECK-APPLE-NEXT: .cfi_offset w29, -16 +; CHECK-APPLE-NEXT: Lloh2: +; CHECK-APPLE-NEXT: adrp x0, _trampg@PAGE +; CHECK-APPLE-NEXT: Lloh3: +; CHECK-APPLE-NEXT: add x0, x0, _trampg@PAGEOFF +; CHECK-APPLE-NEXT: Lloh4: +; CHECK-APPLE-NEXT: adrp x8, _f@PAGE +; CHECK-APPLE-NEXT: Lloh5: +; CHECK-APPLE-NEXT: add x8, x8, _f@PAGEOFF +; CHECK-APPLE-NEXT: add x9, sp, #8 +; CHECK-APPLE-NEXT: add x1, x0, #12 +; CHECK-APPLE-NEXT: stp x9, x8, [x0, #16] +; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220 +; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16 +; CHECK-APPLE-NEXT: str w8, [x0, #8] +; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f +; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16 +; CHECK-APPLE-NEXT: movk x8, #177, lsl #32 +; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48 +; 
CHECK-APPLE-NEXT: str x8, [x0] +; CHECK-APPLE-NEXT: bl ___clear_cache +; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-APPLE-NEXT: mov x0, xzr +; CHECK-APPLE-NEXT: add sp, sp, #32 +; CHECK-APPLE-NEXT: ret +; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh4, Lloh5 +; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh2, Lloh3 %val = alloca i64 - %nval = bitcast ptr %val to ptr - ; CHECK: mov w1, #36 - ; CHECK: bl __trampoline_setup - call void @llvm.init.trampoline(ptr @trampg, ptr @f, ptr %nval) + call void @llvm.init.trampoline(ptr @trampg, ptr @f, ptr %val) %fp = call ptr @llvm.adjust.trampoline(ptr @trampg) ret i64 0 } diff --git a/llvm/test/CodeGen/AArch64/win64cc-x18.ll b/llvm/test/CodeGen/AArch64/win64cc-x18.ll index b3e78cc9bbb81..4b45c300e9c1d 100644 --- a/llvm/test/CodeGen/AArch64/win64cc-x18.ll +++ b/llvm/test/CodeGen/AArch64/win64cc-x18.ll @@ -1,35 +1,26 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +;; Testing that nest uses x15 on all calling conventions (except Arm64EC) -;; Testing that x18 is not clobbered when passing pointers with the nest -;; attribute on windows - -; RUN: llc < %s -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,CHECK-NO-X18 -; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-X18 +; RUN: llc < %s -mtriple=aarch64-pc-windows-msvc | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-darwin- | FileCheck %s define dso_local i64 @other(ptr nest %p) #0 { ; CHECK-LABEL: other: -; CHECK-X18: ldr x0, [x18] -; CHECK-NO-X18: ldr x0, [x0] +; CHECK: ldr x0, [x15] +; CHECK: ret %r = load i64, ptr %p -; CHECK: ret ret i64 %r } define dso_local void @func() #0 { ; CHECK-LABEL: func: - - +; CHECK: add x15, sp, #8 +; CHECK: bl {{_?other}} +; CHECK: ret entry: %p = alloca i64 -; CHECK: mov w8, #1 -; CHECK: stp x30, x8, [sp, #-16] -; CHECK-X18: add x18, sp, #8 store i64 1, ptr %p -; CHECK-NO-X18: 
add x0, sp, #8 -; CHECK: bl other call void @other(ptr nest %p) -; CHECK: ldr x30, [sp], #16 -; CHECK: ret ret void } diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll index 4799ea3bcd19f..986666e015e9e 100644 --- a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll +++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll @@ -93,7 +93,7 @@ define dso_local i32 @all_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c ; CHECK-NEXT: mov x5, #0 // =0x0 ; CHECK-NEXT: mov x6, #0 // =0x0 ; CHECK-NEXT: mov x7, #0 // =0x0 -; CHECK-NEXT: mov x18, #0 // =0x0 +; CHECK-NEXT: mov x15, #0 // =0x0 ; CHECK-NEXT: orr w0, w8, w2 ; CHECK-NEXT: mov x2, #0 // =0x0 ; CHECK-NEXT: mov x8, #0 // =0x0 @@ -146,7 +146,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; DEFAULT-NEXT: mov x5, #0 // =0x0 ; DEFAULT-NEXT: mov x6, #0 // =0x0 ; DEFAULT-NEXT: mov x7, #0 // =0x0 -; DEFAULT-NEXT: mov x18, #0 // =0x0 +; DEFAULT-NEXT: mov x15, #0 // =0x0 ; DEFAULT-NEXT: movi v0.2d, #0000000000000000 ; DEFAULT-NEXT: orr w0, w8, w2 ; DEFAULT-NEXT: mov x2, #0 // =0x0 @@ -169,7 +169,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; SVE-OR-SME-NEXT: mov x5, #0 // =0x0 ; SVE-OR-SME-NEXT: mov x6, #0 // =0x0 ; SVE-OR-SME-NEXT: mov x7, #0 // =0x0 -; SVE-OR-SME-NEXT: mov x18, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x15, #0 // =0x0 ; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0 ; SVE-OR-SME-NEXT: orr w0, w8, w2 ; SVE-OR-SME-NEXT: mov x2, #0 // =0x0 @@ -196,7 +196,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0 ; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0 ; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0 -; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0 ; STREAMING-COMPAT-NEXT: fmov d0, xzr ; STREAMING-COMPAT-NEXT: orr w0, w8, w2 ; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0 @@ 
-492,7 +492,7 @@ define dso_local double @all_gpr_arg_float(double noundef %a, float noundef %b) ; CHECK-NEXT: mov x6, #0 // =0x0 ; CHECK-NEXT: mov x7, #0 // =0x0 ; CHECK-NEXT: mov x8, #0 // =0x0 -; CHECK-NEXT: mov x18, #0 // =0x0 +; CHECK-NEXT: mov x15, #0 // =0x0 ; CHECK-NEXT: ret entry: @@ -547,7 +547,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; DEFAULT-NEXT: mov x6, #0 // =0x0 ; DEFAULT-NEXT: mov x7, #0 // =0x0 ; DEFAULT-NEXT: mov x8, #0 // =0x0 -; DEFAULT-NEXT: mov x18, #0 // =0x0 +; DEFAULT-NEXT: mov x15, #0 // =0x0 ; DEFAULT-NEXT: movi v1.2d, #0000000000000000 ; DEFAULT-NEXT: movi v2.2d, #0000000000000000 ; DEFAULT-NEXT: movi v3.2d, #0000000000000000 @@ -570,7 +570,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; SVE-OR-SME-NEXT: mov x6, #0 // =0x0 ; SVE-OR-SME-NEXT: mov x7, #0 // =0x0 ; SVE-OR-SME-NEXT: mov x8, #0 // =0x0 -; SVE-OR-SME-NEXT: mov x18, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x15, #0 // =0x0 ; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0 ; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0 ; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0 @@ -597,7 +597,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0 ; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0 ; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0 -; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0 ; STREAMING-COMPAT-NEXT: fmov d1, xzr ; STREAMING-COMPAT-NEXT: fmov d2, xzr ; STREAMING-COMPAT-NEXT: fmov d3, xzr From f6122fa0eeeef5b9598439a5e049a0fdef9bd84a Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 12 Feb 2025 09:31:34 -0500 Subject: [PATCH 2/5] fixup! 
[AArch64] fix trampoline implementation: use X15 --- flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp | 2 +- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index f402404121da0..c91ead6f0c019 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -277,7 +277,7 @@ class BoxedProcedurePass // For PPC32 and PPC64, the thunk is populated by a call to // __trampoline_setup, which is defined in // compiler-rt/lib/builtins/trampoline_setup.c and requires the - // thunk size greater than 32 bytes. For Aarch64, RISCV and x86_64, the + // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64, the // thunk setup doesn't go through __trampoline_setup and fits in 32 // bytes. fir::SequenceType::Extent thunkSize = triple.getTrampolineSize(); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index ced3ff7b742ad..789bd6249b400 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2046,16 +2046,18 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) { // Find an available register to store value of VG to. 
unsigned X15Scratch = AArch64::NoRegister; - if (LiveRegs.contains(AArch64::X15)) { - // if (llvm::any_of( - // MBB.liveins(), - // [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { - // return STI.getRegisterInfo()->isSuperOrSubRegisterEq( - // AArch64::X15, LiveIn.PhysReg); - // })) + const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); + if (llvm::any_of( + MBB.liveins(), + [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { + return STI.getRegisterInfo()->isSuperOrSubRegisterEq( + AArch64::X15, LiveIn.PhysReg); + })) { X15Scratch = findScratchNonCalleeSaveRegister(&MBB); assert(X15Scratch != AArch64::NoRegister); +#ifndef NDEBUG LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it +#endif BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch) .addReg(AArch64::XZR) .addReg(AArch64::X15, RegState::Undef) From 9aa05538f87ea17f349a9c9921a6171a3d83aa36 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 12 Feb 2025 10:04:51 -0500 Subject: [PATCH 3/5] fix langref and Arm64EC CallingConvention --- llvm/docs/LangRef.rst | 7 ++++- .../Target/AArch64/AArch64ISelLowering.cpp | 26 ++++++++++++++----- llvm/lib/TargetParser/Triple.cpp | 2 -- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2d72e548ec82a..c1dd9dcfd63f7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -20903,7 +20903,12 @@ sufficiently aligned block of memory; this memory is written to by the intrinsic. Note that the size and the alignment are target-specific - LLVM currently provides no portable way of determining them, so a front-end that generates this intrinsic needs to have some -target-specific knowledge. The ``func`` argument must hold a function. +target-specific knowledge. + +The ``func`` argument must be a constant (potentially bitcasted) pointer to a +function declaration or definition, since the calling convention may affect the +content of the trampoline that is created. 
+ Semantics: """""""""" diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1404077446420..795e6bd1c27fa 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7302,28 +7302,43 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - // ldr x15, .+16 + // ldr NestReg, .+16 // ldr x17, .+20 // br x17 - // 0 + // .word 0 // .nest: .qword nest // .fptr: .qword fptr SDValue OutChains[5]; - const char X15 = 0x0f; - const char X17 = 0x11; + const Function *Func = + cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); + CallingConv::ID CC = Func->getCallingConv(); + unsigned NestReg; + + switch (CC) { + default: + NestReg = 0x0f; // X15 + break; + case CallingConv::ARM64EC_Thunk_Native: + case CallingConv::ARM64EC_Thunk_X64: + // Must be kept in sync with AArch64CallingConv.td + NestReg = 0x04; // X4 + break; + } + + const char FptrReg = 0x11; // X17 SDValue Addr = Trmp; SDLoc dl(Op); OutChains[0] = - DAG.getStore(Chain, dl, DAG.getConstant(0x58000080u | X15, dl, MVT::i32), Addr, + DAG.getStore(Chain, dl, DAG.getConstant(0x58000080u | NestReg, dl, MVT::i32), Addr, MachinePointerInfo(TrmpAddr)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(4, dl, MVT::i64)); OutChains[1] = - DAG.getStore(Chain, dl, DAG.getConstant(0x580000b0u | X17, dl, MVT::i32), Addr, + DAG.getStore(Chain, dl, DAG.getConstant(0x580000b0u | FptrReg, dl, MVT::i32), Addr, MachinePointerInfo(TrmpAddr, 4)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index e9e6f130f757c..8ada0d44f96f9 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -1725,8 +1725,6 @@ unsigned Triple::getTrampolineSize() const { if (isOSLinux()) return 48; break; - case Triple::aarch64: - return 36; } return 32; } From 
78745ede2a514e0c0c874ccceee500454d46a3fd Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 12 Feb 2025 10:13:49 -0500 Subject: [PATCH 4/5] fix more langref mistakes with aarch64 calling convention The calling convention previously stated that X9 and X15 were callee-preserved, but the implementation of AArch64FrameLowering.cpp uses those as scratch registers and does not actually preserve them. --- llvm/docs/LangRef.rst | 10 ++++----- .../AArch64/AArch64CallingConvention.td | 2 +- .../CodeGen/AArch64/arm64-preserve-all.ll | 16 +++++++------- .../CodeGen/AArch64/arm64-preserve-most.ll | 21 +++++++++---------- llvm/test/CodeGen/AArch64/preserve.ll | 4 ++-- llvm/test/CodeGen/AArch64/trampoline.ll | 12 +++++------ 6 files changed, 33 insertions(+), 32 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index c1dd9dcfd63f7..f14d5f1a0e14b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -410,8 +410,8 @@ added in the future: calling convention: on most platforms, they are not preserved and need to be saved by the caller, but on Windows, xmm6-xmm15 are preserved. - - On AArch64 the callee preserve all general purpose registers, except X0-X8 - and X16-X18. + - On AArch64 the callee preserve all general purpose registers, except X0-X9 + and X15-X18. X9 can be used as a scratch register. The idea behind this convention is to support calls to runtime functions that have a hot path and a cold path. The hot path is usually a small piece @@ -447,9 +447,9 @@ added in the future: R11. R11 can be used as a scratch register. Furthermore it also preserves all floating-point registers (XMMs/YMMs). - - On AArch64 the callee preserve all general purpose registers, except X0-X8 - and X16-X18. Furthermore it also preserves lower 128 bits of V8-V31 SIMD - - floating point registers. + - On AArch64 the callee preserve all general purpose registers, except X0-X9 + and X15-X18. 
Furthermore it also preserves lower 128 bits of V8-V31 SIMD - + floating point registers. X9 can be used as a scratch register. The idea behind this convention is to support calls to runtime functions that don't need to call out to any other functions. diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 8355463dea94e..366e8122b55dc 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -691,7 +691,7 @@ def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; def CSR_AArch64_NoneRegs : CalleeSavedRegs<(add LR, FP)>; def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, - (sequence "X%u", 9, 15))>; + (sequence "X%u", 10, 14))>; def CSR_AArch64_RT_AllRegs : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, (sequence "Q%u", 8, 31))>; diff --git a/llvm/test/CodeGen/AArch64/arm64-preserve-all.ll b/llvm/test/CodeGen/AArch64/arm64-preserve-all.ll index 778f4e2f9ec01..b5257f6518166 100644 --- a/llvm/test/CodeGen/AArch64/arm64-preserve-all.ll +++ b/llvm/test/CodeGen/AArch64/arm64-preserve-all.ll @@ -7,8 +7,8 @@ target triple = "aarch64-unknown-linux-gnu" declare void @normal_cc() ; Caller: preserve_allcc; callee: normalcc. Normally callee saved registers -; x9~x15 need to be spilled. Since most of them will be spilled in pairs in -; reverse order, we only check the odd number ones due to FileCheck not +; x10~x14 need to be spilled. Since most of them will be spilled in pairs in +; reverse order, we only check the even number ones due to FileCheck not ; matching the same line of assembly twice. 
; CHECK-LABEL: preserve_all ; CHECK-DAG: {{st[rp]}} {{(q[0-9]+, )?q8(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] @@ -22,21 +22,23 @@ declare void @normal_cc() ; CHECK-DAG: {{st[rp]}} {{(q[0-9]+, )?q26(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] ; CHECK-DAG: {{st[rp]}} {{(q[0-9]+, )?q28(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] ; CHECK-DAG: {{st[rp]}} {{(q[0-9]+, )?q30(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] -; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x9(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] -; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x11(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] -; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x13(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] -; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x15(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] +; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x10(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] +; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x12(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] +; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x14(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] define preserve_allcc void @preserve_all() { call void @normal_cc() ret void } ; Caller: normalcc; callee: preserve_allcc. x9/q9 does not need to be spilled. -; The same holds for other x and q registers, but we only check x9 and q9. +; The same holds for other x and q registers, but we only check x9, x11, and q9. 
; CHECK-LABEL: normal_cc_caller ; CHECK-NOT: stp {{x[0-9]+}}, x9, [sp, #{{[-0-9]+}}] ; CHECK-NOT: stp x9, {{x[0-9]+}}, [sp, #{{[-0-9]+}}] ; CHECK-NOT: str x9, [sp, {{#[-0-9]+}}] +; CHECK-NOT: stp {{x[0-9]+}}, x11, [sp, #{{[-0-9]+}}] +; CHECK-NOT: stp x10, {{x[0-9]+}}, [sp, #{{[-0-9]+}}] +; CHECK-NOT: str x10, [sp, {{#[-0-9]+}}] ; CHECK-NOT: stp {{q[0-9]+}}, q9, [sp, #{{[-0-9]+}}] ; CHECK-NOT: stp q9, {{q[0-9]+}}, [sp, #{{[-0-9]+}}] ; CHECK-NOT: str q9, [sp, {{#[-0-9]+}}] diff --git a/llvm/test/CodeGen/AArch64/arm64-preserve-most.ll b/llvm/test/CodeGen/AArch64/arm64-preserve-most.ll index f8196860aa34f..78605991114a4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-preserve-most.ll +++ b/llvm/test/CodeGen/AArch64/arm64-preserve-most.ll @@ -7,25 +7,24 @@ target triple = "aarch64-unknown-linux-gnu" declare void @normal_cc() ; Caller: preserve_mostcc; callee: normalcc. Normally callee saved registers -; x9~x15 need to be spilled. Since most of them will be spilled in pairs in -; reverse order, we only check the odd number ones due to FileCheck not +; x10~x14 need to be spilled. Since most of them will be spilled in pairs in +; reverse order, we only check the even number ones due to FileCheck not ; matching the same line of assembly twice. ; CHECK-LABEL: preserve_most -; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x9(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] -; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x11(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] -; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x13(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] -; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x15(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] +; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x10(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] +; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x12(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] +; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x14(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}] define preserve_mostcc void @preserve_most() { call void @normal_cc() ret void } -; Caller: normalcc; callee: preserve_mostcc. x9 does not need to be spilled. 
-; The same holds for x10 through x15, but we only check x9. +; Caller: normalcc; callee: preserve_mostcc. x9 does need to be spilled, but not x10 to x14. +; (we only check x10). ; CHECK-LABEL: normal_cc_caller -; CHECK-NOT: stp {{x[0-9]+}}, x9, [sp, #{{[-0-9]+}}] -; CHECK-NOT: stp x9, {{x[0-9]+}}, [sp, #{{[-0-9]+}}] -; CHECK-NOT: str x9, [sp, {{#[-0-9]+}}] +; CHECK-NOT: stp {{x[0-9]+}}, x10, [sp, #{{[-0-9]+}}] +; CHECK-NOT: stp x10, {{x[0-9]+}}, [sp, #{{[-0-9]+}}] +; CHECK-NOT: str x10, [sp, {{#[-0-9]+}}] define dso_local void @normal_cc_caller() { entry: %v = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/preserve.ll b/llvm/test/CodeGen/AArch64/preserve.ll index 49fb3685bcfc1..6f6a85c158082 100644 --- a/llvm/test/CodeGen/AArch64/preserve.ll +++ b/llvm/test/CodeGen/AArch64/preserve.ll @@ -8,13 +8,13 @@ target triple = "aarch64-unknown-unknown" declare void @bar1() define preserve_mostcc void @baz() #0 { -; CHECK: baz Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 
$z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $b16_hi $b17_hi $b18_hi $b19_hi $b20_hi $b21_hi $b22_hi $b23_hi $b24_hi $b25_hi $b26_hi $b27_hi $b28_hi $b29_hi $b30_hi $b31_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $d8_hi $d9_hi $d10_hi $d11_hi $d12_hi $d13_hi $d14_hi $d15_hi $d16_hi $d17_hi $d18_hi $d19_hi $d20_hi $d21_hi $d22_hi $d23_hi $d24_hi $d25_hi $d26_hi $d27_hi $d28_hi $d29_hi $d30_hi $d31_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $h16_hi $h17_hi $h18_hi $h19_hi $h20_hi $h21_hi $h22_hi $h23_hi $h24_hi $h25_hi $h26_hi $h27_hi $h28_hi $h29_hi $h30_hi $h31_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $s16_hi $s17_hi $s18_hi $s19_hi $s20_hi $s21_hi $s22_hi $s23_hi $s24_hi $s25_hi $s26_hi $s27_hi $s28_hi $s29_hi $s30_hi $s31_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 
$d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 
$x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 +; CHECK: baz Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 
$pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w15 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x15 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $b16_hi $b17_hi $b18_hi $b19_hi $b20_hi $b21_hi $b22_hi $b23_hi $b24_hi $b25_hi $b26_hi $b27_hi $b28_hi $b29_hi $b30_hi $b31_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $d8_hi $d9_hi $d10_hi $d11_hi $d12_hi $d13_hi $d14_hi $d15_hi $d16_hi $d17_hi $d18_hi $d19_hi $d20_hi $d21_hi $d22_hi $d23_hi $d24_hi $d25_hi $d26_hi $d27_hi $d28_hi $d29_hi $d30_hi $d31_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $h16_hi $h17_hi $h18_hi $h19_hi $h20_hi $h21_hi $h22_hi $h23_hi $h24_hi $h25_hi $h26_hi $h27_hi $h28_hi $h29_hi $h30_hi $h31_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $s16_hi $s17_hi $s18_hi $s19_hi $s20_hi $s21_hi $s22_hi $s23_hi $s24_hi $s25_hi $s26_hi $s27_hi $s28_hi $s29_hi $s30_hi $s31_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w9_hi $w15_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 
$d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 
$q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 
$z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 call void @bar1() call void @bar2() ret void } define preserve_allcc void @foo() #0 { -; CHECK: foo Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 
$d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 
$x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 +; CHECK: foo Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 
$pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w15 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x15 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w9_hi $w15_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 
$d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 
$z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15 call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll index 0e682704afbf8..d9016b02a0f80 100644 --- a/llvm/test/CodeGen/AArch64/trampoline.ll +++ b/llvm/test/CodeGen/AArch64/trampoline.ll @@ -83,7 +83,7 @@ define i64 @func1() { ; CHECK-LINUX-NEXT: str w9, [sp, #16] ; CHECK-LINUX-NEXT: add x9, sp, #56 ; CHECK-LINUX-NEXT: stp x9, x8, [sp, #24] -; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f +; CHECK-LINUX-NEXT: mov x8, #132 // =0x84 ; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16 ; CHECK-LINUX-NEXT: movk x8, #177, lsl #32 ; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48 @@ -112,7 +112,7 @@ define i64 @func1() 
{ ; CHECK-PC-NEXT: add x0, sp, #8 ; CHECK-PC-NEXT: movk w8, #54815, lsl #16 ; CHECK-PC-NEXT: str w8, [sp, #16] -; CHECK-PC-NEXT: mov x8, #143 // =0x8f +; CHECK-PC-NEXT: mov x8, #132 // =0x84 ; CHECK-PC-NEXT: movk x8, #22528, lsl #16 ; CHECK-PC-NEXT: movk x8, #177, lsl #32 ; CHECK-PC-NEXT: movk x8, #22528, lsl #48 @@ -148,7 +148,7 @@ define i64 @func1() { ; CHECK-APPLE-NEXT: mov x0, sp ; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16 ; CHECK-APPLE-NEXT: str w8, [sp, #8] -; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f +; CHECK-APPLE-NEXT: mov x8, #132 ; =0x84 ; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16 ; CHECK-APPLE-NEXT: movk x8, #177, lsl #32 ; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48 @@ -184,7 +184,7 @@ define i64 @func2() { ; CHECK-LINUX-NEXT: add x9, sp, #8 ; CHECK-LINUX-NEXT: add x1, x0, #12 ; CHECK-LINUX-NEXT: stp x9, x8, [x0, #16] -; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f +; CHECK-LINUX-NEXT: mov x8, #132 // =0x84 ; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16 ; CHECK-LINUX-NEXT: movk x8, #177, lsl #32 ; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48 @@ -210,7 +210,7 @@ define i64 @func2() { ; CHECK-PC-NEXT: mov w8, #544 // =0x220 ; CHECK-PC-NEXT: movk w8, #54815, lsl #16 ; CHECK-PC-NEXT: str w8, [x0, #8] -; CHECK-PC-NEXT: mov x8, #143 // =0x8f +; CHECK-PC-NEXT: mov x8, #132 // =0x84 ; CHECK-PC-NEXT: movk x8, #22528, lsl #16 ; CHECK-PC-NEXT: movk x8, #177, lsl #32 ; CHECK-PC-NEXT: movk x8, #22528, lsl #48 @@ -246,7 +246,7 @@ define i64 @func2() { ; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220 ; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16 ; CHECK-APPLE-NEXT: str w8, [x0, #8] -; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f +; CHECK-APPLE-NEXT: mov x8, #132 ; =0x84 ; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16 ; CHECK-APPLE-NEXT: movk x8, #177, lsl #32 ; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48 From 3e53925716d0cef9f77fd8057bea1aa1080f8e1b Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 12 Feb 2025 10:47:42 -0500 Subject: [PATCH 5/5] fixup! 
[AArch64] fix trampoline implementation: use X15 --- .../lib/Optimizer/CodeGen/BoxedProcedure.cpp | 6 ++--- flang/test/Fir/boxproc.fir | 4 ++-- .../Target/AArch64/AArch64FrameLowering.cpp | 11 +++++----- .../Target/AArch64/AArch64ISelLowering.cpp | 22 +++++++++---------- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index c91ead6f0c019..d47b404747bca 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -277,9 +277,9 @@ class BoxedProcedurePass // For PPC32 and PPC64, the thunk is populated by a call to // __trampoline_setup, which is defined in // compiler-rt/lib/builtins/trampoline_setup.c and requires the - // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64, the - // thunk setup doesn't go through __trampoline_setup and fits in 32 - // bytes. + // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64, + // the thunk setup doesn't go through __trampoline_setup and fits in + // 32 bytes. 
fir::SequenceType::Extent thunkSize = triple.getTrampolineSize(); mlir::Type buffTy = SequenceType::get({thunkSize}, i8Ty); auto buffer = builder.create(loc, buffTy); diff --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir index e99dfd0b92afd..9e5e41a94069c 100644 --- a/flang/test/Fir/boxproc.fir +++ b/flang/test/Fir/boxproc.fir @@ -3,7 +3,7 @@ // RUN: %if powerpc-registered-target %{tco --target=powerpc64le-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-PPC %} // CHECK-LABEL: define void @_QPtest_proc_dummy() -// CHECK-AARCH64: %[[VAL_3:.*]] = alloca [36 x i8], i64 1, align 1 +// CHECK-AARCH64: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK-X86: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK-PPC: %[[VAL_3:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1 // CHECK: %[[VAL_1:.*]] = alloca { ptr }, i64 1, align 8 @@ -63,7 +63,7 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) { } // CHECK-LABEL: define void @_QPtest_proc_dummy_char() -// CHECK-AARCH64: %[[VAL_20:.*]] = alloca [36 x i8], i64 1, align 1 +// CHECK-AARCH64: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK-X86: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK-PPC: %[[VAL_20:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1 // CHECK: %[[VAL_2:.*]] = alloca { { ptr, i64 } }, i64 1, align 8 diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 789bd6249b400..538f08e905f4e 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2047,12 +2047,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Find an available register to store value of VG to. 
unsigned X15Scratch = AArch64::NoRegister; const AArch64Subtarget &STI = MF.getSubtarget(); - if (llvm::any_of( - MBB.liveins(), - [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { - return STI.getRegisterInfo()->isSuperOrSubRegisterEq( - AArch64::X15, LiveIn.PhysReg); - })) { + if (llvm::any_of(MBB.liveins(), + [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { + return STI.getRegisterInfo()->isSuperOrSubRegisterEq( + AArch64::X15, LiveIn.PhysReg); + })) { X15Scratch = findScratchNonCalleeSaveRegister(&MBB); assert(X15Scratch != AArch64::NoRegister); #ifndef NDEBUG diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 795e6bd1c27fa..b214c0c92742b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7296,7 +7296,7 @@ SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); - SDValue Trmp = Op.getOperand(1); // trampoline, 36 bytes + SDValue Trmp = Op.getOperand(1); // trampoline, >=32 bytes SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value @@ -7311,7 +7311,7 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue OutChains[5]; const Function *Func = - cast(cast(Op.getOperand(5))->getValue()); + cast(cast(Op.getOperand(5))->getValue()); CallingConv::ID CC = Func->getCallingConv(); unsigned NestReg; @@ -7330,15 +7330,15 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Addr = Trmp; SDLoc dl(Op); - OutChains[0] = - DAG.getStore(Chain, dl, DAG.getConstant(0x58000080u | NestReg, dl, MVT::i32), Addr, - MachinePointerInfo(TrmpAddr)); + OutChains[0] = DAG.getStore( + Chain, dl, DAG.getConstant(0x58000080u | NestReg, dl, MVT::i32), Addr, + MachinePointerInfo(TrmpAddr)); Addr = 
DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(4, dl, MVT::i64)); - OutChains[1] = - DAG.getStore(Chain, dl, DAG.getConstant(0x580000b0u | FptrReg, dl, MVT::i32), Addr, - MachinePointerInfo(TrmpAddr, 4)); + OutChains[1] = DAG.getStore( + Chain, dl, DAG.getConstant(0x580000b0u | FptrReg, dl, MVT::i32), Addr, + MachinePointerInfo(TrmpAddr, 4)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(8, dl, MVT::i64)); @@ -7359,11 +7359,11 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); SDValue EndOfTrmp = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, - DAG.getConstant(12, dl, MVT::i64)); + DAG.getConstant(12, dl, MVT::i64)); // Call clear cache on the trampoline instructions. - return DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken, - Trmp, EndOfTrmp); + return DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken, Trmp, + EndOfTrmp); } SDValue AArch64TargetLowering::LowerOperation(SDValue Op,