-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[AArch64] fix trampoline implementation: use X15 #126743
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
546fed8
f6122fa
9aa0553
78745ed
3e53925
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,12 @@ class CCIfSubtarget<string F, CCAction A> | |
//===----------------------------------------------------------------------===// | ||
|
||
defvar AArch64_Common = [ | ||
// The 'nest' parameter, if any, is passed in X15. | ||
// The previous register used here (X18) is also defined to be unavailable | ||
// for this purpose, while all of X9-X15 were defined to be free for LLVM to | ||
// use for this, so use X15 (which LLVM often already clobbers anyways). | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
|
||
CCIfType<[iPTR], CCBitConvertToType<i64>>, | ||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>, | ||
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>, | ||
|
@@ -117,16 +123,12 @@ defvar AArch64_Common = [ | |
]; | ||
|
||
let Entry = 1 in | ||
def CC_AArch64_AAPCS : CallingConv<!listconcat( | ||
// The 'nest' parameter, if any, is passed in X18. | ||
// Darwin and Windows use X18 as the platform register and hence 'nest' isn't | ||
// currently supported there. | ||
[CCIfNest<CCAssignToReg<[X18]>>], | ||
AArch64_Common | ||
)>; | ||
def CC_AArch64_AAPCS : CallingConv<AArch64_Common>; | ||
|
||
let Entry = 1 in | ||
def RetCC_AArch64_AAPCS : CallingConv<[ | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A return value can't be "nest"? |
||
|
||
CCIfType<[iPTR], CCBitConvertToType<i64>>, | ||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>, | ||
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>, | ||
|
@@ -177,6 +179,8 @@ def CC_AArch64_Win64_VarArg : CallingConv<[ | |
// a stack layout compatible with the x64 calling convention. | ||
let Entry = 1 in | ||
def CC_AArch64_Arm64EC_VarArg : CallingConv<[ | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
|
||
// Convert small floating-point values to integer. | ||
CCIfType<[f16, bf16], CCBitConvertToType<i16>>, | ||
CCIfType<[f32], CCBitConvertToType<i32>>, | ||
|
@@ -295,6 +299,8 @@ def CC_AArch64_Arm64EC_Thunk_Native : CallingConv<[ | |
|
||
let Entry = 1 in | ||
def RetCC_AArch64_Arm64EC_Thunk : CallingConv<[ | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
|
||
// The X86-Win64 calling convention always returns __m64 values in RAX. | ||
CCIfType<[x86mmx], CCBitConvertToType<i64>>, | ||
|
||
|
@@ -353,6 +359,8 @@ def RetCC_AArch64_Arm64EC_CFGuard_Check : CallingConv<[ | |
// + Stack slots are sized as needed rather than being at least 64-bit. | ||
let Entry = 1 in | ||
def CC_AArch64_DarwinPCS : CallingConv<[ | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
|
||
CCIfType<[iPTR], CCBitConvertToType<i64>>, | ||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>, | ||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, | ||
|
@@ -427,6 +435,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ | |
|
||
let Entry = 1 in | ||
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
|
||
CCIfType<[iPTR], CCBitConvertToType<i64>>, | ||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>, | ||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, | ||
|
@@ -450,6 +460,8 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ | |
// same as the normal Darwin VarArgs handling. | ||
let Entry = 1 in | ||
def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
|
||
CCIfType<[v2f32], CCBitConvertToType<v2i32>>, | ||
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, | ||
|
||
|
@@ -494,6 +506,8 @@ def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ | |
|
||
let Entry = 1 in | ||
def CC_AArch64_GHC : CallingConv<[ | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
|
||
CCIfType<[iPTR], CCBitConvertToType<i64>>, | ||
|
||
// Handle all vector types as either f64 or v2f64. | ||
|
@@ -523,6 +537,7 @@ def CC_AArch64_Preserve_None : CallingConv<[ | |
// We can pass arguments in all general registers, except: | ||
// - X8, used for sret | ||
// - X16/X17, used by the linker as IP0/IP1 | ||
// - X15, the nest register and used by Windows for stack allocation | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of reducing the utility preservenone/preservemost/etc., can we just forbid using "nest" arguments with them? I can't see why you'd want to use them together. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That sounds reasonable. If I read the code correctly, the comment about "X15 for stack allocation" is not quite accurate as well (the code doesn't necessarily have to clobber that, since it is implemented to use a temp register first--though such a register does need to be made available for it). Other comments here seem contradictory also, since it assigns X9 last "because it is needed as a scratch register"–but it seems like either it is needed as a scratch register (in which case it should not have been allowed to assign it), or it doesn't need it as a scratch register (in which case the comment is wrong since it doesn't get used as a scratch register) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, the x15 thing isn't a fundamental limitation; it's just a bit complicated to generate the correct code, and the author of the preservenonecc code didn't want to try to implement it. For the x9 thing, see #99434 . |
||
// - X18, the platform register | ||
// - X19, the base pointer | ||
// - X29, the frame pointer | ||
|
@@ -533,6 +548,7 @@ def CC_AArch64_Preserve_None : CallingConv<[ | |
// normal functions without saving and reloading arguments. | ||
// X9 is assigned last as it is used in FrameLowering as the first | ||
// choice for a scratch register. | ||
CCIfNest<CCAssignToReg<[X15]>>, | ||
CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23, | ||
W24, W25, W26, W27, W28, | ||
W0, W1, W2, W3, W4, W5, | ||
|
@@ -544,12 +560,6 @@ def CC_AArch64_Preserve_None : CallingConv<[ | |
X6, X7, X10, X11, | ||
X12, X13, X14, X9]>>, | ||
|
||
// Windows uses X15 for stack allocation | ||
CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()", | ||
CCIfType<[i32], CCAssignToReg<[W15]>>>, | ||
CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()", | ||
CCIfType<[i64], CCAssignToReg<[X15]>>>, | ||
|
||
CCDelegateTo<CC_AArch64_AAPCS> | ||
]>; | ||
|
||
|
@@ -681,7 +691,7 @@ def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; | |
def CSR_AArch64_NoneRegs : CalleeSavedRegs<(add LR, FP)>; | ||
|
||
def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS, | ||
(sequence "X%u", 9, 15))>; | ||
(sequence "X%u", 10, 14))>; | ||
|
||
def CSR_AArch64_RT_AllRegs : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, | ||
(sequence "Q%u", 8, 31))>; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How are bitcasts relevant here?
Probably we should tighten the verifier check to just require a function. And if some target eventually needs trampolines where any of the operands is in a non-zero address-space, we can make the intrinsic overloaded.
Probably we also should tighten llvm::canReplaceOperandWithVariable.
And please land this separately.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It probably isn't of much relevance since opaque pointers. I think it is just also fairly silly of a restriction, since llvm doesn't care at all about the source of the value here, it just needs to know what calling convention it should have (to know which register to use). That info probably could just as easily be passed as a separate immarg value, since calling conventions are defined in the langref to be numeric.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That all makes sense.