Skip to content

Commit 4aaf889

Browse files
committed
[llvm][clang] Allocate a new stack instead of spawning a new thread to get more stack space
Clang spawns a new thread to avoid running out of stack space. This can make debugging and performance analysis more difficult, because the relationship between the threads is hard to recover. This patch introduces `runOnNewStack` and applies it in Clang. On platforms that have good support for it, this allocates a new stack and switches to it using assembly. Switching stacks like this actually works on most platforms, but many debuggers and unwinders reject the large or backwards stack offsets that occur. Apple platforms and tools are known to support this, so this patch only enables it there for now.
1 parent d19d7c6 commit 4aaf889

File tree

11 files changed

+242
-30
lines changed

11 files changed

+242
-30
lines changed

clang/docs/ReleaseNotes.rst

+4
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ Non-comprehensive list of changes in this release
158158

159159
- Support parsing the `cc` operand modifier and alias it to the `c` modifier (#GH127719).
160160
- Added `__builtin_elementwise_exp10`.
161+
- Clang itself now uses split stacks instead of threads for allocating more
162+
stack space when running on Apple AArch64-based platforms. This means that
163+
stack traces of Clang from debuggers, crashes, and profilers may look
164+
different than before.
161165

162166
New Compiler Flags
163167
------------------

clang/include/clang/Basic/Stack.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ namespace clang {
2727

2828
/// Call this once on each thread, as soon after starting the thread as
2929
/// feasible, to note the approximate address of the bottom of the stack.
30-
void noteBottomOfStack();
30+
///
31+
/// \param ForceSet set to true if you know the call is near the bottom of a
32+
/// new stack. Used for split stacks.
33+
void noteBottomOfStack(bool ForceSet = false);
3134

3235
/// Determine whether the stack is nearly exhausted.
3336
bool isStackNearlyExhausted();

clang/lib/Basic/Stack.cpp

+12-28
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,13 @@
1313

1414
#include "clang/Basic/Stack.h"
1515
#include "llvm/Support/CrashRecoveryContext.h"
16+
#include "llvm/Support/ProgramStack.h"
1617

17-
#ifdef _MSC_VER
18-
#include <intrin.h> // for _AddressOfReturnAddress
19-
#endif
18+
static LLVM_THREAD_LOCAL uintptr_t BottomOfStack = 0;
2019

21-
static LLVM_THREAD_LOCAL void *BottomOfStack = nullptr;
22-
23-
static void *getStackPointer() {
24-
#if __GNUC__ || __has_builtin(__builtin_frame_address)
25-
return __builtin_frame_address(0);
26-
#elif defined(_MSC_VER)
27-
return _AddressOfReturnAddress();
28-
#else
29-
char CharOnStack = 0;
30-
// The volatile store here is intended to escape the local variable, to
31-
// prevent the compiler from optimizing CharOnStack into anything other
32-
// than a char on the stack.
33-
//
34-
// Tested on: MSVC 2015 - 2019, GCC 4.9 - 9, Clang 3.2 - 9, ICC 13 - 19.
35-
char *volatile Ptr = &CharOnStack;
36-
return Ptr;
37-
#endif
38-
}
39-
40-
void clang::noteBottomOfStack() {
41-
if (!BottomOfStack)
42-
BottomOfStack = getStackPointer();
20+
void clang::noteBottomOfStack(bool ForceSet) {
21+
if (!BottomOfStack || ForceSet)
22+
BottomOfStack = llvm::getStackPointer();
4323
}
4424

4525
bool clang::isStackNearlyExhausted() {
@@ -51,7 +31,8 @@ bool clang::isStackNearlyExhausted() {
5131
if (!BottomOfStack)
5232
return false;
5333

54-
intptr_t StackDiff = (intptr_t)getStackPointer() - (intptr_t)BottomOfStack;
34+
intptr_t StackDiff =
35+
(intptr_t)llvm::getStackPointer() - (intptr_t)BottomOfStack;
5536
size_t StackUsage = (size_t)std::abs(StackDiff);
5637

5738
// If the stack pointer has a surprising value, we do not understand this
@@ -66,9 +47,12 @@ bool clang::isStackNearlyExhausted() {
6647
void clang::runWithSufficientStackSpaceSlow(llvm::function_ref<void()> Diag,
6748
llvm::function_ref<void()> Fn) {
6849
llvm::CrashRecoveryContext CRC;
69-
CRC.RunSafelyOnThread([&] {
70-
noteBottomOfStack();
50+
// Preserve the BottomOfStack in case RunSafelyOnNewStack uses split stacks.
51+
uintptr_t PrevBottom = BottomOfStack;
52+
CRC.RunSafelyOnNewStack([&] {
53+
noteBottomOfStack(true);
7154
Diag();
7255
Fn();
7356
}, DesiredStackSize);
57+
BottomOfStack = PrevBottom;
7458
}

clang/lib/Frontend/CompilerInstance.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1276,7 +1276,7 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc,
12761276

12771277
// Execute the action to actually build the module in-place. Use a new stack
12781278
// (a split stack or a separate thread) so that we get a stack large enough.
1279-
bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnThread(
1279+
bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack(
12801280
[&]() {
12811281
GenerateModuleFromModuleMapAction Action;
12821282
Instance.ExecuteAction(Action);

llvm/include/llvm/Support/CrashRecoveryContext.h

+3
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ class CrashRecoveryContext {
9797
return RunSafelyOnThread([&]() { Fn(UserData); }, RequestedStackSize);
9898
}
9999

100+
bool RunSafelyOnNewStack(function_ref<void()>,
101+
unsigned RequestedStackSize = 0);
102+
100103
/// Explicitly trigger a crash recovery in the current process, and
101104
/// return failure from RunSafely(). This function does not return.
102105
[[noreturn]] void HandleExit(int RetCode);
+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
//===--- ProgramStack.h -----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_SUPPORT_PROGRAMSTACK_H
10+
#define LLVM_SUPPORT_PROGRAMSTACK_H
11+
12+
#include "llvm/ADT/STLFunctionalExtras.h"
#include <cstdint>
#include <optional>
#include <utility>
13+
14+
namespace llvm {
15+
16+
/// \returns an address close to the current value of the stack pointer.
17+
///
18+
/// The value is not guaranteed to point to anything specific. It can be used to
19+
/// estimate how much stack space has been used since the previous call.
20+
uintptr_t getStackPointer();
21+
22+
/// \returns the default stack size for this platform.
23+
///
24+
/// Based on \p RLIMIT_STACK or the equivalent.
25+
unsigned getDefaultStackSize();
26+
27+
/// Runs Fn on a new stack of at least the given size.
28+
///
29+
/// \param StackSize requested stack size. A size of 0 uses the default stack
30+
/// size of the platform.
31+
///
32+
/// The preferred implementation is split stacks on platforms that have a good
33+
/// debugging experience for them. On other platforms a new thread is used.
34+
void runOnNewStack(unsigned StackSize, function_ref<void()> Fn);
35+
36+
template <typename R, typename... Ts>
37+
R runOnNewStack(unsigned StackSize, function_ref<R(Ts...)> Fn, Ts &&...Args) {
38+
std::optional<R> Ret;
39+
runOnNewStack(StackSize, [&]() { Ret = Fn(std::forward<Ts>(Args)...); });
40+
return std::move(*Ret);
41+
}
42+
43+
} // namespace llvm
44+
45+
#endif // LLVM_SUPPORT_PROGRAMSTACK_H

llvm/lib/Support/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ add_llvm_component_library(LLVMSupport
294294
Path.cpp
295295
Process.cpp
296296
Program.cpp
297+
ProgramStack.cpp
297298
RWMutex.cpp
298299
Signals.cpp
299300
Threading.cpp

llvm/lib/Support/CrashRecoveryContext.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "llvm/Config/llvm-config.h"
1111
#include "llvm/Support/ErrorHandling.h"
1212
#include "llvm/Support/ExitCodes.h"
13+
#include "llvm/Support/ProgramStack.h"
1314
#include "llvm/Support/Signals.h"
1415
#include "llvm/Support/thread.h"
1516
#include <cassert>
@@ -523,3 +524,21 @@ bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
523524
CRC->setSwitchedThread();
524525
return Info.Result;
525526
}
527+
528+
// Runs Fn through runOnNewStack(RequestedStackSize, Fn). When
// gCrashRecoveryEnabled is set, a CrashRecoveryContextImpl is installed and a
// setjmp recovery point is armed before Fn starts.
//
// Returns true once runOnNewStack has returned, and false when control comes
// back through the setjmp recovery point instead.
//
// NOTE(review): the recovery point lives in this frame while Fn executes
// inside runOnNewStack. If recovery longjmps back here, whatever
// runOnNewStack does after Fn returns (e.g. releasing a stack it allocated)
// is presumably skipped -- confirm.
// NOTE(review): on configurations where runOnNewStack runs Fn on another
// thread, the jump buffer belongs to the calling thread; verify that a crash
// on the worker thread is handled correctly.
bool CrashRecoveryContext::RunSafelyOnNewStack(function_ref<void()> Fn,
529+
unsigned RequestedStackSize) {
530+
// If crash recovery is disabled, skip the recovery setup; Fn still runs below.
531+
if (gCrashRecoveryEnabled) {
532+
assert(!Impl && "Crash recovery context already initialized!");
533+
CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
534+
Impl = CRCI;
535+
536+
CRCI->ValidJumpBuffer = true;
537+
// setjmp returns non-zero only when re-entered via longjmp on JumpBuffer.
if (setjmp(CRCI->JumpBuffer) != 0) {
538+
return false;
539+
}
540+
}
541+
542+
runOnNewStack(RequestedStackSize, Fn);
543+
return true;
544+
}

llvm/lib/Support/ProgramStack.cpp

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
//===--- ProgramStack.cpp - Program stack utilities -----------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/Support/ProgramStack.h"
10+
#include "llvm/Config/config.h"
11+
#include "llvm/Support/Compiler.h"
12+
13+
#ifdef LLVM_ON_UNIX
14+
# include <sys/resource.h> // for getrlimit
15+
#endif
16+
17+
#ifdef _MSC_VER
18+
# include <intrin.h> // for _AddressOfReturnAddress
19+
#endif
20+
21+
// Currently only Apple AArch64 is known to support split stacks in the debugger
22+
// and other tooling.
23+
#if defined(__APPLE__) && defined(__aarch64__) && \
24+
LLVM_HAS_CPP_ATTRIBUTE(gnu::naked) && __has_extension(gnu_asm)
25+
# define LLVM_HAS_SPLIT_STACKS
26+
# define LLVM_HAS_SPLIT_STACKS_AARCH64
27+
#include <sys/mman.h>
28+
#endif
29+
30+
#ifndef LLVM_HAS_SPLIT_STACKS
31+
# include "llvm/Support/thread.h"
32+
#endif
33+
34+
using namespace llvm;
35+
36+
/// Returns an address close to the current value of the stack pointer.
///
/// The value is only meant for estimating stack usage by comparing two
/// samples; it is not guaranteed to point at anything specific.
uintptr_t llvm::getStackPointer() {
#if __GNUC__ || __has_builtin(__builtin_frame_address)
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
#elif defined(_MSC_VER)
  return reinterpret_cast<uintptr_t>(_AddressOfReturnAddress());
#else
  // Portable fallback: use the address of a local. The local itself must not
  // be volatile-qualified, otherwise its address (volatile char *) cannot
  // initialize Ptr below.
  char CharOnStack = 0;
  // The volatile store here is intended to escape the local variable, to
  // prevent the compiler from optimizing CharOnStack into anything other
  // than a char on the stack.
  //
  // Tested on: MSVC 2015 - 2019, GCC 4.9 - 9, Clang 3.2 - 9, ICC 13 - 19.
  char *volatile Ptr = &CharOnStack;
  return reinterpret_cast<uintptr_t>(Ptr);
#endif
}
52+
53+
/// Returns the default stack size, in bytes, to use for a new stack.
///
/// On Unix this is the soft RLIMIT_STACK limit when it can be queried and
/// fits in an unsigned; otherwise a fixed 8MiB is returned.
unsigned llvm::getDefaultStackSize() {
  // Clang recursively parses, instantiates templates, and evaluates constant
  // expressions. We've found 8MiB to be a reasonable stack size given the way
  // Clang works and the way C++ is commonly written.
  constexpr unsigned FallbackStackSize = 8u << 20;
#ifdef LLVM_ON_UNIX
  rlimit RL;
  // A failed query leaves RL unspecified, and an unlimited stack has no size
  // we could allocate.
  if (getrlimit(RLIMIT_STACK, &RL) != 0 || RL.rlim_cur == RLIM_INFINITY)
    return FallbackStackSize;

  // rlim_t can be wider than unsigned; reject values that would be truncated
  // (possibly to 0) instead of returning a meaningless size.
  const unsigned Narrowed = static_cast<unsigned>(RL.rlim_cur);
  if (Narrowed == 0 || Narrowed != RL.rlim_cur)
    return FallbackStackSize;
  return Narrowed;
#else
  return FallbackStackSize;
#endif
}
65+
66+
namespace {
67+
#ifdef LLVM_HAS_SPLIT_STACKS_AARCH64
68+
// Switches to the stack whose highest address is Stack, calls Fn(Ctx) there,
// then switches back to the caller's stack and returns.
//
// The function is naked, so the assembly below is the entire body. It carves a
// 32-byte record out of the top of the new stack:
//   [+0x00] zero  [+0x08] caller sp  [+0x10] caller fp  [+0x18] caller lr
// and points x29 at the saved fp/lr pair, which the CFI directives describe.
//
// NOTE(review): AArch64 requires sp to be 16-byte aligned when it is used to
// address memory, so Stack is presumably expected to be 16-byte aligned --
// confirm at the call site.
[[gnu::naked]] void runOnNewStackImpl(void *Stack, void (*Fn)(void *),
69+
void *Ctx) {
70+
__asm__ volatile(
71+
"mov x16, sp\n\t" // copy caller sp into a scratch register
72+
"sub x0, x0, #0x20\n\t" // subtract space from stack
73+
"stp xzr, x16, [x0, #0x00]\n\t" // save old sp
74+
"stp x29, x30, [x0, #0x10]\n\t" // save fp, lr
75+
"mov sp, x0\n\t" // switch to new stack
76+
"add x29, x0, #0x10\n\t" // switch to new frame
77+
".cfi_def_cfa w29, 16\n\t" // CFA is the new fp + 16
78+
".cfi_offset w30, -8\n\t" // lr
79+
".cfi_offset w29, -16\n\t" // fp
80+
81+
"mov x0, x2\n\t" // Ctx is the only argument
82+
"blr x1\n\t" // call Fn
83+
84+
"ldp x29, x30, [sp, #0x10]\n\t" // restore fp, lr
85+
"ldp xzr, x16, [sp, #0x00]\n\t" // load old sp
86+
"mov sp, x16\n\t" // switch back to the caller's stack
87+
"ret" // return through the restored lr
88+
);
89+
}
90+
#endif
91+
92+
#ifdef LLVM_HAS_SPLIT_STACKS
93+
// Trampoline with a plain function-pointer signature: Ctx points at the
// function_ref<void()> that should be invoked.
void callback(void *Ctx) {
  auto *Fn = static_cast<function_ref<void()> *>(Ctx);
  (*Fn)();
}
96+
#endif
97+
} // namespace
98+
99+
#ifdef LLVM_HAS_SPLIT_STACKS
100+
void llvm::runOnNewStack(unsigned StackSize, function_ref<void()> Fn) {
101+
if (StackSize == 0)
102+
StackSize = getDefaultStackSize();
103+
104+
// We use malloc here instead of mmap because:
105+
// - it's simpler,
106+
// - many malloc implementations will reuse the allocation in cases where
107+
// we're bouncing accross the edge of a stack boundry, and
108+
// - many malloc implemenations will already provide guard pages for
109+
// allocations this large.
110+
void *Stack = malloc(StackSize);
111+
void *BottomOfStack = (char *)Stack + StackSize;
112+
113+
runOnNewStackImpl(BottomOfStack, callback, &Fn);
114+
115+
free(Stack);
116+
}
117+
#else
118+
void llvm::runOnNewStack(unsigned StackSize, function_ref<void()> Fn) {
119+
llvm::thread Thread(
120+
StackSize == 0 ? std::nullopt : std::optional<unsigned>(StackSize), Fn);
121+
Thread.join();
122+
}
123+
#endif

llvm/unittests/Support/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ add_llvm_unittest(SupportTests
7070
PerThreadBumpPtrAllocatorTest.cpp
7171
ProcessTest.cpp
7272
ProgramTest.cpp
73+
ProgramStackTest.cpp
7374
RecyclerTest.cpp
7475
RegexTest.cpp
7576
ReverseIterationTest.cpp
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===- unittest/Support/ProgramStackTest.cpp ------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/Support/ProgramStack.h"
10+
#include "llvm/Support/Process.h"
11+
#include "gtest/gtest.h"
12+
13+
using namespace llvm;
14+
15+
static uintptr_t func(int &A) {
16+
A = 7;
17+
return getStackPointer();
18+
}
19+
20+
// Checks that runOnNewStack runs the callee, forwards its argument and result,
// and that the callee observes a stack pointer far away from the caller's.
TEST(ProgramStackTest, runOnNewStack) {
  int A = 0;
  uintptr_t Stack = runOnNewStack(0, function_ref<uintptr_t(int &)>(func), A);
  EXPECT_EQ(A, 7);
  // Compare as unsigned addresses: this needs no std::abs overload (and so no
  // extra include) and cannot overflow a signed difference.
  uintptr_t Here = llvm::getStackPointer();
  size_t StackDistance = Here > Stack ? Here - Stack : Stack - Here;
  // Page size is used as it's large enough to guarantee we're not on the same
  // stack but not too large to cause spurious failures.
  EXPECT_GT(StackDistance, llvm::sys::Process::getPageSizeEstimate());
}

0 commit comments

Comments
 (0)