Skip to content

Commit f843602

Browse files
author
Mikhail Gudim
committed
[RISCV][WIP] Let RA do the CSR saves.
We turn the problem of saving and restoring callee-saved registers efficiently into a register allocation problem. This has the advantage that the register allocator can essentially do shrink-wrapping on a per-register basis. Currently, the shrink-wrapping pass saves all CSRs in the same place, which may be suboptimal. Also, improvements to register allocation / coalescing will translate to improvements in shrink-wrapping. In finalizeLowering() we copy all callee-saved registers from a physical register to a virtual one. In all return blocks we do the reverse.
1 parent 28731f5 commit f843602

27 files changed

+1034
-187
lines changed

.gitlab-ci.yml

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# image should be available on the server
2+
image:
3+
name: gitlab.dc1.ventanamicro.com:5005/toolchain/llvm
4+
entrypoint: [""]
5+
6+
stages:
7+
- build
8+
- test
9+
- deploy
10+
11+
# TODO: split this up into several jobs. To do this,
12+
# need to figure out how to pass files from one job to another.
13+
build-test-benchmark:
14+
tags:
15+
- toolchain
16+
stage: build
17+
except:
18+
variables:
19+
- $CI_COMMIT_MESSAGE =~ /Merge.+branch\s(.*)\sinto(.*)/
20+
script:
21+
- echo "Hello, $GITLAB_USER_LOGIN!"
22+
- whoami
23+
- pwd
24+
25+
# check that we can execute riscv64 with the plugin
26+
- cp $TEST_RISCV64 .
27+
- (export USE_QEMU_PLUGIN="1"; export QEMU_CPU="veyron-v2"; ./test-riscv64.elf)
28+
- ls
29+
30+
- export BRANCH_NAME=$CI_COMMIT_BRANCH
31+
# check that needed volumes are mounted correctly
32+
- export CCACHE_DIR=/mnt/ccache/llvm
33+
- export ARTIFACTS_DIR=/mnt/artifacts/llvm
34+
- ls $CCACHE_DIR
35+
- ls $ARTIFACTS_DIR
36+
37+
# check that ccache is actually working
38+
- ccache -s -v
39+
40+
- export STAGING_DIR=$ARTIFACTS_DIR/staging/$CI_COMMIT_BRANCH
41+
- rm -rf $STAGING_DIR
42+
- mkdir -p $STAGING_DIR
43+
44+
- cp -r /mnt/spec2017 $STAGING_DIR/
45+
- export SPEC_DIR=$STAGING_DIR/spec2017
46+
47+
- export BUILD_DIR=$STAGING_DIR/build
48+
- export INSTALL_DIR=$STAGING_DIR/install
49+
50+
- make prepare
51+
- make configure_llvm
52+
# TODO: run tests
53+
- make install_llvm
54+
55+
# run benchmarks with test workload
56+
- make clean_spec
57+
- make run_spec_test
58+
- make check_spec_logs
59+
60+
# run benchmarks with train workload
61+
- make clean_spec
62+
- make run_spec_train
63+
- make check_spec_logs
64+
65+
# MKDIR_CP is defined by the docker container
66+
- $MKDIR_CP -listOfPaths $(ls $SPEC_DIR/cpu2017/benchspec/CPU/*/build/*/*.out) -pathPrefix $STAGING_DIR
67+
- $MKDIR_CP -listOfPaths $(ls $SPEC_DIR/cpu2017/benchspec/CPU/*/run/*/*.collect) -pathPrefix $STAGING_DIR
68+
- python3 $PARSE_BENCHMARK_DATA -pathToSpec $SPEC_DIR/cpu2017 -pathToOutput $STAGING_DIR/parsedBenchmarkData.json
69+
70+
# Baseline has to be first in the -listOfJsonFiles
71+
#- python3 $REPORT -listOfJsonFiles parsedBenchmarkData.json -dashBoardFile $ARTIFACTS_DIR/dashboard.csv | tee report.txt
72+
- python3 $REPORT -listOfJsonFiles $ARTIFACTS_DIR/commited/baseline/parsedBenchmarkData.json $STAGING_DIR/parsedBenchmarkData.json -dashBoardFile $ARTIFACTS_DIR/dashboard.csv | tee report.txt
73+
74+
75+
update-baseline:
76+
tags:
77+
- toolchain
78+
stage: deploy
79+
script:
80+
- export ARTIFACTS_DIR=/mnt/artifacts/llvm
81+
- export BRANCH_NAME=$(python3 $EXTRACT_BRANCH_NAME_FROM_COMMIT_MESSAGE -commitMessage "$CI_COMMIT_MESSAGE")
82+
- export STAGING_DIR=$ARTIFACTS_DIR/staging/$BRANCH_NAME
83+
- export INSTALL_DIR=$STAGING_DIR/install
84+
- make package_llvm
85+
- mv $STAGING_DIR/ventana-llvm.deb $ARTIFACTS_DIR/latest_build/ventana-llvm.deb
86+
- rm -rf $STAGING_DIR/build
87+
- rm -rf $STAGING_DIR/install
88+
- rm -rf $STAGING_DIR/spec2017
89+
90+
# GET_THIS_COMMIT_NUM is defined by the docker container
91+
- python3 $UPDATE_ARTIFACTS -artifactsDir $ARTIFACTS_DIR -branchToCommit=$(python3 $EXTRACT_BRANCH_NAME_FROM_COMMIT_MESSAGE -commitMessage "$CI_COMMIT_MESSAGE") -commitNo=$(python3 $GET_THIS_COMMIT_NUM -commitedDir $ARTIFACTS_DIR/commited)
92+
93+
only:
94+
variables:
95+
- $CI_COMMIT_MESSAGE =~ /Merge.+branch\s(.*)\sinto(.*)/
96+

Makefile

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
TOP = $(PWD)
2+
SHELL := /bin/bash
3+
4+
prepare:
5+
rm -rf $(BUILD_DIR)
6+
rm -rf $(INSTALL_DIR)
7+
mkdir -p $(BUILD_DIR)
8+
mkdir -p $(INSTALL_DIR)
9+
dpkg --extract /mnt/artifacts/gcc/latest_build/ventana-gcc.deb $(INSTALL_DIR)
10+
11+
configure_llvm:
12+
cd $(BUILD_DIR); \
13+
cmake $(TOP)/llvm \
14+
-G Ninja \
15+
-DCMAKE_BUILD_TYPE=Release \
16+
-DLLVM_ENABLE_ASSERTIONS=ON \
17+
-DCMAKE_C_COMPILER=$(CC) \
18+
-DCMAKE_CXX_COMPILER=$(CXX) \
19+
-DCMAKE_CXX_COMPILER_LAUNCHER="ccache" \
20+
-DCMAKE_CXX_FLAGS="-stdlib=libc++" \
21+
-DLLVM_USE_LINKER=lld \
22+
-DBUILD_SHARED_LIBS=ON \
23+
-DLLVM_TARGETS_TO_BUILD="RISCV" \
24+
-DLLVM_ENABLE_PROJECTS="clang;lld" \
25+
-DLLVM_OPTIMIZED_TABLEGEN=ON \
26+
-DLLVM_PARALLEL_LINK_JOBS=1 \
27+
-DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) \
28+
-DLLVM_BINUTILS_INCDIR=$(INSTALL_DIR)/x86_64-pc-linux-gnu/riscv64-linux-gnu/include \
29+
-DLLVM_FORCE_VC_REPOSITORY=blah
30+
31+
configure_llvm_native_riscv64_flang_build:
32+
rm -rf $(BUILD_DIR)
33+
mkdir -p $(BUILD_DIR)
34+
mkdir -p $(INSTALL_DIR)
35+
cd $(BUILD_DIR); \
36+
cmake $(TOP)/llvm \
37+
-G Ninja \
38+
-DCMAKE_BUILD_TYPE=Release \
39+
-DLLVM_ENABLE_ASSERTIONS=ON \
40+
-DLLVM_TARGETS_TO_BUILD="host" \
41+
-DLLVM_ENABLE_PROJECTS="clang;mlir;flang;openmp" \
42+
-DCMAKE_C_COMPILER=gcc \
43+
-DCMAKE_CXX_COMPILER=g++ \
44+
-DLLVM_PARALLEL_LINK_JOBS=1 \
45+
-DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) \
46+
-DLLVM_ENABLE_RUNTIMES="compiler-rt"
47+
48+
install_llvm:
49+
cd $(BUILD_DIR); cmake --build . --target install
50+
51+
package_llvm:
52+
mkdir -p $(INSTALL_DIR)/DEBIAN
53+
echo -e "\
54+
Package: ventanta-llvm \n\
55+
Version: 1.0 \n\
56+
Section: utils \n\
57+
Priority: optional \n\
58+
Architecture: all \n\
59+
Maintainer: Ventana Micro Systems \n\
60+
Description: LLVM, $(BRANCH_NAME) \n\
61+
" > $(INSTALL_DIR)/DEBIAN/control
62+
dpkg-deb --root-owner-group --build $(INSTALL_DIR)
63+
mv $(STAGING_DIR)/install.deb $(STAGING_DIR)/ventana-llvm.deb
64+
65+
MCPU=veyron-v1
66+
SPEC_OPTIMIZE_FLAGS="\
67+
-mcpu=$(MCPU) \
68+
--sysroot=$(INSTALL_DIR) \
69+
-O3 \
70+
-mllvm -stats \
71+
"
72+
SPEC_LD_FLAGS="\
73+
-fuse-ld=$(INSTALL_DIR)/riscv64-linux-gnu/bin/ld.bfd \
74+
-static \
75+
"
76+
# SPEC_DIR is defined in .gitlab-ci.yml
77+
#
78+
define runSpecBenchmark
79+
cd $(SPEC_DIR)/cpu2017; \
80+
(\
81+
source shrc; \
82+
export USE_QEMU_PLUGIN="1"; \
83+
export QEMU_CPU="$(MCPU)"; \
84+
runcpu \
85+
--config=llvm-linux-riscv-ventana.cfg \
86+
--define label=$(BRANCH_NAME) \
87+
--define llvm_bin_dir="$(INSTALL_DIR)/bin" \
88+
--define optimize_flags=$(SPEC_OPTIMIZE_FLAGS) \
89+
--define ld_flags=$(SPEC_LD_FLAGS) \
90+
--action=validate \
91+
--size=$(2) \
92+
$(1) \
93+
)
94+
endef
95+
96+
run_spec_test:
97+
$(call runSpecBenchmark,500.perlbench_r,test) & \
98+
$(call runSpecBenchmark,502.gcc_r,test) & \
99+
$(call runSpecBenchmark,505.mcf_r,test) & \
100+
$(call runSpecBenchmark,508.namd_r,test) & \
101+
$(call runSpecBenchmark,510.parest_r,test) & \
102+
$(call runSpecBenchmark,511.povray_r,test) & \
103+
$(call runSpecBenchmark,519.lbm_r,test) & \
104+
$(call runSpecBenchmark,520.omnetpp_r,test) & \
105+
$(call runSpecBenchmark,523.xalancbmk_r,test) & \
106+
$(call runSpecBenchmark,525.x264_r,test) & \
107+
$(call runSpecBenchmark,526.blender_r,test) & \
108+
$(call runSpecBenchmark,531.deepsjeng_r,test) & \
109+
$(call runSpecBenchmark,538.imagick_r,test) & \
110+
$(call runSpecBenchmark,541.leela_r,test) & \
111+
$(call runSpecBenchmark,544.nab_r,test) & \
112+
$(call runSpecBenchmark,557.xz_r,test) & \
113+
wait
114+
115+
run_spec_train:
116+
$(call runSpecBenchmark,500.perlbench_r,train) & \
117+
$(call runSpecBenchmark,502.gcc_r,train) & \
118+
$(call runSpecBenchmark,505.mcf_r,train) & \
119+
$(call runSpecBenchmark,508.namd_r,train) & \
120+
$(call runSpecBenchmark,510.parest_r,train) & \
121+
$(call runSpecBenchmark,511.povray_r,train) & \
122+
$(call runSpecBenchmark,519.lbm_r,train) & \
123+
$(call runSpecBenchmark,520.omnetpp_r,train) & \
124+
$(call runSpecBenchmark,523.xalancbmk_r,train) & \
125+
$(call runSpecBenchmark,525.x264_r,train) & \
126+
$(call runSpecBenchmark,526.blender_r,train) & \
127+
$(call runSpecBenchmark,531.deepsjeng_r,train) & \
128+
$(call runSpecBenchmark,538.imagick_r,train) & \
129+
$(call runSpecBenchmark,541.leela_r,train) & \
130+
$(call runSpecBenchmark,544.nab_r,train) & \
131+
$(call runSpecBenchmark,557.xz_r,train) & \
132+
wait
133+
134+
check_spec_logs:
135+
cd $(SPEC_DIR); \
136+
python3 spec.py \
137+
--specCPU2017Path=cpu2017 \
138+
--checkSpecLogs \
139+
--benchmarksList="\
140+
500.perlbench_r,\
141+
502.gcc_r,\
142+
505.mcf_r,\
143+
508.namd_r,\
144+
510.parest_r,\
145+
511.povray_r,\
146+
519.lbm_r,\
147+
520.omnetpp_r,\
148+
523.xalancbmk_r,\
149+
525.x264_r,\
150+
526.blender_r,\
151+
531.deepsjeng_r,\
152+
538.imagick_r,\
153+
541.leela_r,\
154+
544.nab_r,\
155+
557.xz_r\
156+
"
157+
158+
clean_spec:
159+
rm -rf $(SPEC_DIR)/cpu2017/benchspec/C*/*/run
160+
rm -rf $(SPEC_DIR)/cpu2017/benchspec/C*/*/build
161+
rm -rf $(SPEC_DIR)/cpu2017/benchspec/C*/*/exe
162+
rm -rf $(SPEC_DIR)/cpu2017/result/*
163+
rm -rf $(SPEC_DIR)/cpu2017/tmp/*

llvm/include/llvm/CodeGen/ReachingDefAnalysis.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,11 @@ class ReachingDefAnalysis : public MachineFunctionPass {
114114
private:
115115
MachineFunction *MF = nullptr;
116116
const TargetRegisterInfo *TRI = nullptr;
117+
const TargetInstrInfo *TII = nullptr;
117118
LoopTraversal::TraversalOrder TraversedMBBOrder;
118119
unsigned NumRegUnits = 0;
120+
unsigned NumStackObjects = 0;
121+
int ObjectIndexBegin = 0;
119122
/// Instruction that defined each register, relative to the beginning of the
120123
/// current basic block. When a LiveRegsDefInfo is used to represent a
121124
/// live-out register, this value is relative to the end of the basic block,
@@ -138,6 +141,9 @@ class ReachingDefAnalysis : public MachineFunctionPass {
138141
DenseMap<MachineInstr *, int> InstIds;
139142

140143
MBBReachingDefsInfo MBBReachingDefs;
144+
using MBBFrameObjsReachingDefsInfo =
145+
std::vector<std::vector<std::vector<int>>>;
146+
MBBFrameObjsReachingDefsInfo MBBFrameObjsReachingDefs;
141147

142148
/// Default values are 'nothing happened a long time ago'.
143149
const int ReachingDefDefaultVal = -(1 << 21);

llvm/include/llvm/CodeGen/TargetFrameLowering.h

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,16 @@ namespace llvm {
2424
class CalleeSavedInfo;
2525
class MachineFunction;
2626
class RegScavenger;
27-
28-
namespace TargetStackID {
29-
enum Value {
30-
Default = 0,
31-
SGPRSpill = 1,
32-
ScalableVector = 2,
33-
WasmLocal = 3,
34-
NoAlloc = 255
35-
};
27+
class ReachingDefAnalysis;
28+
29+
namespace TargetStackID {
30+
enum Value {
31+
Default = 0,
32+
SGPRSpill = 1,
33+
ScalableVector = 2,
34+
WasmLocal = 3,
35+
NoAlloc = 255
36+
};
3637
}
3738

3839
/// Information about stack frame layout on the target. It holds the direction
@@ -210,6 +211,11 @@ class TargetFrameLowering {
210211
/// for noreturn nounwind functions.
211212
virtual bool enableCalleeSaveSkip(const MachineFunction &MF) const;
212213

214+
virtual void emitCFIsForCSRsHandledByRA(MachineFunction &MF,
215+
ReachingDefAnalysis *RDA) const {
216+
return;
217+
}
218+
213219
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
214220
/// the function.
215221
virtual void emitPrologue(MachineFunction &MF,

llvm/include/llvm/CodeGen/TargetSubtargetInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,8 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
328328
return false;
329329
}
330330

331+
virtual bool doCSRSavesInRA() const;
332+
331333
/// Classify a global function reference. This mainly used to fetch target
332334
/// special flags for lowering a function address. For example mark a function
333335
/// call should be plt or pc-related addressing.

llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,7 @@ class UnwindLocation {
7474
bool Dereference; /// If true, the resulting location must be dereferenced
7575
/// after the location value is computed.
7676

77-
// Constructors are private to force people to use the create static
78-
// functions.
77+
public:
7978
UnwindLocation(Location K)
8079
: Kind(K), RegNum(InvalidRegisterNumber), Offset(0),
8180
AddrSpace(std::nullopt), Dereference(false) {}
@@ -88,7 +87,6 @@ class UnwindLocation {
8887
: Kind(DWARFExpr), RegNum(InvalidRegisterNumber), Offset(0), Expr(E),
8988
Dereference(Deref) {}
9089

91-
public:
9290
/// Create a location whose rule is set to Unspecified. This means the
9391
/// register value might be in the same register but it wasn't specified in
9492
/// the unwind opcodes.
@@ -135,6 +133,7 @@ class UnwindLocation {
135133
assert(Kind == RegPlusOffset && AddrSpace);
136134
return *AddrSpace;
137135
}
136+
bool getDeref() const { return Dereference; }
138137
int32_t getConstant() const { return Offset; }
139138
/// Some opcodes will modify the CFA location's register only, so we need
140139
/// to be able to modify the CFA register when evaluating DWARF Call Frame
@@ -148,6 +147,11 @@ class UnwindLocation {
148147
/// the constant value (DW_CFA_GNU_window_save which is also known as
149148
/// DW_CFA_AARCH64_negate_ra_state).
150149
void setConstant(int32_t Value) { Offset = Value; }
150+
void setDeref(bool NewDeref) { Dereference = NewDeref; }
151+
void setKind(Location NewKind) { Kind = NewKind; }
152+
bool isRegister() const {
153+
return ((Kind == RegPlusOffset) && !Dereference && (Offset == 0));
154+
}
151155

152156
std::optional<DWARFExpression> getDWARFExpressionBytes() const {
153157
return Expr;

0 commit comments

Comments
 (0)