Skip to content

Commit 6e05f36

Browse files
committed
[lld][LoongArch] GOT indirection to PC relative optimization.
In LoongArch, this optimization is only supported when relaxation is enabled. From: * pcalau12i $a0, %got_pc_hi20(sym_got) * ld.w/d $a0, $a0, %got_pc_lo12(sym_got) To: * pcalau12i $a0, %pc_hi20(sym) * addi.w/d $a0, $a0, %pc_lo12(sym) If the original code sequence can be relaxed into a single instruction `pcaddi`, this patch will not be taken (see https://). The implementation related to `got` is split into two locations because the `relax()` function is part of an iteration fixed-point algorithm. We should minimize it to achieve better linker performance. FIXME: Although the optimization has been performed, the GOT entries still exist, similarly to AArch64. Eliminating the entries may require additional marking in the common code.
1 parent e024b7c commit 6e05f36

File tree

2 files changed

+72
-4
lines changed

2 files changed

+72
-4
lines changed

lld/ELF/Arch/LoongArch.cpp

+66
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ class LoongArch final : public TargetInfo {
4747
void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
4848
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
4949
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
50+
bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
51+
const Relocation &rLo12, uint64_t secAddr) const;
5052
};
5153
} // end anonymous namespace
5254

@@ -1150,6 +1152,54 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
11501152
}
11511153
}
11521154

1155+
// Try GOT indirection to PC relative optimization when relaxation is enabled.
1156+
// From:
1157+
// * pcalau12i $a0, %got_pc_hi20(sym_got)
1158+
// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
1159+
// To:
1160+
// * pcalau12i $a0, %pc_hi20(sym)
1161+
// * addi.w/d $a0, $a0, %pc_lo12(sym)
1162+
//
1163+
// FIXME: Although the optimization has been performed, the GOT entries still
1164+
// exist, similarly to AArch64. Eliminating the entries may require
1165+
// additional marking in the common code.
1166+
// Parameters:
//   loc     - points at the first instruction of the pair; the second
//             instruction is read from and written to loc + 4.
//   rHi20   - relocation attached to the first instruction.
//   rLo12   - relocation attached to the second instruction.
//   secAddr - added to rHi20.offset to form the address used in the
//             page computations below.
// Returns true if both instructions were rewritten, false if nothing was
// modified (every `return false` happens before the first write).
bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
1167+
const Relocation &rLo12, uint64_t secAddr) const {
1168+
// Give up unless the symbol is defined, non-preemptible and not an IFUNC.
// In PIC output, additionally give up when the Defined symbol has no section
// (presumably an absolute symbol, whose address is not PC-relative -- confirm).
// NOTE(review): only rHi20.sym is inspected here; rLo12.sym is never compared
// against it, so the caller is relied upon to pass a matching pair.
if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
1169+
rHi20.sym->isGnuIFunc() ||
1170+
(ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
1171+
return false;
1172+
1173+
Symbol &sym = *rHi20.sym;
1174+
// Target address used for BOTH rewritten instructions: the symbol's VA plus
// the addend of the hi20 relocation. rLo12.addend is copied into newRLo12
// below but does not contribute to the value passed to relocate().
// NOTE(review): this assumes both addends are equal -- confirm.
uint64_t symLocal = sym.getVA(ctx) + rHi20.addend;
1175+
// Check if the address difference is within +/-2GB range.
1176+
// For simplicity, the range mentioned here is an approximate estimate and is
1177+
// not fully equivalent to the entire region that PC-relative addressing can
1178+
// cover.
1179+
// NOTE(review): the check uses the plain page difference, while the value
// actually encoded comes from getLoongArchPageDelta(); verify the two agree
// at the edges of the range.
int64_t pageOffset =
1180+
getLoongArchPage(symLocal) - getLoongArchPage(secAddr + rHi20.offset);
1181+
if (!isInt<20>(pageOffset >> 12))
1182+
return false;
1183+
1184+
// Replacement relocation records: same offsets and addends as the originals,
// but with the PC-relative expression/type pairs instead of the GOT ones.
Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
1185+
rHi20.addend, &sym};
1186+
Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
1187+
&sym};
1188+
1189+
// Original encodings of the pair; only their register fields (getD5/getJ5)
// are reused when re-emitting the instructions.
const uint32_t currInsn = read32le(loc);
1190+
const uint32_t nextInsn = read32le(loc + 4);
1191+
uint64_t pageDelta =
1192+
getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
1193+
// pcalau12i $a0, %pc_hi20
1194+
// Re-emit the instruction with the original destination register and the
// remaining operand arguments set to 0; relocate() then applies pageDelta.
write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0));
1195+
relocate(loc, newRHi20, pageDelta);
1196+
// addi.w/d $a0, $a0, %pc_lo12
1197+
// The second instruction becomes addi.d (64-bit) or addi.w (32-bit), keeping
// the original instruction's D5 and J5 register fields.
write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn),
1198+
getJ5(nextInsn), 0));
1199+
relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64));
1200+
return true;
1201+
}
1202+
11531203
// During TLSDESC GD_TO_IE, the converted code sequence always includes an
11541204
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
11551205
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
@@ -1259,6 +1309,22 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
12591309
tlsdescToLe(loc, rel, val);
12601310
}
12611311
continue;
1312+
case RE_LOONGARCH_GOT_PAGE_PC:
1313+
// In LoongArch, we try GOT indirection to PC relative optimization only
1314+
// when relaxation is enabled. This approach avoids determining whether
1315+
// relocation types are paired and whether the destination register of
1316+
// pcalau12i is only used by the immediately following instruction.
1317+
// Moreover, if the original code sequence can be relaxed to a single
1318+
// instruction `pcaddi`, the first instruction will be removed and it will
1319+
// not reach here.
1320+
if (isPairRelaxable(relocs, i) && rel.type == R_LARCH_GOT_PC_HI20 &&
1321+
relocs[i + 2].type == R_LARCH_GOT_PC_LO12 &&
1322+
tryGotToPCRel(loc, rel, relocs[i + 2], secAddr)) {
1323+
i = i + 3; // skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12,
1324+
// R_LARCH_RELAX
1325+
continue;
1326+
}
1327+
break;
12621328
default:
12631329
break;
12641330
}

lld/test/ELF/loongarch-relax-pc-hi20-lo12.s

+6-4
Original file line numberDiff line numberDiff line change
@@ -30,24 +30,26 @@
3030
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
3131
# NORELAX32-NEXT: 10000: pcalau12i $a0, 1024
3232
# NORELAX32-NEXT: addi.w $a0, $a0, 0
33+
## Not relaxation, conversion to PCRel.
3334
# NORELAX32-NEXT: pcalau12i $a0, 1024
34-
# NORELAX32-NEXT: ld.w $a0, $a0, 4
35+
# NORELAX32-NEXT: addi.w $a0, $a0, 0
3536
# NORELAX32-NEXT: pcalau12i $a0, 1024
3637
# NORELAX32-NEXT: addi.w $a0, $a0, 0
3738
# NORELAX32-NEXT: pcalau12i $a0, 1024
38-
# NORELAX32-NEXT: ld.w $a0, $a0, 4
39+
# NORELAX32-NEXT: addi.w $a0, $a0, 0
3940

4041
# NORELAX64-LABEL: <_start>:
4142
## offset exceed range of pcaddi
4243
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
4344
# NORELAX64-NEXT: 10000: pcalau12i $a0, 1024
4445
# NORELAX64-NEXT: addi.d $a0, $a0, 0
46+
## Not relaxation, conversion to PCRel.
4547
# NORELAX64-NEXT: pcalau12i $a0, 1024
46-
# NORELAX64-NEXT: ld.d $a0, $a0, 8
48+
# NORELAX64-NEXT: addi.d $a0, $a0, 0
4749
# NORELAX64-NEXT: pcalau12i $a0, 1024
4850
# NORELAX64-NEXT: addi.d $a0, $a0, 0
4951
# NORELAX64-NEXT: pcalau12i $a0, 1024
50-
# NORELAX64-NEXT: ld.d $a0, $a0, 8
52+
# NORELAX64-NEXT: addi.d $a0, $a0, 0
5153

5254
.section .text
5355
.global _start

0 commit comments

Comments
 (0)