Skip to content

Commit 46536f1

Browse files
committed
Auto merge of rust-lang#137426 - DianQK:link-used, r=<try>
Link object files that use `#[used]`

By directly linking the object files that use `#[used]`, we ensure the linker can see them. This approach allows `#[used]` to avoid modifying symbol visibility, preserving local symbols.

A similar example in C would be:

```c
// foo.c
__attribute__((constructor)) static void foo() {}

// main.c
void main(void) {}
```

If `foo.c` is placed in a static library, its object file will never be loaded unless the entire static library is fully loaded by `--whole-archive`.

This pull request removes some of the symbols in `symbols.o`. We can remove more symbols in a follow-up PR.
2 parents 9af8985 + bb77554 commit 46536f1

File tree

15 files changed

+282
-49
lines changed

15 files changed

+282
-49
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

+50
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::{env, fmt, fs, io, mem, str};
99

1010
use cc::windows_registry;
1111
use itertools::Itertools;
12+
use object::read::archive::ArchiveFile;
1213
use regex::Regex;
1314
use rustc_arena::TypedArena;
1415
use rustc_ast::CRATE_NODE_ID;
@@ -78,6 +79,7 @@ pub fn link_binary(
7879
let _timer = sess.timer("link_binary");
7980
let output_metadata = sess.opts.output_types.contains_key(&OutputType::Metadata);
8081
let mut tempfiles_for_stdout_output: Vec<PathBuf> = Vec::new();
82+
let mut tempfiles_for_linked_objects: Vec<PathBuf> = Vec::new();
8183
for &crate_type in &codegen_results.crate_info.crate_types {
8284
// Ignore executable crates if we have -Z no-codegen, as they will error.
8385
if (sess.opts.unstable_opts.no_codegen || !sess.opts.output_types.should_codegen())
@@ -142,6 +144,8 @@ pub fn link_binary(
142144
&out_filename,
143145
&codegen_results,
144146
path.as_ref(),
147+
&mut tempfiles_for_linked_objects,
148+
outputs,
145149
);
146150
}
147151
}
@@ -214,6 +218,10 @@ pub fn link_binary(
214218
ensure_removed(sess.dcx(), &temp);
215219
}
216220

221+
for temp in tempfiles_for_linked_objects {
222+
ensure_removed(sess.dcx(), &temp);
223+
}
224+
217225
// If no requested outputs require linking, then the object temporaries should
218226
// be kept.
219227
if !sess.opts.output_types.should_link() {
@@ -765,6 +773,8 @@ fn link_natively(
765773
out_filename: &Path,
766774
codegen_results: &CodegenResults,
767775
tmpdir: &Path,
776+
tempfiles_for_linked_objects: &mut Vec<PathBuf>,
777+
outputs: &OutputFilenames,
768778
) {
769779
info!("preparing {:?} to {:?}", crate_type, out_filename);
770780
let (linker_path, flavor) = linker_and_flavor(sess);
@@ -789,6 +799,8 @@ fn link_natively(
789799
temp_filename,
790800
codegen_results,
791801
self_contained_components,
802+
tempfiles_for_linked_objects,
803+
outputs,
792804
);
793805

794806
linker::disable_localization(&mut cmd);
@@ -2248,6 +2260,8 @@ fn linker_with_args(
22482260
out_filename: &Path,
22492261
codegen_results: &CodegenResults,
22502262
self_contained_components: LinkSelfContainedComponents,
2263+
tempfiles_for_linked_objects: &mut Vec<PathBuf>,
2264+
outputs: &OutputFilenames,
22512265
) -> Command {
22522266
let self_contained_crt_objects = self_contained_components.is_crt_objects_enabled();
22532267
let cmd = &mut *super::linker::get_linker(
@@ -2323,6 +2337,13 @@ fn linker_with_args(
23232337
add_local_crate_regular_objects(cmd, codegen_results);
23242338
add_local_crate_metadata_objects(cmd, crate_type, codegen_results);
23252339
add_local_crate_allocator_objects(cmd, codegen_results);
2340+
add_local_crate_linked_objects(
2341+
cmd,
2342+
codegen_results,
2343+
crate_type,
2344+
tempfiles_for_linked_objects,
2345+
outputs,
2346+
);
23262347

23272348
// Avoid linking to dynamic libraries unless they satisfy some undefined symbols
23282349
// at the point at which they are specified on the command line.
@@ -2919,6 +2940,35 @@ fn rehome_lib_path(sess: &Session, path: &Path) -> PathBuf {
29192940
}
29202941
}
29212942

2943+
fn add_local_crate_linked_objects(
2944+
cmd: &mut dyn Linker,
2945+
codegen_results: &CodegenResults,
2946+
crate_type: CrateType,
2947+
tempfiles_for_linked_objects: &mut Vec<PathBuf>,
2948+
outputs: &OutputFilenames,
2949+
) {
2950+
for (cnum, objects) in &codegen_results.crate_info.linked_objects[&crate_type] {
2951+
let src = &codegen_results.crate_info.used_crate_source[cnum];
2952+
let cratepath = &src.rlib.as_ref().unwrap().0;
2953+
let archive_map = unsafe { Mmap::map(File::open(cratepath).unwrap()).unwrap() };
2954+
let archive = ArchiveFile::parse(&*archive_map)
2955+
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
2956+
.unwrap();
2957+
for member in archive.members() {
2958+
let member = member.unwrap();
2959+
let name = std::str::from_utf8(member.name()).unwrap();
2960+
if objects.contains(name) {
2961+
let data = member.data(&*archive_map).unwrap();
2962+
let obj =
2963+
outputs.temp_path(OutputType::Object, Some(&format!("{name}.linked_object")));
2964+
fs::write(&obj, data).unwrap();
2965+
cmd.add_object(&obj);
2966+
tempfiles_for_linked_objects.push(obj);
2967+
}
2968+
}
2969+
}
2970+
}
2971+
29222972
// Adds the static "rlib" versions of all crates to the command line.
29232973
// There's a bit of magic which happens here specifically related to LTO,
29242974
// namely that we remove upstream object files.

compiler/rustc_codegen_ssa/src/back/linker.rs

+81-24
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@ use std::path::{Path, PathBuf};
55
use std::{env, io, iter, mem, str};
66

77
use cc::windows_registry;
8+
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
89
use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
910
use rustc_metadata::{
1011
find_native_static_library, try_find_native_dynamic_library, try_find_native_static_library,
1112
};
1213
use rustc_middle::bug;
1314
use rustc_middle::middle::dependency_format::Linkage;
14-
use rustc_middle::middle::exported_symbols;
15-
use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportKind};
15+
use rustc_middle::middle::exported_symbols::{
16+
self, ExportedSymbol, SymbolExportInfo, SymbolExportKind,
17+
};
1618
use rustc_middle::ty::TyCtxt;
1719
use rustc_session::Session;
1820
use rustc_session::config::{self, CrateType, DebugInfo, LinkerPluginLto, Lto, OptLevel, Strip};
@@ -21,6 +23,7 @@ use rustc_target::spec::{Cc, LinkOutputKind, LinkerFlavor, Lld};
2123
use tracing::{debug, warn};
2224

2325
use super::command::Command;
26+
use super::link::are_upstream_rust_objects_already_included;
2427
use super::symbol_export;
2528
use crate::errors;
2629

@@ -1753,17 +1756,15 @@ impl<'a> Linker for AixLinker<'a> {
17531756
fn for_each_exported_symbols_include_dep<'tcx>(
17541757
tcx: TyCtxt<'tcx>,
17551758
crate_type: CrateType,
1756-
mut callback: impl FnMut(ExportedSymbol<'tcx>, SymbolExportInfo, CrateNum),
1759+
mut callback: impl FnMut(&'tcx [(ExportedSymbol<'tcx>, SymbolExportInfo)], CrateNum),
17571760
) {
17581761
let formats = tcx.dependency_formats(());
17591762
let deps = &formats[&crate_type];
17601763

17611764
for (cnum, dep_format) in deps.iter_enumerated() {
17621765
// For each dependency that we are linking to statically ...
17631766
if *dep_format == Linkage::Static {
1764-
for &(symbol, info) in tcx.exported_symbols(cnum).iter() {
1765-
callback(symbol, info, cnum);
1766-
}
1767+
callback(tcx.exported_symbols(cnum), cnum);
17671768
}
17681769
}
17691770
}
@@ -1783,12 +1784,14 @@ pub(crate) fn exported_symbols(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec<St
17831784
fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec<String> {
17841785
let mut symbols = Vec::new();
17851786
let export_threshold = symbol_export::crates_export_threshold(&[crate_type]);
1786-
for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| {
1787-
if info.level.is_below_threshold(export_threshold) {
1788-
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
1789-
tcx, symbol, cnum,
1790-
));
1791-
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
1787+
for_each_exported_symbols_include_dep(tcx, crate_type, |exported_symbols, cnum| {
1788+
for &(symbol, info) in exported_symbols {
1789+
if info.level.is_below_threshold(export_threshold) {
1790+
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
1791+
tcx, symbol, cnum,
1792+
));
1793+
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
1794+
}
17921795
}
17931796
});
17941797

@@ -1808,30 +1811,84 @@ fn exported_symbols_for_proc_macro_crate(tcx: TyCtxt<'_>) -> Vec<String> {
18081811
vec![proc_macro_decls_name, metadata_symbol_name]
18091812
}
18101813

1811-
pub(crate) fn linked_symbols(
1814+
pub(crate) fn linked_objects(
18121815
tcx: TyCtxt<'_>,
18131816
crate_type: CrateType,
1814-
) -> Vec<(String, SymbolExportKind)> {
1817+
linked_symbols: &mut Vec<(String, SymbolExportKind)>,
1818+
) -> FxIndexMap<CrateNum, FxHashSet<String>> {
18151819
match crate_type {
18161820
CrateType::Executable | CrateType::Cdylib | CrateType::Dylib => (),
18171821
CrateType::Staticlib | CrateType::ProcMacro | CrateType::Rlib => {
1818-
return Vec::new();
1822+
return FxIndexMap::default();
18191823
}
18201824
}
18211825

1822-
let mut symbols = Vec::new();
1823-
1826+
let mut objects = FxIndexMap::default();
1827+
let upstream_rust_objects_already_included =
1828+
are_upstream_rust_objects_already_included(tcx.sess);
18241829
let export_threshold = symbol_export::crates_export_threshold(&[crate_type]);
1825-
for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| {
1826-
if info.level.is_below_threshold(export_threshold) || info.used {
1827-
symbols.push((
1828-
symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum),
1829-
info.kind,
1830-
));
1830+
for_each_exported_symbols_include_dep(tcx, crate_type, |exported_symbols, cnum| {
1831+
let exported_symbols = exported_symbols.iter().filter(|(_, info)| {
1832+
(!matches!(crate_type, CrateType::Executable)
1833+
&& info.level.is_below_threshold(export_threshold))
1834+
|| info.used
1835+
});
1836+
if cnum == LOCAL_CRATE {
1837+
// Since the local crate is always linked directly from its object files, `#[used]` already works as expected;
1838+
// we only need to add the undefined symbols.
1839+
linked_symbols.extend(
1840+
exported_symbols
1841+
.filter(|(symbol, _)| match symbol {
1842+
ExportedSymbol::NonGeneric { cgu, .. } => cgu.is_none(),
1843+
ExportedSymbol::Generic(..)
1844+
| ExportedSymbol::DropGlue(..)
1845+
| ExportedSymbol::AsyncDropGlueCtorShim(..) => false,
1846+
ExportedSymbol::ThreadLocalShim(_def_id) => false,
1847+
ExportedSymbol::NoDefId(..) => true,
1848+
})
1849+
.map(|&(symbol, info)| {
1850+
(
1851+
symbol_export::linking_symbol_name_for_instance_in_crate(
1852+
tcx, symbol, cnum,
1853+
),
1854+
info.kind,
1855+
)
1856+
}),
1857+
);
1858+
return;
1859+
}
1860+
if matches!(crate_type, CrateType::Executable) && tcx.is_compiler_builtins(cnum) {
1861+
return;
18311862
}
1863+
let lto = upstream_rust_objects_already_included;
1864+
let mut cgus = FxHashSet::default();
1865+
for &(symbol, info) in exported_symbols {
1866+
match symbol {
1867+
ExportedSymbol::NonGeneric { cgu: Some(cgu), .. } => {
1868+
if !lto {
1869+
cgus.insert(cgu.as_str().to_string());
1870+
}
1871+
}
1872+
ExportedSymbol::NonGeneric { cgu: None, .. } | ExportedSymbol::NoDefId(..) => {
1873+
// Unresolved symbols may come from external libraries.
1874+
linked_symbols.push((
1875+
symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum),
1876+
info.kind,
1877+
));
1878+
}
1879+
ExportedSymbol::Generic(..)
1880+
| ExportedSymbol::DropGlue(..)
1881+
| ExportedSymbol::AsyncDropGlueCtorShim(..)
1882+
| ExportedSymbol::ThreadLocalShim(..) => {}
1883+
};
1884+
}
1885+
if cgus.is_empty() {
1886+
return;
1887+
}
1888+
objects.insert(cnum, cgus);
18321889
});
18331890

1834-
symbols
1891+
objects
18351892
}
18361893

18371894
/// Much simplified and explicit CLI for the NVPTX linker. The linker operates

compiler/rustc_codegen_ssa/src/back/symbol_export.rs

+52-8
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
99
use rustc_middle::middle::exported_symbols::{
1010
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
1111
};
12+
use rustc_middle::mir::mono::{CodegenUnit, MonoItem};
1213
use rustc_middle::query::LocalCrate;
1314
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt};
1415
use rustc_middle::util::Providers;
15-
use rustc_session::config::{CrateType, OomStrategy};
16+
use rustc_session::config::{CrateType, OomStrategy, OutputFilenames, OutputType};
17+
use rustc_span::Symbol;
1618
use rustc_target::callconv::Conv;
1719
use rustc_target::spec::{SanitizerSet, TlsModel};
1820
use tracing::debug;
@@ -168,6 +170,36 @@ fn is_reachable_non_generic_provider_extern(tcx: TyCtxt<'_>, def_id: DefId) -> b
168170
tcx.reachable_non_generics(def_id.krate).contains_key(&def_id)
169171
}
170172

173+
fn find_codegen_unit<'tcx>(
174+
tcx: TyCtxt<'tcx>,
175+
codegen_units: &'tcx [CodegenUnit<'tcx>],
176+
outputs: &'tcx OutputFilenames,
177+
def_id: DefId,
178+
) -> Option<Symbol> {
179+
if !tcx.is_codegened_item(def_id) {
180+
return None;
181+
}
182+
let item = if tcx.is_static(def_id) {
183+
MonoItem::Static(def_id)
184+
} else {
185+
MonoItem::Fn(Instance::mono(tcx, def_id))
186+
};
187+
codegen_units.iter().find_map(|cgu| {
188+
if cgu.contains_item(&item) {
189+
Some(Symbol::intern(
190+
outputs
191+
.temp_path(OutputType::Object, Some(cgu.name().as_str()))
192+
.file_name()
193+
.unwrap()
194+
.to_str()
195+
.unwrap(),
196+
))
197+
} else {
198+
None
199+
}
200+
})
201+
}
202+
171203
fn exported_symbols_provider_local(
172204
tcx: TyCtxt<'_>,
173205
_: LocalCrate,
@@ -182,8 +214,20 @@ fn exported_symbols_provider_local(
182214
tcx.reachable_non_generics(LOCAL_CRATE).to_sorted(&hcx, true)
183215
});
184216

185-
let mut symbols: Vec<_> =
186-
sorted.iter().map(|&(&def_id, &info)| (ExportedSymbol::NonGeneric(def_id), info)).collect();
217+
let outputs = tcx.output_filenames(());
218+
let codegen_units = tcx.collect_and_partition_mono_items(()).codegen_units;
219+
let mut symbols: Vec<_> = sorted
220+
.iter()
221+
.map(|&(&def_id, &info)| {
222+
(
223+
ExportedSymbol::NonGeneric {
224+
def_id,
225+
cgu: find_codegen_unit(tcx, codegen_units, outputs, def_id),
226+
},
227+
info,
228+
)
229+
})
230+
.collect();
187231

188232
// Export TLS shims
189233
if !tcx.sess.target.dll_tls_export {
@@ -433,7 +477,7 @@ fn upstream_monomorphizations_provider(
433477
continue;
434478
}
435479
}
436-
ExportedSymbol::NonGeneric(..)
480+
ExportedSymbol::NonGeneric { .. }
437481
| ExportedSymbol::ThreadLocalShim(..)
438482
| ExportedSymbol::NoDefId(..) => {
439483
// These are no monomorphizations
@@ -545,7 +589,7 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>(
545589
// This is something instantiated in an upstream crate, so we have to use
546590
// the slower (because uncached) version of computing the symbol name.
547591
match symbol {
548-
ExportedSymbol::NonGeneric(def_id) => {
592+
ExportedSymbol::NonGeneric { def_id, .. } => {
549593
rustc_symbol_mangling::symbol_name_for_instance_in_crate(
550594
tcx,
551595
Instance::mono(tcx, def_id),
@@ -590,12 +634,12 @@ fn calling_convention_for_symbol<'tcx>(
590634
symbol: ExportedSymbol<'tcx>,
591635
) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) {
592636
let instance = match symbol {
593-
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
637+
ExportedSymbol::NonGeneric { def_id, .. } | ExportedSymbol::Generic(def_id, _)
594638
if tcx.is_static(def_id) =>
595639
{
596640
None
597641
}
598-
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
642+
ExportedSymbol::NonGeneric { def_id, .. } => Some(Instance::mono(tcx, def_id)),
599643
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
600644
// DropGlue always use the Rust calling convention and thus follow the target's default
601645
// symbol decoration scheme.
@@ -711,7 +755,7 @@ fn maybe_emutls_symbol_name<'tcx>(
711755
undecorated: &str,
712756
) -> Option<String> {
713757
if matches!(tcx.sess.tls_model(), TlsModel::Emulated)
714-
&& let ExportedSymbol::NonGeneric(def_id) = symbol
758+
&& let ExportedSymbol::NonGeneric { def_id, .. } = symbol
715759
&& tcx.is_thread_local_static(def_id)
716760
{
717761
// When using emutls, LLVM will add the `__emutls_v.` prefix to thread local symbols,

0 commit comments

Comments
 (0)