Skip to content

Commit 75424f6

Browse files
committed
write .debug_str and .debug_str_offsets to dwp
DWARF packages need to merge the `.debug_str` sections of input DWARF objects. The `.debug_str_offsets` sections then need to be rebuilt with offsets into the new merged `.debug_str` section and concatenated (indices into each DWARF object's offset list therefore still refer to the same string). A merged `.debug_str` section is now written to the DWARF package, along with a rebuilt `.debug_str_offsets` contribution for each input DWARF object. Signed-off-by: David Wood <david.wood@huawei.com>
1 parent ade9015 commit 75424f6

File tree

3 files changed

+189
-25
lines changed

3 files changed

+189
-25
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ edition = "2021"
66
[dependencies]
77
anyhow = "1.0.45"
88
gimli = "0.26.1"
9+
indexmap = "1.7.0"
910
memmap2 = "0.5.0"
1011
object = { version = "0.27.1", features = [ "read", "write", "compression" ] }
1112
structopt = "0.3.25"

src/main.rs

Lines changed: 187 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,25 @@
11
use crate::relocate::{add_relocations, Relocate, RelocationMap};
22
use anyhow::{anyhow, Context, Result};
3-
use gimli::{DebugAddr, DwoId, EndianSlice, RunTimeEndian, UnitType};
3+
use gimli::{
4+
write::{EndianVec, Writer},
5+
DebugAddr, DebugStrOffset, DebugStrOffsetsBase, DebugStrOffsetsIndex, DwarfFileType, DwoId,
6+
EndianSlice, Format, Reader, RunTimeEndian, UnitType,
7+
};
8+
use indexmap::IndexSet;
49
use memmap2::Mmap;
510
use object::{
611
write::{self, SectionId, StreamingBuffer},
712
Architecture, BinaryFormat, Endianness, Object, ObjectSection, SectionKind,
813
};
914
use std::borrow::{Borrow, Cow};
15+
use std::collections::HashMap;
1016
use std::fmt;
1117
use std::fs;
1218
use std::io::{self, BufWriter, Write};
1319
use std::path::{Path, PathBuf};
1420
use structopt::StructOpt;
1521
use thiserror::Error;
16-
use tracing::trace;
22+
use tracing::{debug, trace};
1723
use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Registry};
1824
use tracing_tree::HierarchicalLayer;
1925
use typed_arena::Arena;
@@ -40,6 +46,10 @@ enum DwpError {
4046
DwarfObjectMissingSection(String),
4147
#[error("failed to create output file")]
4248
FailedToCreateOutputFile,
49+
#[error("dwarf object has no units")]
50+
DwarfObjectWithNoUnits,
51+
#[error("str offset value out of range of entry size")]
52+
DwpStrOffsetOutOfRange,
4353
}
4454

4555
/// In-progress DWARF package output being produced.
@@ -54,6 +64,10 @@ struct DwpOutputObject<'file> {
5464
debug_loclists: SectionId,
5565
/// Identifier for the `.debug_rnglists.dwo` section in the object file being created.
5666
debug_rnglists: SectionId,
67+
/// Identifier for the `.debug_str.dwo` section in the object file being created.
68+
debug_str: SectionId,
69+
/// Identifier for the `.debug_str_offsets.dwo` section in the object file being created.
70+
debug_str_offsets: SectionId,
5771
}
5872

5973
/// A DWARF object referenced by input object.
@@ -80,6 +94,68 @@ impl fmt::Debug for TargetDwarfObject {
8094
}
8195
}
8296

97+
/// New-type'd index from `IndexVec` of strings inserted into the `.debug_str` section.
98+
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
99+
struct DwpStringId(usize);
100+
101+
/// DWARF packages need to merge the `.debug_str` sections of input DWARF objects.
102+
/// `.debug_str_offsets` sections then need to be rebuilt with offsets into the new merged
103+
/// `.debug_str` section and then concatenated (indices into each dwarf object's offset list will
104+
/// therefore still refer to the same string).
105+
///
106+
/// Gimli's `StringTable` produces a `.debug_str` section with a single `.debug_str_offsets`
107+
/// section, but `DwpStringTable` accumulates a single `.debug_str` section and can be used to
108+
/// produce multiple `.debug_str_offsets` sections (which will be concatenated) which all offset
109+
/// into the same `.debug_str`.
110+
struct DwpStringTable<E: gimli::Endianity> {
111+
debug_str: gimli::write::DebugStr<EndianVec<E>>,
112+
strings: IndexSet<Vec<u8>>,
113+
offsets: HashMap<DwpStringId, DebugStrOffset>,
114+
}
115+
116+
impl<E: gimli::Endianity> DwpStringTable<E> {
117+
/// Create a new `DwpStringTable` with a given endianity.
118+
fn new(endianity: E) -> Self {
119+
Self {
120+
debug_str: gimli::write::DebugStr(EndianVec::new(endianity)),
121+
strings: IndexSet::new(),
122+
offsets: HashMap::new(),
123+
}
124+
}
125+
126+
/// Insert a string into the string table and return its offset in the table. If the string is
127+
/// already in the table, returns its offset.
128+
fn get_or_insert<T: Into<Vec<u8>>>(&mut self, bytes: T) -> Result<DebugStrOffset> {
129+
let bytes = bytes.into();
130+
assert!(!bytes.contains(&0));
131+
let (index, is_new) = self.strings.insert_full(bytes.clone());
132+
let index = DwpStringId(index);
133+
if !is_new {
134+
return Ok(*self
135+
.offsets
136+
.get(&index)
137+
.expect("insert exists but no offset"));
138+
}
139+
140+
// Keep track of the offset for this string, it might be referenced by the next compilation
141+
// unit too.
142+
let offset = self.debug_str.offset();
143+
self.offsets.insert(index, offset);
144+
145+
// Insert into the string table.
146+
self.debug_str.write(&bytes)?;
147+
self.debug_str.write_u8(0)?;
148+
149+
Ok(offset)
150+
}
151+
152+
/// Write the accumulated `.debug_str` section to an object file, returns the offset of the
153+
/// section in the object.
154+
fn write<'file>(self, obj: &mut write::Object<'file>, section: SectionId) -> u64 {
155+
obj.append_section_data(section, &self.debug_str.0.into_vec(), 1)
156+
}
157+
}
158+
83159
#[derive(Debug, StructOpt)]
84160
#[structopt(name = "rust-dwp", about = "merge split dwarf (.dwo) files")]
85161
struct Opt {
@@ -233,10 +309,7 @@ fn parse_executable<'input, 'arena: 'input>(
233309
obj: &object::File<'input>,
234310
arena_data: &'arena Arena<Cow<'input, [u8]>>,
235311
arena_relocations: &'arena Arena<RelocationMap>,
236-
) -> Result<(
237-
gimli::read::DebugAddr<DwpReader<'arena>>,
238-
Vec<TargetDwarfObject>,
239-
)> {
312+
) -> Result<(DebugAddr<DwpReader<'arena>>, Vec<TargetDwarfObject>)> {
240313
let mut dwarf_objects = Vec::new();
241314

242315
let mut load_section = |id: gimli::SectionId| -> Result<_> {
@@ -279,25 +352,114 @@ fn create_output_object<'file>(
279352
let debug_line = add_section(gimli::SectionId::DebugLine);
280353
let debug_loclists = add_section(gimli::SectionId::DebugLocLists);
281354
let debug_rnglists = add_section(gimli::SectionId::DebugRngLists);
355+
let debug_str = add_section(gimli::SectionId::DebugStr);
356+
let debug_str_offsets = add_section(gimli::SectionId::DebugStrOffsets);
282357

283358
Ok(DwpOutputObject {
284359
obj,
285360
debug_abbrev,
286361
debug_line,
287362
debug_loclists,
288363
debug_rnglists,
364+
debug_str,
365+
debug_str_offsets,
289366
})
290367
}
291368

369+
/// Read the string offsets from `.debug_str_offsets.dwo` in the DWARF object, adding each to the
370+
/// in-progress `.debug_str` (`DwpStringTable`) and building a new `.debug_str_offsets.dwo` to be
371+
/// the current DWARF object's contribution to the DWARF package.
372+
#[tracing::instrument(level = "trace", skip(dwo_obj, dwo_dwarf, string_table, output))]
373+
fn append_debug_str_offset<'input, 'output, 'arena: 'input, Endian: gimli::Endianity>(
374+
dwo_obj: &object::File<'input>,
375+
dwo_dwarf: &gimli::Dwarf<DwpReader<'arena>>,
376+
string_table: &mut DwpStringTable<Endian>,
377+
output: &mut DwpOutputObject<'output>,
378+
) -> Result<u64> {
379+
let mut data = EndianVec::new(runtime_endian_of_object(dwo_obj));
380+
381+
let root_header = dwo_dwarf
382+
.units()
383+
.next()?
384+
.context(DwpError::DwarfObjectWithNoUnits)?;
385+
let encoding = root_header.encoding();
386+
let base = DebugStrOffsetsBase::default_for_encoding_and_file(encoding, DwarfFileType::Dwo);
387+
388+
let section_name = gimli::SectionId::DebugStrOffsets.dwo_name().unwrap();
389+
let section = dwo_obj
390+
.section_by_name(section_name)
391+
.with_context(|| DwpError::DwarfObjectMissingSection(section_name.to_string()))?;
392+
let section_size = section.size();
393+
394+
let entry_size = match encoding.format {
395+
Format::Dwarf32 => 4,
396+
Format::Dwarf64 => 8,
397+
};
398+
399+
debug!(
400+
?section_size,
401+
str_offset_size_num_elements = section_size / entry_size
402+
);
403+
for i in 0..(section_size / entry_size) {
404+
let dwo_index = DebugStrOffsetsIndex(i as usize);
405+
let dwo_offset =
406+
dwo_dwarf
407+
.debug_str_offsets
408+
.get_str_offset(encoding.format, base, dwo_index)?;
409+
let dwo_str = dwo_dwarf.debug_str.get_str(dwo_offset)?;
410+
let dwo_str = dwo_str.to_string()?;
411+
412+
let dwp_offset = string_table.get_or_insert(dwo_str.as_ref())?;
413+
debug!(
414+
?i,
415+
?dwo_str,
416+
"dwo_offset={:#x} dwp_offset={:#x}",
417+
dwo_offset.0,
418+
dwp_offset.0
419+
);
420+
421+
match encoding.format {
422+
Format::Dwarf32 => {
423+
data.write_u32(
424+
dwp_offset
425+
.0
426+
.try_into()
427+
.context(DwpError::DwpStrOffsetOutOfRange)?,
428+
)?;
429+
}
430+
Format::Dwarf64 => {
431+
data.write_u64(
432+
dwp_offset
433+
.0
434+
.try_into()
435+
.context(DwpError::DwpStrOffsetOutOfRange)?,
436+
)?;
437+
}
438+
}
439+
}
440+
441+
Ok(output
442+
.obj
443+
.append_section_data(output.debug_str_offsets, &data.into_vec(), section.align()))
444+
}
445+
292446
/// Process a DWARF object. Copies relevant sections, compilation/type units and strings from DWARF
293447
/// object into output object.
294448
#[tracing::instrument(
295449
level = "trace",
296-
skip(parent_debug_addr, output, arena_data, arena_mmap, arena_relocations)
450+
skip(
451+
parent_debug_addr,
452+
string_table,
453+
output,
454+
arena_data,
455+
arena_mmap,
456+
arena_relocations
457+
)
297458
)]
298-
fn process_dwarf_object<'input, 'output, 'arena: 'input>(
459+
fn process_dwarf_object<'input, 'output, 'arena: 'input, Endian: gimli::Endianity>(
299460
parent_debug_addr: DebugAddr<DwpReader<'arena>>,
300461
dwo: TargetDwarfObject,
462+
string_table: &mut DwpStringTable<Endian>,
301463
output: &mut DwpOutputObject<'output>,
302464
arena_data: &'arena Arena<Cow<'input, [u8]>>,
303465
arena_mmap: &'arena Arena<Mmap>,
@@ -309,31 +471,25 @@ fn process_dwarf_object<'input, 'output, 'arena: 'input>(
309471
load_file_section(id, &dwo_obj, true, &arena_data, &arena_relocations)
310472
};
311473

312-
let mut dwarf = gimli::Dwarf::load(&mut load_dwo_section)?;
313-
dwarf.debug_addr = parent_debug_addr.clone();
314-
let mut iter = dwarf.units();
315-
while let Some(header) = iter.next()? {
316-
let unit = dwarf.unit(header)?;
474+
let mut dwo_dwarf = gimli::Dwarf::load(&mut load_dwo_section)?;
475+
dwo_dwarf.debug_addr = parent_debug_addr.clone();
317476

318-
if matches!(dwo_id_of_unit(&unit), Some(dwo_id) if dwo_id == dwo.dwo_id) {
319-
trace!("match!");
320-
}
321-
}
322-
323-
let mut append_section_data = |gimli_id: gimli::SectionId, obj_id: SectionId| -> Result<u64> {
324-
let name = gimli_id.dwo_name().unwrap();
477+
let mut append_from_to = |from_id: gimli::SectionId, to_id: SectionId| -> Result<u64> {
478+
let name = from_id.dwo_name().unwrap();
325479
let section = dwo_obj
326480
.section_by_name(name)
327481
.with_context(|| DwpError::DwarfObjectMissingSection(name.to_string()))?;
328482
Ok(output
329483
.obj
330-
.append_section_data(obj_id, section.data()?, section.align()))
484+
.append_section_data(to_id, section.data()?, section.align()))
331485
};
332486

333-
let _ = append_section_data(gimli::SectionId::DebugAbbrev, output.debug_abbrev);
334-
let _ = append_section_data(gimli::SectionId::DebugLine, output.debug_line);
335-
let _ = append_section_data(gimli::SectionId::DebugLocLists, output.debug_loclists);
336-
let _ = append_section_data(gimli::SectionId::DebugRngLists, output.debug_rnglists);
487+
let _ = append_from_to(gimli::SectionId::DebugAbbrev, output.debug_abbrev);
488+
let _ = append_from_to(gimli::SectionId::DebugLine, output.debug_line);
489+
let _ = append_from_to(gimli::SectionId::DebugLocLists, output.debug_loclists);
490+
let _ = append_from_to(gimli::SectionId::DebugRngLists, output.debug_rnglists);
491+
492+
let _ = append_debug_str_offset(&dwo_obj, &dwo_dwarf, string_table, output)?;
337493

338494
Ok(())
339495
}
@@ -362,17 +518,23 @@ fn main() -> Result<()> {
362518
parse_executable(&obj, &arena_data, &arena_relocations)?;
363519

364520
let mut output = create_output_object(obj.architecture(), obj.endianness())?;
521+
let mut string_table = DwpStringTable::new(runtime_endian_of_object(&obj));
522+
365523
for dwo in dwarf_objects {
366524
process_dwarf_object(
367525
parent_debug_addr.clone(),
368526
dwo,
527+
&mut string_table,
369528
&mut output,
370529
&arena_data,
371530
&arena_mmap,
372531
&arena_relocations,
373532
)?;
374533
}
375534

535+
// Write the merged string table to the `.debug_str.dwo` section.
536+
let _ = string_table.write(&mut output.obj, output.debug_str);
537+
376538
let mut output_stream = StreamingBuffer::new(BufWriter::new(
377539
fs::File::create(opt.output).context(DwpError::FailedToCreateOutputFile)?,
378540
));

0 commit comments

Comments
 (0)