Skip to content

Commit 4045c94

Browse files
Support reading and writing data access profiles in memprof v4.
1 parent 6dd04e4 commit 4045c94

File tree

14 files changed

+235
-31
lines changed

14 files changed

+235
-31
lines changed

llvm/include/llvm/ProfileData/DataAccessProf.h

+9-3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ namespace data_access_prof {
4141
struct SourceLocation {
4242
SourceLocation(StringRef FileNameRef, uint32_t Line)
4343
: FileName(FileNameRef.str()), Line(Line) {}
44+
45+
SourceLocation() {}
4446
/// The filename where the data is located.
4547
std::string FileName;
4648
/// The line number in the source code.
@@ -53,6 +55,8 @@ namespace internal {
5355
// which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
5456
// to represent data locations internally.
5557
struct SourceLocationRef {
58+
SourceLocationRef(StringRef FileNameRef, uint32_t Line)
59+
: FileName(FileNameRef), Line(Line) {}
5660
// The filename where the data is located.
5761
StringRef FileName;
5862
// The line number in the source code.
@@ -100,8 +104,9 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
100104
/// The data access profiles for a symbol.
101105
struct DataAccessProfRecord {
102106
public:
103-
DataAccessProfRecord(SymbolHandleRef SymHandleRef,
104-
ArrayRef<internal::SourceLocationRef> LocRefs) {
107+
DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
108+
ArrayRef<internal::SourceLocationRef> LocRefs)
109+
: AccessCount(AccessCount) {
105110
if (std::holds_alternative<StringRef>(SymHandleRef)) {
106111
SymHandle = std::get<StringRef>(SymHandleRef).str();
107112
} else
@@ -110,8 +115,9 @@ struct DataAccessProfRecord {
110115
for (auto Loc : LocRefs)
111116
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
112117
}
118+
DataAccessProfRecord() {}
113119
SymbolHandle SymHandle;
114-
120+
uint64_t AccessCount;
115121
// The locations of data in the source code. Optional.
116122
SmallVector<SourceLocation> Locations;
117123
};

llvm/include/llvm/ProfileData/IndexedMemProfData.h

+9-3
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,20 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "llvm/ProfileData/DataAccessProf.h"
1314
#include "llvm/ProfileData/InstrProf.h"
1415
#include "llvm/ProfileData/MemProf.h"
1516

17+
#include <functional>
18+
#include <optional>
19+
1620
namespace llvm {
1721

1822
// Write the MemProf data to OS.
19-
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
20-
memprof::IndexedVersion MemProfVersionRequested,
21-
bool MemProfFullSchema);
23+
Error writeMemProf(
24+
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
25+
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
26+
std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
27+
DataAccessProfileData);
2228

2329
} // namespace llvm

llvm/include/llvm/ProfileData/InstrProfReader.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/StringRef.h"
1919
#include "llvm/IR/ProfileSummary.h"
2020
#include "llvm/Object/BuildID.h"
21+
#include "llvm/ProfileData/DataAccessProf.h"
2122
#include "llvm/ProfileData/InstrProf.h"
2223
#include "llvm/ProfileData/InstrProfCorrelator.h"
2324
#include "llvm/ProfileData/MemProf.h"
@@ -703,10 +704,13 @@ class IndexedMemProfReader {
703704
const unsigned char *CallStackBase = nullptr;
704705
// The number of elements in the radix tree array.
705706
unsigned RadixTreeSize = 0;
707+
/// The data access profiles, deserialized from binary data.
708+
std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
706709

707710
Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
708711
Error deserializeRadixTreeBased(const unsigned char *Start,
709-
const unsigned char *Ptr);
712+
const unsigned char *Ptr,
713+
memprof::IndexedVersion Version);
710714

711715
public:
712716
IndexedMemProfReader() = default;

llvm/include/llvm/ProfileData/InstrProfWriter.h

+6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/StringMap.h"
2020
#include "llvm/IR/GlobalValue.h"
2121
#include "llvm/Object/BuildID.h"
22+
#include "llvm/ProfileData/DataAccessProf.h"
2223
#include "llvm/ProfileData/InstrProf.h"
2324
#include "llvm/ProfileData/MemProf.h"
2425
#include "llvm/Support/Error.h"
@@ -81,6 +82,8 @@ class InstrProfWriter {
8182
// Whether to generate random memprof hotness for testing.
8283
bool MemprofGenerateRandomHotness;
8384

85+
std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
86+
8487
public:
8588
// For memprof testing, random hotness can be assigned to the contexts if
8689
// MemprofGenerateRandomHotness is enabled. The random seed can be either
@@ -122,6 +125,9 @@ class InstrProfWriter {
122125
// Add a binary id to the binary ids list.
123126
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
124127

128+
void addDataAccessProfData(
129+
std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfile);
130+
125131
/// Merge existing function counts from the given writer.
126132
void mergeRecordsFromWriter(InstrProfWriter &&IPW,
127133
function_ref<void(Error)> Warn);

llvm/include/llvm/ProfileData/MemProfReader.h

+15
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,21 @@ class YAMLMemProfReader final : public MemProfReader {
228228
create(std::unique_ptr<MemoryBuffer> Buffer);
229229

230230
void parse(StringRef YAMLData);
231+
232+
std::unique_ptr<data_access_prof::DataAccessProfData>
233+
takeDataAccessProfData() {
234+
return std::move(DataAccessProfileData);
235+
}
236+
237+
private:
238+
// Called by `parse` to set data access profiles after parsing them from Yaml
239+
// files.
240+
void setDataAccessProfileData(
241+
std::unique_ptr<data_access_prof::DataAccessProfData> Data) {
242+
DataAccessProfileData = std::move(Data);
243+
}
244+
245+
std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
231246
};
232247
} // namespace memprof
233248
} // namespace llvm

llvm/include/llvm/ProfileData/MemProfYAML.h

+58
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define LLVM_PROFILEDATA_MEMPROFYAML_H_
33

44
#include "llvm/ADT/SmallVector.h"
5+
#include "llvm/ProfileData/DataAccessProf.h"
56
#include "llvm/ProfileData/MemProf.h"
67
#include "llvm/Support/Format.h"
78
#include "llvm/Support/YAMLTraits.h"
@@ -20,9 +21,24 @@ struct GUIDMemProfRecordPair {
2021
MemProfRecord Record;
2122
};
2223

24+
// Helper struct to yamlify data_access_prof::DataAccessProfData. The struct
25+
// members use owned strings. This is for simplicity and assumes that most real
26+
// world use cases do look-ups and regression test scale is small.
27+
struct YamlDataAccessProfData {
28+
std::vector<data_access_prof::DataAccessProfRecord> Records;
29+
std::vector<uint64_t> KnownColdHashes;
30+
std::vector<std::string> KnownColdSymbols;
31+
32+
bool isEmpty() const {
33+
return Records.empty() && KnownColdHashes.empty() &&
34+
KnownColdSymbols.empty();
35+
}
36+
};
37+
2338
// The top-level data structure, only used with YAML for now.
2439
struct AllMemProfData {
2540
// Heap profile records; mapped under the "HeapProfileRecords" YAML key.
std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
41+
// Data access profiles in their YAML helper form; mapped under the
// "DataAccessProfiles" YAML key.
YamlDataAccessProfData YamlifiedDataAccessProfiles;
2642
};
2743
} // namespace memprof
2844

@@ -206,9 +222,49 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
206222
}
207223
};
208224

225+
// YAML mapping for a data access source location. Both keys are optional.
template <> struct MappingTraits<data_access_prof::SourceLocation> {
226+
static void mapping(IO &Io, data_access_prof::SourceLocation &Loc) {
227+
// Key order below is the order in which the keys are mapped.
Io.mapOptional("FileName", Loc.FileName);
228+
Io.mapOptional("Line", Loc.Line);
229+
}
230+
};
231+
232+
template <> struct MappingTraits<data_access_prof::DataAccessProfRecord> {
233+
static void mapping(IO &Io, data_access_prof::DataAccessProfRecord &Rec) {
234+
if (Io.outputting()) {
235+
if (std::holds_alternative<std::string>(Rec.SymHandle)) {
236+
Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
237+
} else {
238+
Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
239+
}
240+
} else {
241+
std::string SymName;
242+
uint64_t Hash = 0;
243+
Io.mapOptional("Symbol", SymName);
244+
Io.mapOptional("Hash", Hash);
245+
if (!SymName.empty()) {
246+
Rec.SymHandle = SymName;
247+
} else {
248+
Rec.SymHandle = Hash;
249+
}
250+
}
251+
252+
Io.mapOptional("Locations", Rec.Locations);
253+
}
254+
};
255+
256+
// YAML mapping for the data access profile helper struct. All three keys are
// optional.
template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
257+
static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) {
258+
// Note the YAML key is "SampledRecords" while the member is `Records`.
Io.mapOptional("SampledRecords", Data.Records);
259+
Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols);
260+
Io.mapOptional("KnownColdHashes", Data.KnownColdHashes);
261+
}
262+
};
263+
209264
// YAML mapping for the top-level MemProf document.
template <> struct MappingTraits<memprof::AllMemProfData> {
210265
static void mapping(IO &Io, memprof::AllMemProfData &Data) {
211266
// Heap profile records are a required key.
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
267+
// Data access profiles are mapped as an optional key.
Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
212268
}
213269
};
214270

@@ -234,5 +290,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
234290
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
235291
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
236292
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
293+
LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::DataAccessProfRecord)
294+
LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::SourceLocation)
237295

238296
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_

llvm/lib/ProfileData/DataAccessProf.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
4848

4949
auto It = Records.find(Key);
5050
if (It != Records.end()) {
51-
return DataAccessProfRecord(Key, It->second.Locations);
51+
return DataAccessProfRecord(Key, It->second.AccessCount,
52+
It->second.Locations);
5253
}
5354

5455
return std::nullopt;
@@ -111,7 +112,8 @@ Error DataAccessProfData::addKnownSymbolWithoutSamples(
111112
auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID));
112113
if (!CanonicalName)
113114
return CanonicalName.takeError();
114-
KnownColdSymbols.insert(*CanonicalName);
115+
KnownColdSymbols.insert(
116+
saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first);
115117
return Error::success();
116118
}
117119

llvm/lib/ProfileData/IndexedMemProfData.cpp

+48-13
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "llvm/ProfileData/DataAccessProf.h"
1314
#include "llvm/ProfileData/InstrProf.h"
1415
#include "llvm/ProfileData/InstrProfReader.h"
1516
#include "llvm/ProfileData/MemProf.h"
@@ -216,7 +217,9 @@ static Error writeMemProfV2(ProfOStream &OS,
216217

217218
static Error writeMemProfRadixTreeBased(
218219
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
219-
memprof::IndexedVersion Version, bool MemProfFullSchema) {
220+
memprof::IndexedVersion Version, bool MemProfFullSchema,
221+
std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
222+
DataAccessProfileData) {
220223
assert((Version == memprof::Version3 || Version == memprof::Version4) &&
221224
"Unsupported version for radix tree format");
222225

@@ -225,6 +228,8 @@ static Error writeMemProfRadixTreeBased(
225228
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
226229
OS.write(0ULL); // Reserve space for the memprof record payload offset.
227230
OS.write(0ULL); // Reserve space for the memprof record table offset.
231+
if (Version == memprof::Version4)
232+
OS.write(0ULL); // Reserve space for the data access profile offset.
228233

229234
auto Schema = memprof::getHotColdSchema();
230235
if (MemProfFullSchema)
@@ -251,17 +256,26 @@ static Error writeMemProfRadixTreeBased(
251256
uint64_t RecordTableOffset = writeMemProfRecords(
252257
OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
253258

259+
uint64_t DataAccessProfOffset = 0;
260+
if (DataAccessProfileData.has_value()) {
261+
DataAccessProfOffset = OS.tell();
262+
if (Error E = (*DataAccessProfileData).get().serialize(OS))
263+
return E;
264+
}
265+
254266
// Verify that the computation for the number of elements in the call stack
255267
// array works.
256268
assert(CallStackPayloadOffset +
257269
NumElements * sizeof(memprof::LinearFrameId) ==
258270
RecordPayloadOffset);
259271

260-
uint64_t Header[] = {
272+
SmallVector<uint64_t, 4> Header = {
261273
CallStackPayloadOffset,
262274
RecordPayloadOffset,
263275
RecordTableOffset,
264276
};
277+
if (Version == memprof::Version4)
278+
Header.push_back(DataAccessProfOffset);
265279
OS.patch({{HeaderUpdatePos, Header}});
266280

267281
return Error::success();
@@ -272,28 +286,33 @@ static Error writeMemProfV3(ProfOStream &OS,
272286
memprof::IndexedMemProfData &MemProfData,
273287
bool MemProfFullSchema) {
274288
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
275-
MemProfFullSchema);
289+
MemProfFullSchema, std::nullopt);
276290
}
277291

278292
// Write out MemProf Version4
279-
static Error writeMemProfV4(ProfOStream &OS,
280-
memprof::IndexedMemProfData &MemProfData,
281-
bool MemProfFullSchema) {
293+
static Error writeMemProfV4(
294+
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
295+
bool MemProfFullSchema,
296+
std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
297+
DataAccessProfileData) {
282298
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
283-
MemProfFullSchema);
299+
MemProfFullSchema, DataAccessProfileData);
284300
}
285301

286302
// Write out the MemProf data in a requested version.
287-
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
288-
memprof::IndexedVersion MemProfVersionRequested,
289-
bool MemProfFullSchema) {
303+
Error writeMemProf(
304+
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
305+
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
306+
std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
307+
DataAccessProfileData) {
290308
switch (MemProfVersionRequested) {
291309
case memprof::Version2:
292310
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
293311
case memprof::Version3:
294312
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
295313
case memprof::Version4:
296-
return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
314+
return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
315+
DataAccessProfileData);
297316
}
298317

299318
return make_error<InstrProfError>(
@@ -357,7 +376,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
357376
}
358377

359378
Error IndexedMemProfReader::deserializeRadixTreeBased(
360-
const unsigned char *Start, const unsigned char *Ptr) {
379+
const unsigned char *Start, const unsigned char *Ptr,
380+
memprof::IndexedVersion Version) {
381+
assert((Version == memprof::Version3 || Version == memprof::Version4) &&
382+
"Unsupported version for radix tree format");
361383
// The offset in the stream right before invoking
362384
// CallStackTableGenerator.Emit.
363385
const uint64_t CallStackPayloadOffset =
@@ -369,6 +391,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
369391
const uint64_t RecordTableOffset =
370392
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
371393

394+
uint64_t DataAccessProfOffset = 0;
395+
if (Version == memprof::Version4)
396+
DataAccessProfOffset =
397+
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
398+
372399
// Read the schema.
373400
auto SchemaOr = memprof::readMemProfSchema(Ptr);
374401
if (!SchemaOr)
@@ -390,6 +417,14 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
390417
/*Payload=*/Start + RecordPayloadOffset,
391418
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
392419

420+
if (DataAccessProfOffset > RecordTableOffset) {
421+
DataAccessProfileData =
422+
std::make_unique<data_access_prof::DataAccessProfData>();
423+
const unsigned char *DAPPtr = Start + DataAccessProfOffset;
424+
if (Error E = DataAccessProfileData->deserialize(DAPPtr))
425+
return E;
426+
}
427+
393428
return Error::success();
394429
}
395430

@@ -423,7 +458,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
423458
case memprof::Version3:
424459
case memprof::Version4:
425460
// V3 and V4 share the same high-level structure (radix tree, linear IDs).
426-
if (Error E = deserializeRadixTreeBased(Start, Ptr))
461+
if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
427462
return E;
428463
break;
429464
}

0 commit comments

Comments
 (0)