Skip to content

Commit 7587864

Browse files
[StaticDataLayout][PGO]Implement reader and writer change for data access profiles
1 parent 6dd04e4 commit 7587864

File tree

14 files changed

+244
-31
lines changed

14 files changed

+244
-31
lines changed

llvm/include/llvm/ProfileData/DataAccessProf.h

+9-3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ namespace data_access_prof {
4141
struct SourceLocation {
4242
SourceLocation(StringRef FileNameRef, uint32_t Line)
4343
: FileName(FileNameRef.str()), Line(Line) {}
44+
45+
SourceLocation() {}
4446
/// The filename where the data is located.
4547
std::string FileName;
4648
/// The line number in the source code.
@@ -53,6 +55,8 @@ namespace internal {
5355
// which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
5456
// to represent data locations internally.
5557
struct SourceLocationRef {
58+
// Builds a location reference from a file name and a line number. The file
// name is kept as a StringRef (no copy is made here), so the referenced
// characters must outlive this object.
SourceLocationRef(StringRef FileNameRef, uint32_t Line)
59+
: FileName(FileNameRef), Line(Line) {}
5660
// The filename where the data is located.
5761
StringRef FileName;
5862
// The line number in the source code.
@@ -100,8 +104,9 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
100104
/// The data access profiles for a symbol.
101105
struct DataAccessProfRecord {
102106
public:
103-
DataAccessProfRecord(SymbolHandleRef SymHandleRef,
104-
ArrayRef<internal::SourceLocationRef> LocRefs) {
107+
DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
108+
ArrayRef<internal::SourceLocationRef> LocRefs)
109+
: AccessCount(AccessCount) {
105110
if (std::holds_alternative<StringRef>(SymHandleRef)) {
106111
SymHandle = std::get<StringRef>(SymHandleRef).str();
107112
} else
@@ -110,8 +115,9 @@ struct DataAccessProfRecord {
110115
for (auto Loc : LocRefs)
111116
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
112117
}
118+
DataAccessProfRecord() {}
113119
SymbolHandle SymHandle;
114-
120+
uint64_t AccessCount;
115121
// The locations of data in the source code. Optional.
116122
SmallVector<SourceLocation> Locations;
117123
};

llvm/include/llvm/ProfileData/IndexedMemProfData.h

+9-3
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,20 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "llvm/ProfileData/DataAccessProf.h"
1314
#include "llvm/ProfileData/InstrProf.h"
1415
#include "llvm/ProfileData/MemProf.h"
1516

17+
#include <functional>
18+
#include <optional>
19+
1620
namespace llvm {
1721

1822
// Write the MemProf data to OS.
19-
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
20-
memprof::IndexedVersion MemProfVersionRequested,
21-
bool MemProfFullSchema);
23+
Error writeMemProf(
24+
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
25+
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
26+
std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
27+
DataAccessProfileData);
2228

2329
} // namespace llvm

llvm/include/llvm/ProfileData/InstrProfReader.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/StringRef.h"
1919
#include "llvm/IR/ProfileSummary.h"
2020
#include "llvm/Object/BuildID.h"
21+
#include "llvm/ProfileData/DataAccessProf.h"
2122
#include "llvm/ProfileData/InstrProf.h"
2223
#include "llvm/ProfileData/InstrProfCorrelator.h"
2324
#include "llvm/ProfileData/MemProf.h"
@@ -704,9 +705,12 @@ class IndexedMemProfReader {
704705
// The number of elements in the radix tree array.
705706
unsigned RadixTreeSize = 0;
706707

708+
std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
709+
707710
Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
708711
Error deserializeRadixTreeBased(const unsigned char *Start,
709-
const unsigned char *Ptr);
712+
const unsigned char *Ptr,
713+
memprof::IndexedVersion Version);
710714

711715
public:
712716
IndexedMemProfReader() = default;

llvm/include/llvm/ProfileData/InstrProfWriter.h

+6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/StringMap.h"
2020
#include "llvm/IR/GlobalValue.h"
2121
#include "llvm/Object/BuildID.h"
22+
#include "llvm/ProfileData/DataAccessProf.h"
2223
#include "llvm/ProfileData/InstrProf.h"
2324
#include "llvm/ProfileData/MemProf.h"
2425
#include "llvm/Support/Error.h"
@@ -81,6 +82,8 @@ class InstrProfWriter {
8182
// Whether to generate random memprof hotness for testing.
8283
bool MemprofGenerateRandomHotness;
8384

85+
std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
86+
8487
public:
8588
// For memprof testing, random hotness can be assigned to the contexts if
8689
// MemprofGenerateRandomHotness is enabled. The random seed can be either
@@ -122,6 +125,9 @@ class InstrProfWriter {
122125
// Add a binary id to the binary ids list.
123126
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
124127

128+
void addDataAccessProfData(
129+
std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfile);
130+
125131
/// Merge existing function counts from the given writer.
126132
void mergeRecordsFromWriter(InstrProfWriter &&IPW,
127133
function_ref<void(Error)> Warn);

llvm/include/llvm/ProfileData/MemProfReader.h

+12
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ class MemProfReader {
5050
// MemProfReader no longer owns the MemProf profile.
5151
IndexedMemProfData takeMemProfData() { return std::move(MemProfData); }
5252

53+
// Hands the data access profile over to the caller. The member unique_ptr is
// moved from, so after this call the reader no longer owns the profile.
// Returns a null pointer if no profile was set.
std::unique_ptr<data_access_prof::DataAccessProfData>
54+
takeDataAccessProfData() {
55+
return std::move(DataAccessProfileData);
56+
}
57+
5358
virtual Error
5459
readNextRecord(GuidMemProfRecordPair &GuidRecord,
5560
std::function<const Frame(const FrameId)> Callback = nullptr) {
@@ -86,6 +91,11 @@ class MemProfReader {
8691
MemProfReader(IndexedMemProfData &&MemProfData)
8792
: MemProfData(std::move(MemProfData)) {}
8893

94+
// Takes ownership of `Data` and stores it as this reader's data access
// profile, replacing (and thereby releasing) any profile held previously.
void setDataAccessProfileData(
95+
std::unique_ptr<data_access_prof::DataAccessProfData> Data) {
96+
DataAccessProfileData = std::move(Data);
97+
}
98+
8999
protected:
90100
// A helper method to extract the frame from the IdToFrame map.
91101
const Frame &idToFrame(const FrameId Id) const {
@@ -97,6 +107,8 @@ class MemProfReader {
97107
IndexedMemProfData MemProfData;
98108
// An iterator to the internal function profile data structure.
99109
llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
110+
111+
std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
100112
};
101113

102114
// Map from id (recorded from sanitizer stack depot) to virtual addresses for

llvm/include/llvm/ProfileData/MemProfYAML.h

+65
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define LLVM_PROFILEDATA_MEMPROFYAML_H_
33

44
#include "llvm/ADT/SmallVector.h"
5+
#include "llvm/ProfileData/DataAccessProf.h"
56
#include "llvm/ProfileData/MemProf.h"
67
#include "llvm/Support/Format.h"
78
#include "llvm/Support/YAMLTraits.h"
@@ -12,6 +13,9 @@ namespace memprof {
1213
// serialized and deserialized in YAML.
1314
LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64)
1415

16+
LLVM_YAML_STRONG_TYPEDEF(uint64_t, SymbolContentHash)
17+
LLVM_YAML_STRONG_TYPEDEF(std::string, OwnedSymbolName)
18+
1519
// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields
1620
// within MemProfRecord at the same level as if the GUID were part of
1721
// MemProfRecord.
@@ -20,9 +24,25 @@ struct GUIDMemProfRecordPair {
2024
MemProfRecord Record;
2125
};
2226

27+
// Helper struct to yamlify data_access_prof::DataAccessProfData. The struct
28+
// members use owned strings. This is for simplicity and assumes that most real
29+
// world use cases do look-ups and regression test scale is small, so string
30+
// efficiency is not a priority.
31+
// Plain-data mirror of the data access profile used for YAML conversion.
struct YamlDataAccessProfData {
32+
// Per-symbol records; mapped under the YAML key "SampledRecords".
std::vector<data_access_prof::DataAccessProfRecord> Records;
33+
// Mapped under the YAML key "KnownColdHashes".
// NOTE(review): presumably content hashes of symbols known to be cold —
// confirm against DataAccessProfData.
std::vector<uint64_t> KnownColdHashes;
34+
// Mapped under the YAML key "KnownColdSymbols". Names are owned copies.
std::vector<std::string> KnownColdSymbols;
35+
36+
// Returns true only when all three containers are empty.
bool isEmpty() const {
37+
return Records.empty() && KnownColdHashes.empty() &&
38+
KnownColdSymbols.empty();
39+
}
40+
};
41+
2342
// The top-level data structure, only used with YAML for now.
2443
struct AllMemProfData {
2544
std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
45+
YamlDataAccessProfData YamlifiedDataAccessProfiles;
2646
};
2747
} // namespace memprof
2848

@@ -206,9 +226,50 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
206226
}
207227
};
208228

229+
// YAML mapping for a source location: a mapping with the keys "FileName" and
// "Line". Both keys are registered as optional.
template <> struct MappingTraits<data_access_prof::SourceLocation> {
230+
static void mapping(IO &Io, data_access_prof::SourceLocation &Loc) {
231+
Io.mapOptional("FileName", Loc.FileName);
232+
Io.mapOptional("Line", Loc.Line);
233+
}
234+
};
235+
236+
// YAML mapping for a data access profile record. The symbol handle is a
// variant, so it is represented by one of two keys: "Symbol" (string) or
// "Hash" (64-bit integer). Locations are stored under "Locations".
//
// NOTE(review): Rec.AccessCount is not mapped by this function, so the access
// count is neither written to nor read from YAML — confirm this is intended.
template <> struct MappingTraits<data_access_prof::DataAccessProfRecord> {
237+
static void mapping(IO &Io, data_access_prof::DataAccessProfRecord &Rec) {
238+
// Writing: emit whichever alternative the handle currently holds.
if (Io.outputting()) {
239+
if (std::holds_alternative<std::string>(Rec.SymHandle)) {
240+
Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
241+
} else {
242+
Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
243+
}
244+
} else {
245+
// Reading: both keys are optional. A non-empty "Symbol" takes precedence;
// otherwise the handle becomes the hash, which stays 0 if "Hash" is absent.
std::string SymName;
246+
uint64_t Hash = 0;
247+
Io.mapOptional("Symbol", SymName);
248+
Io.mapOptional("Hash", Hash);
249+
if (!SymName.empty()) {
250+
Rec.SymHandle = SymName;
251+
} else {
252+
Rec.SymHandle = Hash;
253+
}
254+
}
255+
256+
Io.mapOptional("Locations", Rec.Locations);
257+
}
258+
};
259+
260+
// YAML mapping for the whole data access profile. All three sections are
// optional: "SampledRecords", "KnownColdSymbols" and "KnownColdHashes".
template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
261+
static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) {
262+
Io.mapOptional("SampledRecords", Data.Records);
263+
Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols);
264+
Io.mapOptional("KnownColdHashes", Data.KnownColdHashes);
265+
}
266+
};
267+
209268
// YAML mapping for the top-level profile document. "HeapProfileRecords" is
// required; "DataAccessProfiles" is optional.
template <> struct MappingTraits<memprof::AllMemProfData> {
210269
static void mapping(IO &Io, memprof::AllMemProfData &Data) {
211270
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
271+
272+
Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
212273
}
213274
};
214275

@@ -234,5 +295,9 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
234295
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
235296
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
236297
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
298+
LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::DataAccessProfRecord)
299+
LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::SourceLocation)
300+
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::SymbolContentHash)
301+
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::OwnedSymbolName)
237302

238303
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_

llvm/lib/ProfileData/DataAccessProf.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
4848

4949
auto It = Records.find(Key);
5050
if (It != Records.end()) {
51-
return DataAccessProfRecord(Key, It->second.Locations);
51+
return DataAccessProfRecord(Key, It->second.AccessCount,
52+
It->second.Locations);
5253
}
5354

5455
return std::nullopt;
@@ -111,7 +112,8 @@ Error DataAccessProfData::addKnownSymbolWithoutSamples(
111112
auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID));
112113
if (!CanonicalName)
113114
return CanonicalName.takeError();
114-
KnownColdSymbols.insert(*CanonicalName);
115+
KnownColdSymbols.insert(
116+
saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first);
115117
return Error::success();
116118
}
117119

0 commit comments

Comments
 (0)