Skip to content

Commit 6dd04e4

Browse files
resolve comments
1 parent df08094 commit 6dd04e4

File tree

4 files changed

+167
-119
lines changed

4 files changed

+167
-119
lines changed

llvm/include/llvm/ProfileData/DataAccessProf.h

+83-36
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,40 @@
3030
#include "llvm/Support/StringSaver.h"
3131

3232
#include <cstdint>
33+
#include <optional>
3334
#include <variant>
3435

3536
namespace llvm {
3637

3738
namespace data_access_prof {
38-
// The location of data in the source code.
39-
struct DataLocation {
39+
40+
/// The location of data in the source code. Used by profile lookup API.
41+
struct SourceLocation {
42+
SourceLocation(StringRef FileNameRef, uint32_t Line)
43+
: FileName(FileNameRef.str()), Line(Line) {}
44+
/// The filename where the data is located.
45+
std::string FileName;
46+
/// The line number in the source code.
47+
uint32_t Line;
48+
};
49+
50+
namespace internal {
51+
52+
// Conceptually similar to SourceLocation except that FileNames are StringRef of
53+
// which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
54+
// to represent data locations internally.
55+
struct SourceLocationRef {
4056
// The filename where the data is located.
4157
StringRef FileName;
4258
// The line number in the source code.
4359
uint32_t Line;
4460
};
4561

46-
// The data access profiles for a symbol.
47-
struct DataAccessProfRecord {
48-
DataAccessProfRecord(uint64_t SymbolID, uint64_t AccessCount,
49-
bool IsStringLiteral)
62+
// The data access profiles for a symbol. Used by `DataAccessProfData`
63+
// to represent records internally.
64+
struct DataAccessProfRecordRef {
65+
DataAccessProfRecordRef(uint64_t SymbolID, uint64_t AccessCount,
66+
bool IsStringLiteral)
5067
: SymbolID(SymbolID), AccessCount(AccessCount),
5168
IsStringLiteral(IsStringLiteral) {}
5269

@@ -67,18 +84,43 @@ struct DataAccessProfRecord {
6784
bool IsStringLiteral;
6885

6986
// The locations of data in the source code. Optional.
70-
llvm::SmallVector<DataLocation, 0> Locations;
87+
llvm::SmallVector<SourceLocationRef, 0> Locations;
7188
};
89+
} // namespace internal
90+
91+
// SymbolID is either a string representing symbol name if the symbol has
92+
// stable mangled name relative to source code, or a uint64_t representing the
93+
// content hash of a string literal (with unstable name patterns like
94+
// `.str.N[.llvm.hash]`). The StringRef is owned by the class's saver object.
95+
using SymbolHandleRef = std::variant<StringRef, uint64_t>;
7296

73-
/// Encapsulates the data access profile data and the methods to operate on it.
74-
/// This class provides profile look-up, serialization and deserialization.
97+
// The senamtic is the same as `SymbolHandleRef` above. The strings are owned.
98+
using SymbolHandle = std::variant<std::string, uint64_t>;
99+
100+
/// The data access profiles for a symbol.
101+
struct DataAccessProfRecord {
102+
public:
103+
DataAccessProfRecord(SymbolHandleRef SymHandleRef,
104+
ArrayRef<internal::SourceLocationRef> LocRefs) {
105+
if (std::holds_alternative<StringRef>(SymHandleRef)) {
106+
SymHandle = std::get<StringRef>(SymHandleRef).str();
107+
} else
108+
SymHandle = std::get<uint64_t>(SymHandleRef);
109+
110+
for (auto Loc : LocRefs)
111+
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
112+
}
113+
SymbolHandle SymHandle;
114+
115+
// The locations of data in the source code. Optional.
116+
SmallVector<SourceLocation> Locations;
117+
};
118+
119+
/// Encapsulates the data access profile data and the methods to operate on
120+
/// it. This class provides profile look-up, serialization and
121+
/// deserialization.
75122
class DataAccessProfData {
76123
public:
77-
// SymbolID is either a string representing symbol name if the symbol has
78-
// stable mangled name relative to source code, or a uint64_t representing the
79-
// content hash of a string literal (with unstable name patterns like
80-
// `.str.N[.llvm.hash]`). The StringRef is owned by the class's saver object.
81-
using SymbolHandle = std::variant<StringRef, uint64_t>;
82124
using StringToIndexMap = llvm::MapVector<StringRef, uint64_t>;
83125

84126
DataAccessProfData() : Saver(Allocator) {}
@@ -93,35 +135,39 @@ class DataAccessProfData {
93135
/// Deserialize this class from the given buffer.
94136
Error deserialize(const unsigned char *&Ptr);
95137

96-
/// Returns a pointer of profile record for \p SymbolID, or nullptr if there
138+
/// Returns a profile record for \p SymbolID, or std::nullopt if there
97139
/// isn't a record. Internally, this function will canonicalize the symbol
98140
/// name before the lookup.
99-
const DataAccessProfRecord *getProfileRecord(const SymbolHandle SymID) const;
141+
std::optional<DataAccessProfRecord>
142+
getProfileRecord(const SymbolHandleRef SymID) const;
100143

101144
/// Returns true if \p SymID is seen in profiled binaries and cold.
102-
bool isKnownColdSymbol(const SymbolHandle SymID) const;
145+
bool isKnownColdSymbol(const SymbolHandleRef SymID) const;
103146

104-
/// Methods to set symbolized data access profile. Returns error if duplicated
105-
/// symbol names or content hashes are seen. The user of this class should
106-
/// aggregate counters that correspond to the same symbol name or with the
107-
/// same string literal hash before calling 'set*' methods.
108-
Error setDataAccessProfile(SymbolHandle SymbolID, uint64_t AccessCount);
147+
/// Methods to set symbolized data access profile. Returns error if
148+
/// duplicated symbol names or content hashes are seen. The user of this
149+
/// class should aggregate counters that correspond to the same symbol name
150+
/// or with the same string literal hash before calling 'set*' methods.
151+
Error setDataAccessProfile(SymbolHandleRef SymbolID, uint64_t AccessCount);
109152
/// Similar to the method above, for records with \p Locations representing
110153
/// the `filename:line` where this symbol shows up. Note because of linker's
111154
/// merge of identical symbols (e.g., unnamed_addr string literals), one
112155
/// symbol is likely to have multiple locations.
113-
Error setDataAccessProfile(SymbolHandle SymbolID, uint64_t AccessCount,
114-
ArrayRef<DataLocation> Locations);
115-
Error addKnownSymbolWithoutSamples(SymbolHandle SymbolID);
116-
117-
/// Returns an iterable StringRef for strings in the order they are added.
118-
/// Each string may be a symbol name or a file name.
119-
auto getStrings() const {
120-
return llvm::make_first_range(StrToIndexMap.getArrayRef());
156+
Error setDataAccessProfile(SymbolHandleRef SymbolID, uint64_t AccessCount,
157+
ArrayRef<SourceLocation> Locations);
158+
/// Add a symbol that's seen in the profiled binary without samples.
159+
Error addKnownSymbolWithoutSamples(SymbolHandleRef SymbolID);
160+
161+
/// The following methods return array reference for various internal data
162+
/// structures.
163+
ArrayRef<StringToIndexMap::value_type> getStrToIndexMapRef() const {
164+
return StrToIndexMap.getArrayRef();
165+
}
166+
ArrayRef<
167+
MapVector<SymbolHandleRef, internal::DataAccessProfRecordRef>::value_type>
168+
getRecords() const {
169+
return Records.getArrayRef();
121170
}
122-
123-
/// Returns array reference for various internal data structures.
124-
auto getRecords() const { return Records.getArrayRef(); }
125171
ArrayRef<StringRef> getKnownColdSymbols() const {
126172
return KnownColdSymbols.getArrayRef();
127173
}
@@ -139,11 +185,12 @@ class DataAccessProfData {
139185
const uint64_t NumSampledSymbols,
140186
const uint64_t NumColdKnownSymbols);
141187

142-
/// Decode the records and increment \p Ptr to the start of the next payload.
188+
/// Decode the records and increment \p Ptr to the start of the next
189+
/// payload.
143190
Error deserializeRecords(const unsigned char *&Ptr);
144191

145192
/// A helper function to compute a storage index for \p SymbolID.
146-
uint64_t getEncodedIndex(const SymbolHandle SymbolID) const;
193+
uint64_t getEncodedIndex(const SymbolHandleRef SymbolID) const;
147194

148195
// Keeps owned copies of the input strings.
149196
// NOTE: Keep `Saver` initialized before other class members that reference
@@ -152,7 +199,7 @@ class DataAccessProfData {
152199
llvm::UniqueStringSaver Saver;
153200

154201
// `Records` stores the records.
155-
MapVector<SymbolHandle, DataAccessProfRecord> Records;
202+
MapVector<SymbolHandleRef, internal::DataAccessProfRecordRef> Records;
156203

157204
// Use MapVector to keep input order of strings for serialization and
158205
// deserialization.

llvm/include/llvm/ProfileData/InstrProf.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ class InstrProfSymtab {
500500
public:
501501
using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
502502

503-
// Returns the canonial name of the given PGOName. In a canonical name, all
503+
// Returns the canonical name of the given PGOName. In a canonical name, all
504504
// suffixes that begins with "." except ".__uniq." are stripped.
505505
// FIXME: Unify this with `FunctionSamples::getCanonicalFnName`.
506506
static StringRef getCanonicalName(StringRef PGOName);

llvm/lib/ProfileData/DataAccessProf.cpp

+25-24
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ namespace data_access_prof {
1616
// If `Map` has an entry keyed by `Str`, returns the entry iterator. Otherwise,
1717
// creates an owned copy of `Str`, adds a map entry for it and returns the
1818
// iterator.
19-
static MapVector<StringRef, uint64_t>::iterator
20-
saveStringToMap(MapVector<StringRef, uint64_t> &Map,
19+
static std::pair<StringRef, uint64_t>
20+
saveStringToMap(DataAccessProfData::StringToIndexMap &Map,
2121
llvm::UniqueStringSaver &Saver, StringRef Str) {
2222
auto [Iter, Inserted] = Map.try_emplace(Saver.save(Str), Map.size());
23-
return Iter;
23+
return *Iter;
2424
}
2525

2626
// Returns the canonical name or error.
@@ -31,8 +31,8 @@ static Expected<StringRef> getCanonicalName(StringRef Name) {
3131
return InstrProfSymtab::getCanonicalName(Name);
3232
}
3333

34-
const DataAccessProfRecord *
35-
DataAccessProfData::getProfileRecord(const SymbolHandle SymbolID) const {
34+
std::optional<DataAccessProfRecord>
35+
DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
3636
auto Key = SymbolID;
3737
if (std::holds_alternative<StringRef>(SymbolID)) {
3838
auto NameOrErr = getCanonicalName(std::get<StringRef>(SymbolID));
@@ -41,40 +41,39 @@ DataAccessProfData::getProfileRecord(const SymbolHandle SymbolID) const {
4141
assert(
4242
std::get<StringRef>(SymbolID).empty() &&
4343
"Name canonicalization only fails when stringified string is empty.");
44-
return nullptr;
44+
return std::nullopt;
4545
}
4646
Key = *NameOrErr;
4747
}
4848

4949
auto It = Records.find(Key);
50-
if (It != Records.end())
51-
return &It->second;
50+
if (It != Records.end()) {
51+
return DataAccessProfRecord(Key, It->second.Locations);
52+
}
5253

53-
return nullptr;
54+
return std::nullopt;
5455
}
5556

56-
bool DataAccessProfData::isKnownColdSymbol(const SymbolHandle SymID) const {
57+
bool DataAccessProfData::isKnownColdSymbol(const SymbolHandleRef SymID) const {
5758
if (std::holds_alternative<uint64_t>(SymID))
5859
return KnownColdHashes.contains(std::get<uint64_t>(SymID));
5960
return KnownColdSymbols.contains(std::get<StringRef>(SymID));
6061
}
6162

62-
Error DataAccessProfData::setDataAccessProfile(SymbolHandle Symbol,
63+
Error DataAccessProfData::setDataAccessProfile(SymbolHandleRef Symbol,
6364
uint64_t AccessCount) {
6465
uint64_t RecordID = -1;
65-
bool IsStringLiteral = false;
66-
SymbolHandle Key;
67-
if (std::holds_alternative<uint64_t>(Symbol)) {
66+
const bool IsStringLiteral = std::holds_alternative<uint64_t>(Symbol);
67+
SymbolHandleRef Key;
68+
if (IsStringLiteral) {
6869
RecordID = std::get<uint64_t>(Symbol);
6970
Key = RecordID;
70-
IsStringLiteral = true;
7171
} else {
7272
auto CanonicalName = getCanonicalName(std::get<StringRef>(Symbol));
7373
if (!CanonicalName)
7474
return CanonicalName.takeError();
7575
std::tie(Key, RecordID) =
76-
*saveStringToMap(StrToIndexMap, Saver, *CanonicalName);
77-
IsStringLiteral = false;
76+
saveStringToMap(StrToIndexMap, Saver, *CanonicalName);
7877
}
7978

8079
auto [Iter, Inserted] =
@@ -89,21 +88,22 @@ Error DataAccessProfData::setDataAccessProfile(SymbolHandle Symbol,
8988
}
9089

9190
Error DataAccessProfData::setDataAccessProfile(
92-
SymbolHandle SymbolID, uint64_t AccessCount,
93-
ArrayRef<DataLocation> Locations) {
91+
SymbolHandleRef SymbolID, uint64_t AccessCount,
92+
ArrayRef<SourceLocation> Locations) {
9493
if (Error E = setDataAccessProfile(SymbolID, AccessCount))
9594
return E;
9695

9796
auto &Record = Records.back().second;
9897
for (const auto &Location : Locations)
9998
Record.Locations.push_back(
100-
{saveStringToMap(StrToIndexMap, Saver, Location.FileName)->first,
99+
{saveStringToMap(StrToIndexMap, Saver, Location.FileName).first,
101100
Location.Line});
102101

103102
return Error::success();
104103
}
105104

106-
Error DataAccessProfData::addKnownSymbolWithoutSamples(SymbolHandle SymbolID) {
105+
Error DataAccessProfData::addKnownSymbolWithoutSamples(
106+
SymbolHandleRef SymbolID) {
107107
if (std::holds_alternative<uint64_t>(SymbolID)) {
108108
KnownColdHashes.insert(std::get<uint64_t>(SymbolID));
109109
return Error::success();
@@ -164,7 +164,7 @@ Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const {
164164
}
165165

166166
uint64_t
167-
DataAccessProfData::getEncodedIndex(const SymbolHandle SymbolID) const {
167+
DataAccessProfData::getEncodedIndex(const SymbolHandleRef SymbolID) const {
168168
if (std::holds_alternative<uint64_t>(SymbolID))
169169
return std::get<uint64_t>(SymbolID);
170170

@@ -220,7 +220,8 @@ Error DataAccessProfData::deserializeSymbolsAndFilenames(
220220
}
221221

222222
Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
223-
SmallVector<StringRef> Strings = llvm::to_vector(getStrings());
223+
SmallVector<StringRef> Strings =
224+
llvm::to_vector(llvm::make_first_range(getStrToIndexMapRef()));
224225

225226
uint64_t NumRecords =
226227
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
@@ -235,7 +236,7 @@ Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
235236
uint64_t AccessCount =
236237
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
237238

238-
SymbolHandle SymbolID;
239+
SymbolHandleRef SymbolID;
239240
if (IsStringLiteral)
240241
SymbolID = ID;
241242
else

0 commit comments

Comments
 (0)