Skip to content

Commit 74da08e

Browse files
committed
[Exegesis] Add support for serializing/deserializing benchmarks
TBA...
1 parent d4edd1d commit 74da08e

File tree

7 files changed

+367
-102
lines changed

7 files changed

+367
-102
lines changed

llvm/docs/CommandGuide/llvm-exegesis.rst

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,9 +299,18 @@ OPTIONS
299299
However, it is possible to stop at some stage before measuring. Choices are:
300300
* ``prepare-snippet``: Only generate the minimal instruction sequence.
301301
* ``prepare-and-assemble-snippet``: Same as ``prepare-snippet``, but also dumps an excerpt of the sequence (hex encoded).
302-
* ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``. but also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
302+
* ``assemble-measured-code``: Same as ``prepare-and-assemble-snippet``, but
303+
also creates the full sequence that can be dumped to a file using ``--dump-object-to-disk``.
304+
If either zlib or zstd is available and we're using either duplicate or
305+
loop repetition mode, this phase generates benchmarks with a serialized
306+
snippet object file attached to it.
303307
* ``measure``: Same as ``assemble-measured-code``, but also runs the measurement.
304308

309+
.. option:: --run-measurement=<benchmarks file>
310+
311+
Given a benchmarks file generated after the ``assemble-measured-code`` phase,
312+
resume the measurement phase from it.
313+
305314
.. option:: --x86-lbr-sample-period=<nBranches/sample>
306315

307316
Specify the LBR sampling period - how many branches before we take a sample.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --benchmark-phase=assemble-measured-code --mode=latency --benchmarks-file=%t.yaml
2+
# RUN: FileCheck --input-file=%t.yaml %s --check-prefixes=CHECK,SERIALIZE
3+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --run-measurement=%t.yaml --mode=latency --dry-run-measurement --use-dummy-perf-counters \
4+
# RUN: --dump-object-to-disk=%t.o | FileCheck %s --check-prefixes=CHECK,DESERIALIZE
5+
# RUN: llvm-objdump -d %t.o | FileCheck %s --check-prefix=OBJDUMP
6+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --dry-run-measurement --use-dummy-perf-counters | \
7+
# RUN: FileCheck %s --check-prefix=NO-SERIALIZE
8+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=min | \
9+
# RUN: FileCheck %s --check-prefix=NO-SERIALIZE
10+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-loop | \
11+
# RUN: FileCheck %s --check-prefix=NO-SERIALIZE
12+
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 --opcode-name=SH3ADD --mode=latency --benchmark-phase=assemble-measured-code --repetition-mode=middle-half-duplicate | \
13+
# RUN: FileCheck %s --check-prefix=NO-SERIALIZE
14+
# REQUIRES: zlib || zstd
15+
16+
# A round-trip test for serialize/deserialize benchmarks.
17+
18+
# CHECK: mode: latency
19+
# CHECK: instructions:
20+
# CHECK-NEXT: - 'SH3ADD X{{.*}} X{{.*}} X{{.*}}'
21+
# CHECK: cpu_name: sifive-p470
22+
# CHECK-NEXT: llvm_triple: riscv64
23+
# CHECK-NEXT: min_instructions: 10000
24+
# CHECK-NEXT: measurements: []
25+
# SERIALIZE: error: actual measurements skipped.
26+
# DESERIALIZE: error: ''
27+
# CHECK: info: Repeating a single explicitly serial instruction
28+
29+
# OBJDUMP: sh3add
30+
31+
# Negative tests: we shouldn't serialize object files in some scenarios.
32+
33+
# NO-SERIALIZE-NOT: object_file:

llvm/tools/llvm-exegesis/lib/BenchmarkResult.cpp

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515
#include "llvm/ADT/StringRef.h"
1616
#include "llvm/ADT/bit.h"
1717
#include "llvm/ObjectYAML/YAML.h"
18+
#include "llvm/Support/Base64.h"
19+
#include "llvm/Support/CommandLine.h"
1820
#include "llvm/Support/Errc.h"
1921
#include "llvm/Support/FileOutputBuffer.h"
2022
#include "llvm/Support/FileSystem.h"
2123
#include "llvm/Support/Format.h"
24+
#include "llvm/Support/Timer.h"
2225
#include "llvm/Support/raw_ostream.h"
2326

2427
static constexpr const char kIntegerPrefix[] = "i_0x";
@@ -27,6 +30,12 @@ static constexpr const char kInvalidOperand[] = "INVALID";
2730

2831
namespace llvm {
2932

33+
// Hidden command-line knob that pins the compression format used for object
// files. Benchmark::setObjectFile() only honours it when the option was
// actually given on the command line.
static cl::opt<compression::Format> ForceObjectFileCompressionFormat(
    "exegesis-force-obj-compress-format",
    cl::desc("Force to use this compression format for object files."),
    cl::values(clEnumValN(compression::Format::Zstd, "zstd", "Using Zstandard"),
               clEnumValN(compression::Format::Zlib, "zlib", "Using LibZ")),
    cl::Hidden);
38+
3039
namespace {
3140

3241
// A mutable struct holding an LLVMState that can be passed through the
@@ -278,6 +287,13 @@ template <> struct ScalarTraits<exegesis::RegisterValue> {
278287
static const bool flow = true;
279288
};
280289

290+
template <> struct ScalarEnumerationTraits<compression::Format> {
291+
static void enumeration(IO &Io, compression::Format &Format) {
292+
Io.enumCase(Format, "zstd", compression::Format::Zstd);
293+
Io.enumCase(Format, "zlib", compression::Format::Zlib);
294+
}
295+
};
296+
281297
template <> struct MappingContextTraits<exegesis::BenchmarkKey, YamlContext> {
282298
static void mapping(IO &Io, exegesis::BenchmarkKey &Obj,
283299
YamlContext &Context) {
@@ -288,6 +304,33 @@ template <> struct MappingContextTraits<exegesis::BenchmarkKey, YamlContext> {
288304
}
289305
};
290306

307+
template <> struct MappingTraits<exegesis::Benchmark::ObjectFile> {
308+
struct NormalizedBase64Binary {
309+
std::string Base64Str;
310+
311+
NormalizedBase64Binary(IO &) {}
312+
NormalizedBase64Binary(IO &, const std::vector<uint8_t> &Data)
313+
: Base64Str(llvm::encodeBase64(Data)) {}
314+
315+
std::vector<uint8_t> denormalize(IO &) {
316+
std::vector<char> Buffer;
317+
if (Error E = llvm::decodeBase64(Base64Str, Buffer))
318+
report_fatal_error(std::move(E));
319+
320+
StringRef Data(Buffer.data(), Buffer.size());
321+
return std::vector<uint8_t>(Data.bytes_begin(), Data.bytes_end());
322+
}
323+
};
324+
325+
static void mapping(IO &Io, exegesis::Benchmark::ObjectFile &Obj) {
326+
Io.mapRequired("compression", Obj.CompressionFormat);
327+
Io.mapRequired("original_size", Obj.UncompressedSize);
328+
MappingNormalization<NormalizedBase64Binary, std::vector<uint8_t>>
329+
ObjFileString(Io, Obj.CompressedBytes);
330+
Io.mapRequired("compressed_bytes", ObjFileString->Base64Str);
331+
}
332+
};
333+
291334
template <> struct MappingContextTraits<exegesis::Benchmark, YamlContext> {
292335
struct NormalizedBinary {
293336
NormalizedBinary(IO &io) {}
@@ -325,9 +368,11 @@ template <> struct MappingContextTraits<exegesis::Benchmark, YamlContext> {
325368
Io.mapRequired("error", Obj.Error);
326369
Io.mapOptional("info", Obj.Info);
327370
// AssembledSnippet
328-
MappingNormalization<NormalizedBinary, std::vector<uint8_t>> BinaryString(
371+
MappingNormalization<NormalizedBinary, std::vector<uint8_t>> SnippetString(
329372
Io, Obj.AssembledSnippet);
330-
Io.mapOptional("assembled_snippet", BinaryString->Binary);
373+
Io.mapOptional("assembled_snippet", SnippetString->Binary);
374+
// ObjectFile
375+
Io.mapOptional("object_file", Obj.ObjFile);
331376
}
332377
};
333378

@@ -364,6 +409,52 @@ Benchmark::readTriplesAndCpusFromYamls(MemoryBufferRef Buffer) {
364409
return Result;
365410
}
366411

412+
/// Compresses \p RawBytes and stores the result in ObjFile, together with the
/// compression format that was used and the size of the uncompressed input.
///
/// If -exegesis-force-obj-compress-format was given on the command line, that
/// format is used; otherwise zstd is preferred and zlib is the fallback.
///
/// \returns an error when the forced format is not available, or when neither
/// format is available; Error::success() otherwise.
Error Benchmark::setObjectFile(StringRef RawBytes) {
  // compression::getReasonIfUnsupported() returns nullptr exactly when the
  // format can be used, so no per-format switch is needed here.
  const auto IsFormatAvailable = [](compression::Format F) {
    return compression::getReasonIfUnsupported(F) == nullptr;
  };

  compression::Format CompressionFormat;
  if (ForceObjectFileCompressionFormat.getNumOccurrences() > 0) {
    CompressionFormat = ForceObjectFileCompressionFormat;
    if (!IsFormatAvailable(CompressionFormat))
      return make_error<StringError>(
          "The designated compression format is not available.",
          inconvertibleErrorCode());
  } else if (IsFormatAvailable(compression::Format::Zstd)) {
    // Try newer compression algorithm first.
    CompressionFormat = compression::Format::Zstd;
  } else if (IsFormatAvailable(compression::Format::Zlib)) {
    CompressionFormat = compression::Format::Zlib;
  } else {
    return make_error<StringError>(
        "None of the compression methods is available.",
        inconvertibleErrorCode());
  }

  // Params(Format) selects the default compression level of the format, which
  // is what calling zstd::compress / zlib::compress without a level does.
  SmallVector<uint8_t> CompressedBytes;
  compression::compress(
      compression::Params(CompressionFormat),
      ArrayRef<uint8_t>(RawBytes.bytes_begin(), RawBytes.bytes_end()),
      CompressedBytes);

  ObjFile = {CompressionFormat,
             RawBytes.size(),
             {CompressedBytes.begin(), CompressedBytes.end()}};
  return Error::success();
}
457+
367458
Expected<Benchmark> Benchmark::readYaml(const LLVMState &State,
368459
MemoryBufferRef Buffer) {
369460
yaml::Input Yin(Buffer);

llvm/tools/llvm-exegesis/lib/BenchmarkResult.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/ADT/StringRef.h"
2222
#include "llvm/MC/MCInst.h"
2323
#include "llvm/MC/MCInstBuilder.h"
24+
#include "llvm/Support/Compression.h"
2425
#include "llvm/Support/YAMLTraits.h"
2526
#include <limits>
2627
#include <set>
@@ -76,6 +77,11 @@ struct BenchmarkKey {
7677
uintptr_t SnippetAddress = 0;
7778
// The register that should be used to hold the loop counter.
7879
unsigned LoopRegister;
80+
81+
// Two keys compare equal when their Config values match and their first
// instructions have the same opcode.
// NOTE(review): only the first instruction is compared; confirm this is the
// intended notion of equality for multi-instruction snippets.
bool operator==(const BenchmarkKey &RHS) const {
  if (!(Config == RHS.Config))
    return false;
  // Indexing an empty Instructions container is undefined behaviour, so a key
  // without instructions is only equal to another key without instructions.
  if (Instructions.empty() || RHS.Instructions.empty())
    return Instructions.empty() && RHS.Instructions.empty();
  return Instructions[0].getOpcode() == RHS.Instructions[0].getOpcode();
}
7985
};
8086

8187
struct BenchmarkMeasure {
@@ -122,6 +128,16 @@ struct Benchmark {
122128
std::string Error;
123129
std::string Info;
124130
std::vector<uint8_t> AssembledSnippet;
131+
132+
struct ObjectFile {
133+
llvm::compression::Format CompressionFormat;
134+
size_t UncompressedSize = 0;
135+
std::vector<uint8_t> CompressedBytes;
136+
137+
bool isValid() const { return UncompressedSize && CompressedBytes.size(); }
138+
};
139+
std::optional<ObjectFile> ObjFile;
140+
125141
// How to aggregate measurements.
126142
enum ResultAggregationModeE { Min, Max, Mean, MinVariance };
127143

@@ -132,6 +148,10 @@ struct Benchmark {
132148
Benchmark &operator=(const Benchmark &) = delete;
133149
Benchmark &operator=(Benchmark &&) = delete;
134150

151+
// Compress raw object file bytes and store the result, along with the
152+
// compression format and the uncompressed size, in ObjFile.
153+
class Error setObjectFile(StringRef RawBytes);
154+
135155
// Read functions.
136156
static Expected<Benchmark> readYaml(const LLVMState &State,
137157
MemoryBufferRef Buffer);

llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,7 @@ Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
624624
Expected<BenchmarkRunner::RunnableConfiguration>
625625
BenchmarkRunner::getRunnableConfiguration(
626626
const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
627+
Benchmark::RepetitionModeE RepetitionMode,
627628
const SnippetRepetitor &Repetitor) const {
628629
RunnableConfiguration RC;
629630

@@ -668,12 +669,54 @@ BenchmarkRunner::getRunnableConfiguration(
668669
LoopBodySize, GenerateMemoryInstructions);
669670
if (Error E = Snippet.takeError())
670671
return std::move(E);
672+
// There is no need to serialize/deserialize the object file if we're
673+
// simply running end-to-end measurements.
674+
// Same goes for any repetition mode that requires more than a single
675+
// snippet.
676+
if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure &&
677+
(RepetitionMode == Benchmark::Loop ||
678+
RepetitionMode == Benchmark::Duplicate)) {
679+
if (Error E = BenchmarkResult.setObjectFile(*Snippet))
680+
return std::move(E);
681+
}
671682
RC.ObjectFile = getObjectFromBuffer(*Snippet);
672683
}
673684

674685
return std::move(RC);
675686
}
676687

688+
/// Builds a RunnableConfiguration from a benchmark that carries a serialized
/// (compressed) object file, by decompressing B.ObjFile and turning the bytes
/// back into an object file.
///
/// \param B the benchmark to resume; it is moved into the returned
///          configuration.
/// \returns the configuration, or an error when no valid object file is
///          attached, when the required compression library is not available,
///          or when decompression fails.
Expected<BenchmarkRunner::RunnableConfiguration>
BenchmarkRunner::getRunnableConfiguration(Benchmark &&B) const {
  // Report a missing or empty payload as a regular error: an assert would be
  // compiled out of release builds, leaving a dereference of an empty
  // optional below.
  if (!B.ObjFile.has_value() || !B.ObjFile->isValid())
    return make_error<StringError>("No serialized object file is attached.",
                                   inconvertibleErrorCode());

  const Benchmark::ObjectFile &ObjFile = *B.ObjFile;
  SmallVector<uint8_t> DecompressedObjFile;
  switch (ObjFile.CompressionFormat) {
  case compression::Format::Zstd:
    if (!compression::zstd::isAvailable())
      return make_error<StringError>("zstd is not available for decompression.",
                                     inconvertibleErrorCode());
    if (Error E = compression::zstd::decompress(ObjFile.CompressedBytes,
                                                DecompressedObjFile,
                                                ObjFile.UncompressedSize))
      return std::move(E);
    break;
  case compression::Format::Zlib:
    if (!compression::zlib::isAvailable())
      return make_error<StringError>("zlib is not available for decompression.",
                                     inconvertibleErrorCode());
    if (Error E = compression::zlib::decompress(ObjFile.CompressedBytes,
                                                DecompressedObjFile,
                                                ObjFile.UncompressedSize))
      return std::move(E);
    break;
  }

  // NOTE(review): DecompressedObjFile is destroyed when this function
  // returns, so this relies on getObjectFromBuffer copying the bytes it is
  // given. Confirm against its definition.
  StringRef Buffer(reinterpret_cast<const char *>(DecompressedObjFile.data()),
                   DecompressedObjFile.size());
  return RunnableConfiguration{std::move(B), getObjectFromBuffer(Buffer)};
}
719+
677720
Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
678721
BenchmarkRunner::createFunctionExecutor(
679722
object::OwningBinary<object::ObjectFile> ObjectFile,

llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,18 +54,25 @@ class BenchmarkRunner {
5454
RunnableConfiguration &operator=(RunnableConfiguration &&) = delete;
5555
RunnableConfiguration &operator=(const RunnableConfiguration &) = delete;
5656

57+
Benchmark BenchmarkResult;
58+
object::OwningBinary<object::ObjectFile> ObjectFile;
59+
5760
private:
5861
RunnableConfiguration() = default;
5962

60-
Benchmark BenchmarkResult;
61-
object::OwningBinary<object::ObjectFile> ObjectFile;
63+
RunnableConfiguration(Benchmark &&B,
64+
object::OwningBinary<object::ObjectFile> &&OF)
65+
: BenchmarkResult(std::move(B)), ObjectFile(std::move(OF)) {}
6266
};
6367

6468
Expected<RunnableConfiguration>
6569
getRunnableConfiguration(const BenchmarkCode &Configuration,
6670
unsigned MinInstructions, unsigned LoopUnrollFactor,
71+
Benchmark::RepetitionModeE RepetitionMode,
6772
const SnippetRepetitor &Repetitor) const;
6873

74+
Expected<RunnableConfiguration> getRunnableConfiguration(Benchmark &&B) const;
75+
6976
std::pair<Error, Benchmark>
7077
runConfiguration(RunnableConfiguration &&RC,
7178
const std::optional<StringRef> &DumpFile,

0 commit comments

Comments
 (0)