Skip to content

Merge preview/rvv-exegesis with main (dirty change history) #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: merge/rvv-exegesis
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/MC/MCSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,9 @@ MCSchedModel::getReciprocalThroughput(const MCSubtargetInfo &STI,
for (; I != E; ++I) {
if (!I->ReleaseAtCycle)
continue;
assert(I->ReleaseAtCycle > I->AcquireAtCycle);
unsigned NumUnits = SM.getProcResource(I->ProcResourceIdx)->NumUnits;
double Temp = NumUnits * 1.0 / I->ReleaseAtCycle;
double Temp = NumUnits * 1.0 / (I->ReleaseAtCycle - I->AcquireAtCycle);
Throughput = Throughput ? std::min(*Throughput, Temp) : Temp;
}
if (Throughput)
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,19 @@ char RISCVInsertWriteVXRM::ID = 0;
INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
false, false)

/// Translate an RVV pseudo opcode to its MC opcode, memoizing the answer.
///
/// \param VPseudoOpcode the pseudo instruction opcode to translate.
/// \returns the value RISCV::getRVVMCOpcode yields for \p VPseudoOpcode. It is
/// computed on the first request for a given opcode and read back from the
/// cache on every later request.
///
/// NOTE(review): the cache is a function-local static map with no locking
/// visible here -- confirm this is never reached from concurrent threads.
static unsigned getAndCacheRVVMCOpcode(unsigned VPseudoOpcode) {
  // VPseudo opcode -> MC opcode
  static DenseMap<unsigned, unsigned> OpcodeCache;
  // One try_emplace both probes for an existing entry and reserves the slot on
  // a miss, so a miss hashes the key once instead of twice (find + insert).
  auto [It, Inserted] = OpcodeCache.try_emplace(VPseudoOpcode);
  if (Inserted)
    It->second = RISCV::getRVVMCOpcode(VPseudoOpcode);
  return It->second;
}

static bool ignoresVXRM(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
switch (getAndCacheRVVMCOpcode(MI.getOpcode())) {
default:
return false;
case RISCV::VNCLIP_WI:
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/deserialize-obj-file.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -start-before-phase=measure --mode=latency --dry-run-measurement --use-dummy-perf-counters \
# RUN: --dump-object-to-disk=%t.o %s > %t.result.yml
# RUN: llvm-objdump -d %t.o | FileCheck %s

# CHECK: vsetvli {{.*}}, zero, e32, m1, tu, ma
# CHECK: fsrmi {{.*}}, 0x0
# CHECK: vfwredusum.vs

---
mode: latency
key:
instructions:
- 'PseudoVFWREDUSUM_VS_M1_E32 V13 V13 V13 V7 i_0x0 i_0xffffffffffffffff i_0x5 i_0x0'
config: 'vtype = {FRM: rne, AVL: VLMAX, SEW: e32, Policy: tu/mu}'
register_initial_values:
- 'V13=0x0'
- 'V7=0x0'
cpu_name: sifive-x280
llvm_triple: riscv64
num_repetitions: 100
measurements: []
error: actual measurements skipped.
info: ''
assembled_snippet: 57730009F3532000D796D3C6D796D3C6D796D3C6D796D3C6739023008280
object_file:
compression: zlib
original_size: 5632
compressed_bytes: 'eJztWDFvEzEUfk6btEgMoWVAogMSHSokrJybRrCgIFQQEjAUKiYU3V3s9kQul5zN6egC4hd0YmTuL2FGYuB3oK5IYPt8SXBcIbYO/qTn973Pfs8v5zflw/6zxw2EoAaCc5hHC7heuaa0vmZ9WHef9PDw8PDw8PDw8PDw8PDwuGR4zeHK+ctb8OPz96/eLo/x09vw6ePDFgLIEx4XgH7J11ptN/Oi103IJBikZNIZhIoxMiGDoVpipRWBXE6SmOdEE0bHMU00Z8dB5dJkrFkUVi7SrqC7hM1YaVivO5wxNmNm11Qs5iWLUUDumXojster6S6p2V4wo72uZiVnskLEZI2O/EEqnKZhHE+zqdxWc9o284pODgCVCN282tDaDaN/+cdfUWvq68HP3+7dxpJydIEe6XV1SX+j1+aSfkfaxkKdus8tE9+3b8GClgL2S3pEecKfjln2inIBWE8BDoXIk+idoBxYlgEeZ4LiJy8O73IRxm/lKToKMT0esDxMKWAuchFG0r9Pld8eYqKWALZL3HF/iv/Ec2krDv10s/IjS7efCRlr2QXMgy+9a/vvEDtq6rxrDtFxVs2P7H9yUf6alWDnPzKaPSlnG5XfsfR1K34A1TT1Lb3cnPen+4Bquur8Wj903K3wzdx/ttB3y5H/B0zRwDY='
...
10 changes: 10 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/eligible-inst.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 | FileCheck %s --allow-empty --check-prefix=LATENCY
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVCOMPRESS_VM_M2_E8,PseudoVCPOP_M_B32 --min-instructions=100 | FileCheck %s --check-prefix=RTHROUGHPUT

# LATENCY-NOT: PseudoVCOMPRESS_VM_M2_E8
# LATENCY-NOT: PseudoVCPOP_M_B32

# RTHROUGHPUT: PseudoVCOMPRESS_VM_M2_E8
# RTHROUGHPUT: PseudoVCPOP_M_B32
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/explicit-sew.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# Make sure none of the configs has a SEW other than e32
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
# CHECK: SEW: e32
# CHECK-NOT: SEW: e{{(8|16|64)}}
6 changes: 6 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/filter.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput --opcode-name=PseudoVNCLIPU_WX_M1_MASK \
# RUN: --riscv-filter-config='vtype = {VXRM: rod, AVL: VLMAX, SEW: e(8|16), Policy: ta/mu}' --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e8, Policy: ta/mu}'
# CHECK: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e16, Policy: ta/mu}'
# CHECK-NOT: config: 'vtype = {VXRM: rod, AVL: VLMAX, SEW: e(32|64), Policy: ta/mu}'
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/reduction.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVWREDSUMU_VS_M8_E32 --min-instructions=100 | \
# RUN: FileCheck %s

# Make sure reduction ops don't have alias between vd and vs1
# CHECK: instructions:
# CHECK-NEXT: PseudoVWREDSUMU_VS_M8_E32
# CHECK-NOT: V[[REG:[0-9]+]] V[[REG]] V{{[0-9]+}}M8 V[[REG]]
6 changes: 6 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/self-aliasing.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVXOR_VX_M4 --min-instructions=100 | \
# RUN: FileCheck %s

# Make sure all def / use operands are the same in latency mode.
# CHECK: instructions:
# CHECK-NEXT: PseudoVXOR_VX_M4 V[[REG:[0-9]+]]M4 V[[REG]]M4 V[[REG]]M4 X{{.*}}
12 changes: 12 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/skip-rm.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVAADDU_VV_M1 \
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=VXRM
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFADD_VFPR16_M1_E16 \
# RUN: --riscv-enumerate-rounding-modes=false --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRM

# VXRM: PseudoVAADDU_VV_M1
# VXRM: VXRM: rnu
# VXRM-NOT: VXRM: {{(rne|rdn|rod)}}

# FRM: PseudoVFADD_VFPR16_M1_E16
# FRM: FRM: rne
# FRM-NOT: FRM: {{(rtz|rdn|rup|rmm|dyn)}}
30 changes: 30 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew-zvk.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVAESDF_VS_M1_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVGHSH_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM4K_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM3C_VI_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=ZVK
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSHA2MS_VV_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --allow-empty --check-prefix=ZVKNH
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p670 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSM3C_VI_M1 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --allow-empty --check-prefix=EMPTY

# Most vector crypto only supports SEW=32, except Zvknhb which also supports SEW=64
# ZVK-NOT: SEW: e{{(8|16)}}
# ZVK: SEW: e32
# ZVK-NOT: SEW: e64

# ZVKNH(A|B) can either have SEW=32 (EGW=128) or SEW=64 (EGW=256)

# ZVKNH-NOT: SEW: e{{(8|16)}}
# ZVKNH: SEW: e{{(32|64)}}

# EMPTY-NOT: SEW: e{{(8|16|32|64)}}
41 changes: 41 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/valid-sew.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVMUL_VV_MF4_MASK \
# RUN: --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s --check-prefix=FRAC-LMUL
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVFADD_VFPR16_M1_E16,PseudoVFADD_VV_M2_E16,PseudoVFCLASS_V_MF2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=FP
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=inverse_throughput \
# RUN: --opcode-name=PseudoVSEXT_VF8_M2,PseudoVZEXT_VF8_M2 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=VEXT
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-p470 -benchmark-phase=assemble-measured-code --mode=latency \
# RUN: --opcode-name=PseudoVFREDUSUM_VS_M1_E16 --max-configs-per-opcode=1000 --min-instructions=100 | \
# RUN: FileCheck %s --check-prefix=VFRED --allow-empty

# Make sure only the supported SEWs are generated for fractional LMUL.
# FRAC-LMUL: PseudoVMUL_VV_MF4_MASK
# FRAC-LMUL: SEW: e8
# FRAC-LMUL: SEW: e16
# FRAC-LMUL-NOT: SEW: e{{(32|64)}}

# Make sure only SEWs that are equal to the supported FLEN are generated
# FP: PseudoVFADD_VFPR16_M1_E16
# FP-NOT: SEW: e8
# FP: PseudoVFADD_VV_M2_E16
# FP-NOT: SEW: e8
# FP: PseudoVFCLASS_V_MF2
# FP-NOT: SEW: e8

# VS/ZEXT can only operate on SEW that will not lead to invalid EEW on the
# source operand.
# VEXT: PseudoVSEXT_VF8_M2
# VEXT-NOT: SEW: e8
# VEXT-NOT: SEW: e16
# VEXT-NOT: SEW: e32
# VEXT: SEW: e64
# VEXT: PseudoVZEXT_VF8_M2
# VEXT-NOT: SEW: e8
# VEXT-NOT: SEW: e16
# VEXT-NOT: SEW: e32
# VEXT: SEW: e64

# P470 doesn't have Zvfh so 16-bit vfredusum shouldn't exist
# VFRED-NOT: PseudoVFREDUSUM_VS_M1_E16
7 changes: 7 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/vlmax-only.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --riscv-vlmax-for-vl --max-configs-per-opcode=1000 --min-instructions=100 | FileCheck %s

# Only allow VLMAX for AVL when -riscv-vlmax-for-vl is present
# CHECK: PseudoVFWREDUSUM_VS_M1_E32
# CHECK: AVL: VLMAX
# CHECK-NOT: AVL: {{(simm5|<MCOperand: .*>)}}
13 changes: 13 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/rvv/vtype-rm-setup.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VFWREDUSUM
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVSSRL_VX_MF4 \
# RUN: --max-configs-per-opcode=1 --min-instructions=100 --dump-object-to-disk=%t.o > %t.txt
# RUN: llvm-objdump --triple=riscv64 -d %t.o | FileCheck %s --check-prefix=VSSRL

# Make sure the correct VSETVL / VXRM write / FRM write instructions are generated
# VFWREDUSUM: vsetvli {{.*}}, zero, e32, m1, tu, ma
# VFWREDUSUM: fsrmi {{.*}}, 0x0

# VSSRL: vsetvli {{.*}}, zero, e8, mf4, tu, ma
# VSSRL: csrwi vxrm, 0x0
8 changes: 8 additions & 0 deletions llvm/test/tools/llvm-exegesis/RISCV/serialize-obj-file.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# RUN: llvm-exegesis -mtriple=riscv64 -mcpu=sifive-x280 -benchmark-phase=assemble-measured-code --mode=latency --opcode-name=PseudoVFWREDUSUM_VS_M1_E32 \
# RUN: --max-configs-per-opcode=1 --min-instructions=100 | FileCheck %s

# A simple check on object file serialization
# CHECK: object_file:
# CHECK-NEXT: compression: {{(zlib|zstd)}}
# CHECK-NEXT: original_size: {{[0-9]+}}
# CHECK-NEXT: compressed_bytes: '{{.*}}'
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-exegesis/X86/analysis-noise.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-inconsistencies-output-file=- -analysis-clusters-output-file="" -analysis-numpoints=3 | FileCheck %s
# XFAIL: *

# CHECK: DOCTYPE
# CHECK: [noise] Cluster (1 points)
Expand Down
Loading