From 9bed3ae2f1bb98fc6f53a17cca98da4b1562e1a7 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Wed, 23 Apr 2025 12:27:02 +0000 Subject: [PATCH 1/4] [Clang][AArch64] Add pessimistic vscale_range when sve is in target-features The "target-features" function attribute is not currently considered when adding vscale_range to a function. When +sve is pushed onto functions with "#pragma attribute push(+sve)", the function potentially misses out on optimizations that rely on vscale_range being present. --- clang/include/clang/Basic/TargetInfo.h | 5 +++-- clang/lib/Basic/Targets/AArch64.cpp | 11 ++++++++++- clang/lib/Basic/Targets/AArch64.h | 8 ++++++-- clang/lib/Basic/Targets/RISCV.cpp | 3 ++- clang/lib/Basic/Targets/RISCV.h | 8 ++++++-- clang/lib/CodeGen/CodeGenFunction.cpp | 2 +- .../test/CodeGen/AArch64/cpu-supports-target.c | 2 +- clang/test/CodeGen/AArch64/targetattr.c | 18 +++++++++--------- 8 files changed, 38 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 8c3dcda25bc8d..ba24f2aebcf5b 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -48,6 +48,7 @@ namespace llvm { struct fltSemantics; +class Function; } namespace clang { @@ -1037,8 +1038,8 @@ class TargetInfo : public TransferrableTargetInfo, /// Returns target-specific min and max values VScale_Range. virtual std::optional> - getVScaleRange(const LangOptions &LangOpts, - bool IsArmStreamingFunction) const { + getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction, + llvm::Function *F = nullptr) const { return std::nullopt; } /// The __builtin_clz* and __builtin_ctz* built-in diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 3633bab6e0df9..045df30e6c77a 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Function.h" #include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" #include @@ -794,7 +795,8 @@ AArch64TargetInfo::getTargetBuiltins() const { std::optional> AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts, - bool IsArmStreamingFunction) const { + bool IsArmStreamingFunction, + llvm::Function *F) const { if (LangOpts.VScaleMin || LangOpts.VScaleMax) return std::pair( LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax); @@ -802,6 +804,13 @@ AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts, if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme"))) return std::pair(1, 16); + if (F && F->hasFnAttribute("target-features")) { + StringRef Str = F->getFnAttribute("target-features").getValueAsString(); + for (const auto &s : llvm::split(Str, ",")) { + if (s == "+sve") + return std::pair(1, 16); + } + } return std::nullopt; } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 2fab88cfca901..eb9fc24e51638 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -18,6 +18,10 @@ #include "llvm/TargetParser/AArch64TargetParser.h" #include +namespace llvm { +class Function; +} + namespace clang { namespace targets { @@ -187,8 +191,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { llvm::SmallVector getTargetBuiltins() const override; std::optional> - getVScaleRange(const LangOptions &LangOpts, - bool IsArmStreamingFunction) const override; + getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction, + llvm::Function *F = nullptr) const override; bool doesFeatureAffectCodeGen(StringRef Name) const override; bool validateCpuSupports(StringRef FeatureStr) const override; bool hasFeature(StringRef Feature) const override; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 390ef0f3ac884..95211cc7c5554 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -332,7 +332,8 @@ bool RISCVTargetInfo::initFeatureMap( std::optional> RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts, - bool IsArmStreamingFunction) const { + bool IsArmStreamingFunction, + llvm::Function *F) const { // RISCV::RVVBitsPerBlock is 64. unsigned VScaleMin = ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock; diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index c26aa19080162..b072ccfe28ac4 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -20,6 +20,10 @@ #include "llvm/TargetParser/Triple.h" #include +namespace llvm { +class Function; +} + namespace clang { namespace targets { @@ -99,8 +103,8 @@ class RISCVTargetInfo : public TargetInfo { const std::vector &FeaturesVec) const override; std::optional> - getVScaleRange(const LangOptions &LangOpts, - bool IsArmStreamingFunction) const override; + getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction, + llvm::Function *F = nullptr) const override; bool hasFeature(StringRef Feature) const override; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 4d29ceace646f..e9b1231e79289 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1117,7 +1117,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Add vscale_range attribute if appropriate. std::optional> VScaleRange = getContext().getTargetInfo().getVScaleRange( - getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false); + getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false, CurFn); if (VScaleRange) { CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs( getLLVMContext(), VScaleRange->first, VScaleRange->second)); diff --git a/clang/test/CodeGen/AArch64/cpu-supports-target.c b/clang/test/CodeGen/AArch64/cpu-supports-target.c index a39ffd4e4a74d..9b551a0714e74 100644 --- a/clang/test/CodeGen/AArch64/cpu-supports-target.c +++ b/clang/test/CodeGen/AArch64/cpu-supports-target.c @@ -220,7 +220,7 @@ int test_versions() { //. // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } -// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } +// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} diff --git a/clang/test/CodeGen/AArch64/targetattr.c b/clang/test/CodeGen/AArch64/targetattr.c index cfe115bf97ed3..7052f2917cf71 100644 --- a/clang/test/CodeGen/AArch64/targetattr.c +++ b/clang/test/CodeGen/AArch64/targetattr.c @@ -201,21 +201,21 @@ void applem4() {} //. // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" } +// CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } +// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } +// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } +// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve-bitperm,+sve2,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" } // CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="cortex-a710" } // CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+ete,+fp-armv8,+neon,+trbe,+v8a" } // CHECK: attributes #[[ATTR7]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" } // CHECK: attributes #[[ATTR8]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+perfmon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.1a,+v8.2a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK: attributes #[[ATTR9]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" } -// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" } +// CHECK: attributes #[[ATTR9]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" } +// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" } // CHECK: attributes #[[ATTR11]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+ccpp,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" } -// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } +// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } // CHECK: attributes #[[ATTR13]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16" } -// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } +// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } +// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone vscale_range(1,16) "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } // CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" } // CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" } From 2e9664623a2e63661a8fb17b4e5e5c25d81f8d64 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Wed, 30 Apr 2025 09:39:11 +0000 Subject: [PATCH 2/4] use getFunctionFeatureMap --- clang/include/clang/Basic/TargetInfo.h | 3 +-- clang/lib/Basic/Targets/AArch64.cpp | 13 +++---------- clang/lib/Basic/Targets/AArch64.h | 6 +----- clang/lib/Basic/Targets/RISCV.cpp | 2 +- clang/lib/Basic/Targets/RISCV.h | 6 +----- clang/lib/CodeGen/CodeGenFunction.cpp | 10 ++++++++-- 6 files changed, 15 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index ba24f2aebcf5b..9377bf7c0f148 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -48,7 +48,6 @@ namespace llvm { struct fltSemantics; -class Function; } namespace clang { @@ -1039,7 +1038,7 @@ class TargetInfo : public TransferrableTargetInfo, /// Returns target-specific min and max values VScale_Range. virtual std::optional> getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction, - llvm::Function *F = nullptr) const { + llvm::StringMap *FeatureMap = nullptr) const { return std::nullopt; } /// The __builtin_clz* and __builtin_ctz* built-in diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 045df30e6c77a..9ac119fcf6eb7 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/Function.h" #include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" #include @@ -796,21 +795,15 @@ AArch64TargetInfo::getTargetBuiltins() const { std::optional> AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction, - llvm::Function *F) const { + llvm::StringMap *FeatureMap) const { if (LangOpts.VScaleMin || LangOpts.VScaleMax) return std::pair( LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax); - if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme"))) + if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")) || + (FeatureMap && FeatureMap->lookup("sve"))) return std::pair(1, 16); - if (F && F->hasFnAttribute("target-features")) { - StringRef Str = F->getFnAttribute("target-features").getValueAsString(); - for (const auto &s : llvm::split(Str, ",")) { - if (s == "+sve") - return std::pair(1, 16); - } - } return std::nullopt; } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index eb9fc24e51638..6eeac69af20df 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -18,10 +18,6 @@ #include "llvm/TargetParser/AArch64TargetParser.h" #include -namespace llvm { -class Function; -} - namespace clang { namespace targets { @@ -192,7 +188,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { std::optional> getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction, - llvm::Function *F = nullptr) const override; + llvm::StringMap *FeatureMap = nullptr) const override; bool doesFeatureAffectCodeGen(StringRef Name) const override; bool validateCpuSupports(StringRef FeatureStr) const override; bool hasFeature(StringRef Feature) const override; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 95211cc7c5554..a1a2437f288a0 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -333,7 +333,7 @@ bool RISCVTargetInfo::initFeatureMap( std::optional> RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction, - llvm::Function *F) const { + llvm::StringMap *FeatureMap) const { // RISCV::RVVBitsPerBlock is 64. unsigned VScaleMin = ISAInfo->getMinVLen() / llvm::RISCV::RVVBitsPerBlock; diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index b072ccfe28ac4..0b36c9d5d9cc8 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -20,10 +20,6 @@ #include "llvm/TargetParser/Triple.h" #include -namespace llvm { -class Function; -} - namespace clang { namespace targets { @@ -104,7 +100,7 @@ class RISCVTargetInfo : public TargetInfo { std::optional> getVScaleRange(const LangOptions &LangOpts, bool IsArmStreamingFunction, - llvm::Function *F = nullptr) const override; + llvm::StringMap *FeatureMap = nullptr) const override; bool hasFeature(StringRef Feature) const override; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index e9b1231e79289..10019443b2590 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1115,9 +1115,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, Fn->removeFnAttr("zero-call-used-regs"); // Add vscale_range attribute if appropriate. + llvm::StringMap FeatureMap; + bool IsArmStreaming = false; + if (FD) { + getContext().getFunctionFeatureMap(FeatureMap, FD); + IsArmStreaming = IsArmStreamingFunction(FD, true); + } std::optional> VScaleRange = - getContext().getTargetInfo().getVScaleRange( - getLangOpts(), FD ? IsArmStreamingFunction(FD, true) : false, CurFn); + getContext().getTargetInfo().getVScaleRange(getLangOpts(), IsArmStreaming, + &FeatureMap); if (VScaleRange) { CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs( getLLVMContext(), VScaleRange->first, VScaleRange->second)); From ad81417a96a71c2f476bbaa8d14ba06d42ea36f3 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Fri, 2 May 2025 09:26:03 +0000 Subject: [PATCH 3/4] add sme --- clang/lib/Basic/Targets/AArch64.cpp | 7 +++++-- clang/test/CodeGen/AArch64/targetattr.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 9ac119fcf6eb7..e1f6c7b834dc7 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -800,8 +800,11 @@ AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts, return std::pair( LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax); - if (hasFeature("sve") || (IsArmStreamingFunction && hasFeature("sme")) || - (FeatureMap && FeatureMap->lookup("sve"))) + if (hasFeature("sve") || (FeatureMap && (FeatureMap->lookup("sve")))) + return std::pair(1, 16); + + if (IsArmStreamingFunction && + (hasFeature("sme") || (FeatureMap && (FeatureMap->lookup("sme"))))) return std::pair(1, 16); return std::nullopt; diff --git a/clang/test/CodeGen/AArch64/targetattr.c b/clang/test/CodeGen/AArch64/targetattr.c index 7052f2917cf71..f1a7eda4a82d6 100644 --- a/clang/test/CodeGen/AArch64/targetattr.c +++ b/clang/test/CodeGen/AArch64/targetattr.c @@ -199,6 +199,22 @@ __attribute__((target("cpu=apple-m4"))) // void applem4() {} +__attribute__((target("+sme"))) +// CHECK-LABEL: define {{[^@]+}}@plussmestreaming +// CHECK-SAME: () #[[ATTR19:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +void plussmestreaming(void) __arm_streaming {} + +__attribute__((target("+sme"))) +// CHECK-LABEL: define {{[^@]+}}@plussmelocallystreaming +// CHECK-SAME: () #[[ATTR20:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +__arm_locally_streaming void plussmelocallystreaming(void) {} + //. // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone vscale_range(1,16) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } @@ -219,6 +235,8 @@ void applem4() {} // CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" } // CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" } +// CEHCK: attributes #[[ATTR19]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" } +// CEHCK: attributes #[[ATTR20]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} From 8338d13bef0964ecaf0acd25603917dea2d05e2b Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Fri, 2 May 2025 09:36:04 +0000 Subject: [PATCH 4/4] Fix typo in test --- clang/test/CodeGen/AArch64/targetattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/AArch64/targetattr.c b/clang/test/CodeGen/AArch64/targetattr.c index f1a7eda4a82d6..3a81dab31acc0 100644 --- a/clang/test/CodeGen/AArch64/targetattr.c +++ b/clang/test/CodeGen/AArch64/targetattr.c @@ -235,8 +235,8 @@ __arm_locally_streaming void plussmelocallystreaming(void) {} // CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" } // CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+spe-eef,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" } -// CEHCK: attributes #[[ATTR19]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" } -// CEHCK: attributes #[[ATTR20]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" } +// CHECK: attributes #[[ATTR19]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" } +// CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sme" } //. // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}