From 5df3e4bb39913552dc82b75286d1598cf6ce18be Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Mon, 28 Apr 2025 15:31:44 +0100 Subject: [PATCH] Reland [llvm] Add support for llvm IR atomicrmw fminimum/fmaximum instructions (#136759) This patch adds support for LLVM IR atomicrmw `fmaximum` and `fminimum` instructions. These mirror the `llvm.maximum.*` and `llvm.minimum.*` instructions, but are atomic and use IEEE754 2019 handling for NaNs, which is different to `fmax` and `fmin`. See: https://llvm.org/docs/LangRef.html#llvm-minimum-intrinsic for more details. Future changes will allow this LLVM IR to be lowered to specialised assembler instructions on suitable targets, such as AArch64. --- llvm/docs/GlobalISel/GenericOpcode.rst | 3 +- llvm/docs/LangRef.rst | 10 +- llvm/docs/ReleaseNotes.md | 3 + llvm/include/llvm-c/Core.h | 6 + llvm/include/llvm/AsmParser/LLToken.h | 2 + llvm/include/llvm/Bitcode/LLVMBitCodes.h | 4 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 36 +++ llvm/include/llvm/CodeGen/ISDOpcodes.h | 2 + llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 4 + llvm/include/llvm/IR/Instructions.h | 10 + llvm/include/llvm/Support/TargetOpcodes.def | 2 + llvm/include/llvm/Target/GenericOpcodes.td | 2 + .../Target/GlobalISel/SelectionDAGCompat.td | 2 + .../include/llvm/Target/TargetSelectionDAG.td | 4 + llvm/lib/AsmParser/LLLexer.cpp | 2 + llvm/lib/AsmParser/LLParser.cpp | 8 + llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 4 + llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 4 + llvm/lib/CodeGen/AtomicExpandPass.cpp | 4 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 6 + .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 16 + .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 + .../SelectionDAG/SelectionDAGBuilder.cpp | 6 + .../SelectionDAG/SelectionDAGDumper.cpp | 2 + llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 + llvm/lib/IR/Core.cpp | 8 + llvm/lib/IR/Instructions.cpp | 4 + .../AMDGPU/AMDGPULowerBufferFatPointers.cpp | 10 + llvm/lib/Transforms/Utils/LowerAtomic.cpp | 4 + 
llvm/test/Assembler/atomic.ll | 18 ++ llvm/test/Bitcode/compatibility.ll | 6 + .../GlobalISel/legalizer-info-validation.mir | 6 + .../GlobalISel/legalizer-info-validation.mir | 8 +- .../match-table-cxx.td | 132 ++++---- .../GlobalISelEmitter/GlobalISelEmitter.td | 2 +- .../AtomicExpand/AArch64/atomicrmw-fp.ll | 293 ++++++++++++++++++ llvm/test/Transforms/InstCombine/atomicrmw.ll | 72 +++++ .../Transforms/LowerAtomic/atomic-load.ll | 43 +++ 38 files changed, 679 insertions(+), 73 deletions(-) diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 5291b42d80870..987d19e2f6ce1 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -922,7 +922,8 @@ operands. G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_FADD, G_ATOMICRMW_FSUB, G_ATOMICRMW_FMAX, - G_ATOMICRMW_FMIN, G_ATOMICRMW_UINC_WRAP, + G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAXIMUM, + G_ATOMICRMW_FMINIMUM, G_ATOMICRMW_UINC_WRAP, G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_USUB_COND, G_ATOMICRMW_USUB_SAT diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1c1901be4d10e..c8cc38c23cff3 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -11597,6 +11597,8 @@ operation. The operation must be one of the following keywords: - fsub - fmax - fmin +- fmaximum +- fminimum - uinc_wrap - udec_wrap - usub_cond @@ -11606,7 +11608,7 @@ For most of these operations, the type of '' must be an integer type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. For xchg, this may also be a floating point or a pointer type with the same size constraints -as integers. For fadd/fsub/fmax/fmin, this must be a floating-point +as integers. For fadd/fsub/fmax/fmin/fmaximum/fminimum, this must be a floating-point or fixed vector of floating-point type. The type of the '````' operand must be a pointer to that type. 
If the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not allowed to modify the @@ -11647,8 +11649,10 @@ operation argument: - umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned comparison) - fadd: ``*ptr = *ptr + val`` (using floating point arithmetic) - fsub: ``*ptr = *ptr - val`` (using floating point arithmetic) -- fmax: ``*ptr = maxnum(*ptr, val)`` (match the `llvm.maxnum.*`` intrinsic) -- fmin: ``*ptr = minnum(*ptr, val)`` (match the `llvm.minnum.*`` intrinsic) +- fmax: ``*ptr = maxnum(*ptr, val)`` (match the ``llvm.maxnum.*`` intrinsic) +- fmin: ``*ptr = minnum(*ptr, val)`` (match the ``llvm.minnum.*`` intrinsic) +- fmaximum: ``*ptr = maximum(*ptr, val)`` (match the ``llvm.maximum.*`` intrinsic) +- fminimum: ``*ptr = minimum(*ptr, val)`` (match the ``llvm.minimum.*`` intrinsic) - uinc_wrap: ``*ptr = (*ptr u>= val) ? 0 : (*ptr + 1)`` (increment value with wraparound to zero when incremented above input value) - udec_wrap: ``*ptr = ((*ptr == 0) || (*ptr u> val)) ? val : (*ptr - 1)`` (decrement with wraparound to input value when decremented below zero). - usub_cond: ``*ptr = (*ptr u>= val) ? *ptr - val : *ptr`` (subtract only if no unsigned overflow). diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index aded1bd67ed76..3972e028abfd6 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -73,6 +73,9 @@ Changes to LLVM infrastructure themselves (i.e., the `TargetIntrinsicInfo` class). * Fix Microsoft demangling of string literals to be stricter (#GH129970)) +* Added support for ``fmaximum`` and ``fminimum`` in the ``atomicrmw`` instruction. The + comparison is expected to match the behavior of ``llvm.maximum.*`` and + ``llvm.minimum.*`` respectively. 
Changes to building LLVM ------------------------ diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 596531c93a949..6857944e6875f 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -393,6 +393,12 @@ typedef enum { LLVMAtomicRMWBinOpUSubCond, /** = G_ATOMICRMW_FMAXIMUM Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the floating point maximum of + /// \p Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWFMaximum(const DstOp &OldValRes, + const SrcOp &Addr, + const SrcOp &Val, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes = G_ATOMICRMW_FMINIMUM Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the floating point minimum of + /// \p Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWFMinimum(const DstOp &OldValRes, + const SrcOp &Addr, + const SrcOp &Val, + MachineMemOperand &MMO); + /// Build and insert `OldValRes = G_ATOMICRMW_USUB_COND Addr, Val, MMO`. 
/// /// Atomically replace the value at \p Addr with the original value minus \p diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index ad8a95a353b56..80ef32aff62ae 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1367,6 +1367,8 @@ enum NodeType { ATOMIC_LOAD_FSUB, ATOMIC_LOAD_FMAX, ATOMIC_LOAD_FMIN, + ATOMIC_LOAD_FMAXIMUM, + ATOMIC_LOAD_FMINIMUM, ATOMIC_LOAD_UINC_WRAP, ATOMIC_LOAD_UDEC_WRAP, ATOMIC_LOAD_USUB_COND, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index c89eba1b30ed1..cfefceea8f0fe 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1517,6 +1517,8 @@ class MemSDNode : public SDNode { case ISD::ATOMIC_LOAD_FSUB: case ISD::ATOMIC_LOAD_FMAX: case ISD::ATOMIC_LOAD_FMIN: + case ISD::ATOMIC_LOAD_FMAXIMUM: + case ISD::ATOMIC_LOAD_FMINIMUM: case ISD::ATOMIC_LOAD_UINC_WRAP: case ISD::ATOMIC_LOAD_UDEC_WRAP: case ISD::ATOMIC_LOAD_USUB_COND: @@ -1603,6 +1605,8 @@ class AtomicSDNode : public MemSDNode { N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || N->getOpcode() == ISD::ATOMIC_LOAD_FMAX || N->getOpcode() == ISD::ATOMIC_LOAD_FMIN || + N->getOpcode() == ISD::ATOMIC_LOAD_FMAXIMUM || + N->getOpcode() == ISD::ATOMIC_LOAD_FMINIMUM || N->getOpcode() == ISD::ATOMIC_LOAD_UINC_WRAP || N->getOpcode() == ISD::ATOMIC_LOAD_UDEC_WRAP || N->getOpcode() == ISD::ATOMIC_LOAD_USUB_COND || diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 95f0ef875fc07..c164f76eb335b 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -751,6 +751,14 @@ class AtomicRMWInst : public Instruction { /// \p minnum matches the behavior of \p llvm.minnum.*. FMin, + /// *p = maximum(old, v) + /// \p maximum matches the behavior of \p llvm.maximum.*. 
+ FMaximum, + + /// *p = minimum(old, v) + /// \p minimum matches the behavior of \p llvm.minimum.*. + FMinimum, + /// Increment one up to a maximum value. /// *p = (old u>= v) ? 0 : (old + 1) UIncWrap, @@ -812,6 +820,8 @@ class AtomicRMWInst : public Instruction { case AtomicRMWInst::FSub: case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: + case AtomicRMWInst::FMaximum: + case AtomicRMWInst::FMinimum: return true; default: return false; diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 5ef3707b81fe9..771c318da817d 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -426,6 +426,8 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_FADD) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAXIMUM) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMINIMUM) HANDLE_TARGET_OPCODE(G_ATOMICRMW_UINC_WRAP) HANDLE_TARGET_OPCODE(G_ATOMICRMW_UDEC_WRAP) HANDLE_TARGET_OPCODE(G_ATOMICRMW_USUB_COND) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index e134bab61bf63..c041c3cdfca5b 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1351,6 +1351,8 @@ def G_ATOMICRMW_FADD : G_ATOMICRMW_OP; def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP; def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP; +def G_ATOMICRMW_FMAXIMUM : G_ATOMICRMW_OP; +def G_ATOMICRMW_FMINIMUM : G_ATOMICRMW_OP; def G_ATOMICRMW_UINC_WRAP : G_ATOMICRMW_OP; def G_ATOMICRMW_UDEC_WRAP : G_ATOMICRMW_OP; def G_ATOMICRMW_USUB_COND : G_ATOMICRMW_OP; diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index c8c0eeb57099a..cb55a6fb112c5 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ 
b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -266,6 +266,8 @@ def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd>; def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>; def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>; def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>; +def : GINodeEquiv<G_ATOMICRMW_FMAXIMUM, atomic_load_fmaximum>; +def : GINodeEquiv<G_ATOMICRMW_FMINIMUM, atomic_load_fminimum>; def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap>; def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap>; def : GINodeEquiv<G_ATOMICRMW_USUB_COND, atomic_load_usub_cond>; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 47ae1e1a0f615..41fed692c7025 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -781,6 +781,10 @@ def atomic_load_fmax : SDNode<"ISD::ATOMIC_LOAD_FMAX", SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fmin : SDNode<"ISD::ATOMIC_LOAD_FMIN", SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_fmaximum : SDNode<"ISD::ATOMIC_LOAD_FMAXIMUM", SDTFPAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_fminimum : SDNode<"ISD::ATOMIC_LOAD_FMINIMUM", SDTFPAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_uinc_wrap : SDNode<"ISD::ATOMIC_LOAD_UINC_WRAP", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_udec_wrap : SDNode<"ISD::ATOMIC_LOAD_UDEC_WRAP", SDTAtomic2, diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 4d25b12c9ab06..ce813e1d7b1c4 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -749,6 +749,8 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin); + KEYWORD(fmaximum); + KEYWORD(fminimum); KEYWORD(uinc_wrap); KEYWORD(udec_wrap); KEYWORD(usub_cond); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index dd37d9b9e3796..64645e18930cf 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -8628,6 +8628,14 @@ int 
LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { Operation = AtomicRMWInst::FMin; IsFP = true; break; + case lltok::kw_fmaximum: + Operation = AtomicRMWInst::FMaximum; + IsFP = true; + break; + case lltok::kw_fminimum: + Operation = AtomicRMWInst::FMinimum; + IsFP = true; + break; } Lex.Lex(); // Eat the operation. diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 1d7aa189026a5..0e75c44333af5 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1356,6 +1356,10 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) { case bitc::RMW_FSUB: return AtomicRMWInst::FSub; case bitc::RMW_FMAX: return AtomicRMWInst::FMax; case bitc::RMW_FMIN: return AtomicRMWInst::FMin; + case bitc::RMW_FMAXIMUM: + return AtomicRMWInst::FMaximum; + case bitc::RMW_FMINIMUM: + return AtomicRMWInst::FMinimum; case bitc::RMW_UINC_WRAP: return AtomicRMWInst::UIncWrap; case bitc::RMW_UDEC_WRAP: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 5226db9db1e03..27ada0ddcd831 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -678,6 +678,10 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::FSub: return bitc::RMW_FSUB; case AtomicRMWInst::FMax: return bitc::RMW_FMAX; case AtomicRMWInst::FMin: return bitc::RMW_FMIN; + case AtomicRMWInst::FMaximum: + return bitc::RMW_FMAXIMUM; + case AtomicRMWInst::FMinimum: + return bitc::RMW_FMINIMUM; case AtomicRMWInst::UIncWrap: return bitc::RMW_UINC_WRAP; case AtomicRMWInst::UDecWrap: diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a3e9700fa3089..c376de877ac7d 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -931,6 +931,8 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp 
Op, case AtomicRMWInst::FSub: case AtomicRMWInst::FMin: case AtomicRMWInst::FMax: + case AtomicRMWInst::FMaximum: + case AtomicRMWInst::FMinimum: case AtomicRMWInst::UIncWrap: case AtomicRMWInst::UDecWrap: case AtomicRMWInst::USubCond: @@ -1819,6 +1821,8 @@ static ArrayRef GetRMWLibcall(AtomicRMWInst::BinOp Op) { case AtomicRMWInst::UMin: case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: + case AtomicRMWInst::FMaximum: + case AtomicRMWInst::FMinimum: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: case AtomicRMWInst::UIncWrap: diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 5a4ee12d730cc..a4069ad896a92 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3435,6 +3435,12 @@ bool IRTranslator::translateAtomicRMW(const User &U, case AtomicRMWInst::FMin: Opcode = TargetOpcode::G_ATOMICRMW_FMIN; break; + case AtomicRMWInst::FMaximum: + Opcode = TargetOpcode::G_ATOMICRMW_FMAXIMUM; + break; + case AtomicRMWInst::FMinimum: + Opcode = TargetOpcode::G_ATOMICRMW_FMINIMUM; + break; case AtomicRMWInst::UIncWrap: Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP; break; diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 47e3351bb33d7..121d7e80251c7 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -1157,6 +1157,22 @@ MachineIRBuilder::buildAtomicRMWFMin(const DstOp &OldValRes, const SrcOp &Addr, MMO); } +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFMaximum(const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMAXIMUM, OldValRes, Addr, + Val, MMO); +} + +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFMinimum(const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return 
buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMINIMUM, OldValRes, Addr, + Val, MMO); +} + MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { return buildInstr(TargetOpcode::G_FENCE) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8dab8b17b453a..d9d6d43c42430 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -9081,6 +9081,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, Opcode == ISD::ATOMIC_LOAD_UMAX || Opcode == ISD::ATOMIC_LOAD_FADD || Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX || Opcode == ISD::ATOMIC_LOAD_FMIN || + Opcode == ISD::ATOMIC_LOAD_FMINIMUM || + Opcode == ISD::ATOMIC_LOAD_FMAXIMUM || Opcode == ISD::ATOMIC_LOAD_UINC_WRAP || Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP || Opcode == ISD::ATOMIC_LOAD_USUB_COND || diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4b05d89417d2c..881b1536a131f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5081,6 +5081,12 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; + case AtomicRMWInst::FMaximum: + NT = ISD::ATOMIC_LOAD_FMAXIMUM; + break; + case AtomicRMWInst::FMinimum: + NT = ISD::ATOMIC_LOAD_FMINIMUM; + break; case AtomicRMWInst::UIncWrap: NT = ISD::ATOMIC_LOAD_UINC_WRAP; break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 8fcec6c6cd7c6..8faf97271d99e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -103,6 +103,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_LOAD_FSUB: return "AtomicLoadFSub"; case ISD::ATOMIC_LOAD_FMIN: return "AtomicLoadFMin"; case ISD::ATOMIC_LOAD_FMAX: return "AtomicLoadFMax"; + case ISD::ATOMIC_LOAD_FMINIMUM: return "AtomicLoadFMinimum"; + case ISD::ATOMIC_LOAD_FMAXIMUM: return "AtomicLoadFMaximum"; case ISD::ATOMIC_LOAD_UINC_WRAP: return "AtomicLoadUIncWrap"; case ISD::ATOMIC_LOAD_UDEC_WRAP: diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 0d18ffc7b511c..63d7171b06156 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -8788,6 +8788,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, case AtomicRMWInst::UMin: case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: + case AtomicRMWInst::FMaximum: + case AtomicRMWInst::FMinimum: case AtomicRMWInst::UIncWrap: case AtomicRMWInst::UDecWrap: case AtomicRMWInst::USubCond: diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 55be0dd6e523d..0e062ba819776 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -3955,6 +3955,10 @@ static AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMAtomicRMWBinOp BinOp) { case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub; case LLVMAtomicRMWBinOpFMax: return AtomicRMWInst::FMax; case LLVMAtomicRMWBinOpFMin: return AtomicRMWInst::FMin; + case LLVMAtomicRMWBinOpFMaximum: + return AtomicRMWInst::FMaximum; + case LLVMAtomicRMWBinOpFMinimum: + return AtomicRMWInst::FMinimum; case LLVMAtomicRMWBinOpUIncWrap: return AtomicRMWInst::UIncWrap; case LLVMAtomicRMWBinOpUDecWrap: @@ -3985,6 +3989,10 @@ static LLVMAtomicRMWBinOp mapToLLVMRMWBinOp(AtomicRMWInst::BinOp BinOp) { case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub; case AtomicRMWInst::FMax: return LLVMAtomicRMWBinOpFMax; case AtomicRMWInst::FMin: return 
LLVMAtomicRMWBinOpFMin; + case AtomicRMWInst::FMaximum: + return LLVMAtomicRMWBinOpFMaximum; + case AtomicRMWInst::FMinimum: + return LLVMAtomicRMWBinOpFMinimum; case AtomicRMWInst::UIncWrap: return LLVMAtomicRMWBinOpUIncWrap; case AtomicRMWInst::UDecWrap: diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index a37b2a2423391..f404e11b9c0f0 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1481,6 +1481,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) { return "fmax"; case AtomicRMWInst::FMin: return "fmin"; + case AtomicRMWInst::FMaximum: + return "fmaximum"; + case AtomicRMWInst::FMinimum: + return "fminimum"; case AtomicRMWInst::UIncWrap: return "uinc_wrap"; case AtomicRMWInst::UDecWrap: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index 284f6ede2b85b..eb768ed9ad5a1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -1749,6 +1749,16 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr, "buffer resources and should've been expanded away"); break; } + case AtomicRMWInst::FMaximum: { + report_fatal_error("atomic floating point fmaximum not supported for " + "buffer resources and should've been expanded away"); + break; + } + case AtomicRMWInst::FMinimum: { + report_fatal_error("atomic floating point fminimum not supported for " + "buffer resources and should've been expanded away"); + break; + } case AtomicRMWInst::Nand: report_fatal_error("atomic nand not supported for buffer resources and " "should've been expanded away"); diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp index b51c32485411d..e8b06415d4062 100644 --- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp +++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -88,6 +88,10 @@ Value 
*llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op, return Builder.CreateMaxNum(Loaded, Val); case AtomicRMWInst::FMin: return Builder.CreateMinNum(Loaded, Val); + case AtomicRMWInst::FMaximum: + return Builder.CreateMaximum(Loaded, Val); + case AtomicRMWInst::FMinimum: + return Builder.CreateMinimum(Loaded, Val); case AtomicRMWInst::UIncWrap: { Constant *One = ConstantInt::get(Loaded->getType(), 1); Value *Inc = Builder.CreateAdd(Loaded, One); diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll index a44dcccc16bef..39f33f9fdcacb 100644 --- a/llvm/test/Assembler/atomic.ll +++ b/llvm/test/Assembler/atomic.ll @@ -80,6 +80,18 @@ define void @fp_atomics(ptr %x) { ; CHECK: atomicrmw volatile fmin ptr %x, float 1.000000e+00 seq_cst atomicrmw volatile fmin ptr %x, float 1.0 seq_cst + ; CHECK: atomicrmw fmaximum ptr %x, float 1.000000e+00 seq_cst + atomicrmw fmaximum ptr %x, float 1.0 seq_cst + + ; CHECK: atomicrmw volatile fmaximum ptr %x, float 1.000000e+00 seq_cst + atomicrmw volatile fmaximum ptr %x, float 1.0 seq_cst + + ; CHECK: atomicrmw fminimum ptr %x, float 1.000000e+00 seq_cst + atomicrmw fminimum ptr %x, float 1.0 seq_cst + + ; CHECK: atomicrmw volatile fminimum ptr %x, float 1.000000e+00 seq_cst + atomicrmw volatile fminimum ptr %x, float 1.0 seq_cst + ret void } @@ -96,5 +108,11 @@ define void @fp_vector_atomicrmw(ptr %x, <2 x half> %val) { ; CHECK: %atomic.fmin = atomicrmw fmin ptr %x, <2 x half> %val seq_cst %atomic.fmin = atomicrmw fmin ptr %x, <2 x half> %val seq_cst + ; CHECK: %atomic.fmaximum = atomicrmw fmaximum ptr %x, <2 x half> %val seq_cst + %atomic.fmaximum = atomicrmw fmaximum ptr %x, <2 x half> %val seq_cst + + ; CHECK: %atomic.fminimum = atomicrmw fminimum ptr %x, <2 x half> %val seq_cst + %atomic.fminimum = atomicrmw fminimum ptr %x, <2 x half> %val seq_cst + ret void } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index d9e594abcd50c..9cf3fdbe550b4 100644 --- 
a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -923,6 +923,12 @@ define void @fp_atomics(ptr %word) { ; CHECK: %atomicrmw.fmin = atomicrmw fmin ptr %word, float 1.000000e+00 monotonic %atomicrmw.fmin = atomicrmw fmin ptr %word, float 1.0 monotonic +; CHECK: %atomicrmw.fmaximum = atomicrmw fmaximum ptr %word, float 1.000000e+00 monotonic + %atomicrmw.fmaximum = atomicrmw fmaximum ptr %word, float 1.0 monotonic + +; CHECK: %atomicrmw.fminimum = atomicrmw fminimum ptr %word, float 1.000000e+00 monotonic + %atomicrmw.fminimum = atomicrmw fminimum ptr %word, float 1.0 monotonic + ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 0260e65520774..7c5af4b7baa36 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -267,6 +267,12 @@ # DEBUG-NEXT: G_ATOMICRMW_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ATOMICRMW_FMAXIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ATOMICRMW_FMINIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_ATOMICRMW_UINC_WRAP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: no rules defined diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index dbc13840a0265..dd05eacef2408 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -263,6 +263,12 @@ # DEBUG-NEXT: G_ATOMICRMW_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ATOMICRMW_FMAXIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ATOMICRMW_FMINIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_ATOMICRMW_UINC_WRAP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined @@ -579,7 +585,7 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FMINIMUM (opcode {{[0-9]+}}): 1 type index -# DEBUG-NEXT: .. opcode 212 is aliased to 213 +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FMAXIMUM (opcode {{[0-9]+}}): 1 type index diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td index 2cc0700b66fee..dfb2c173f2c88 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-cxx.td @@ -96,71 +96,71 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(99), GIMT_Encode2(205), /*)*//*default:*//*Label 5*/ GIMT_Encode4(500), -// CHECK-NEXT: /* 10 */ /*TargetOpcode::G_STORE*//*Label 0*/ GIMT_Encode4(434), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /* 162 */ /*TargetOpcode::G_SEXT*//*Label 1*/ GIMT_Encode4(452), GIMT_Encode4(0), -// CHECK-NEXT: /* 170 */ /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(464), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /* 394 */ /*TargetOpcode::G_FNEG*//*Label 3*/ GIMT_Encode4(476), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /* 430 */ /*TargetOpcode::G_FABS*//*Label 4*/ GIMT_Encode4(488), -// CHECK-NEXT: /* 434 */ // Label 0: @434 -// CHECK-NEXT: /* 434 */ GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(451), // Rule ID 2 // -// CHECK-NEXT: /* 439 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), -// CHECK-NEXT: /* 442 */ // MIs[0] x -// CHECK-NEXT: /* 442 */ // No operand predicates -// CHECK-NEXT: /* 442 */ // MIs[0] y -// CHECK-NEXT: /* 442 */ // No operand predicates -// CHECK-NEXT: /* 442 */ GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIMT_Encode2(GICXXPred_MI_Predicate_GICombiner0), -// CHECK-NEXT: /* 446 */ GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIMT_Encode2(GICXXPred_MI_Predicate_GICombiner1), -// CHECK-NEXT: /* 450 */ // Combiner Rule #2: TwoMatchNoApply -// CHECK-NEXT: /* 450 */ GIR_EraseRootFromParent_Done, -// CHECK-NEXT: /* 451 */ // Label 6: @451 -// CHECK-NEXT: /* 451 */ GIM_Reject, -// CHECK-NEXT: /* 452 */ // Label 1: @452 -// CHECK-NEXT: /* 452 */ GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(463), // Rule ID 3 // -// CHECK-NEXT: /* 457 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled), -// 
CHECK-NEXT: /* 460 */ // MIs[0] a -// CHECK-NEXT: /* 460 */ // No operand predicates -// CHECK-NEXT: /* 460 */ // MIs[0] y -// CHECK-NEXT: /* 460 */ // No operand predicates -// CHECK-NEXT: /* 460 */ // Combiner Rule #3: NoMatchTwoApply -// CHECK-NEXT: /* 460 */ GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner2), -// CHECK-NEXT: /* 463 */ // Label 7: @463 -// CHECK-NEXT: /* 463 */ GIM_Reject, -// CHECK-NEXT: /* 464 */ // Label 2: @464 -// CHECK-NEXT: /* 464 */ GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(475), // Rule ID 4 // -// CHECK-NEXT: /* 469 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled), -// CHECK-NEXT: /* 472 */ // MIs[0] a -// CHECK-NEXT: /* 472 */ // No operand predicates -// CHECK-NEXT: /* 472 */ // MIs[0] y -// CHECK-NEXT: /* 472 */ // No operand predicates -// CHECK-NEXT: /* 472 */ // Combiner Rule #4: CombineCXXOrder -// CHECK-NEXT: /* 472 */ GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner3), -// CHECK-NEXT: /* 475 */ // Label 8: @475 -// CHECK-NEXT: /* 475 */ GIM_Reject, -// CHECK-NEXT: /* 476 */ // Label 3: @476 -// CHECK-NEXT: /* 476 */ GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(487), // Rule ID 1 // -// CHECK-NEXT: /* 481 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), -// CHECK-NEXT: /* 484 */ // MIs[0] a -// CHECK-NEXT: /* 484 */ // No operand predicates -// CHECK-NEXT: /* 484 */ // MIs[0] b -// CHECK-NEXT: /* 484 */ // No operand predicates -// CHECK-NEXT: /* 484 */ // Combiner Rule #1: TwoMatchTwoApply -// CHECK-NEXT: /* 484 */ GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner1), -// CHECK-NEXT: /* 487 */ // Label 9: @487 -// CHECK-NEXT: /* 487 */ GIM_Reject, -// CHECK-NEXT: /* 488 */ // Label 4: @488 -// CHECK-NEXT: /* 488 */ GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(499), // Rule ID 0 // -// CHECK-NEXT: /* 493 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), -// 
CHECK-NEXT: /* 496 */ // MIs[0] a -// CHECK-NEXT: /* 496 */ // No operand predicates -// CHECK-NEXT: /* 496 */ // MIs[0] b -// CHECK-NEXT: /* 496 */ // No operand predicates -// CHECK-NEXT: /* 496 */ // Combiner Rule #0: OneMatchOneApply -// CHECK-NEXT: /* 496 */ GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), -// CHECK-NEXT: /* 499 */ // Label 10: @499 -// CHECK-NEXT: /* 499 */ GIM_Reject, -// CHECK-NEXT: /* 500 */ // Label 5: @500 -// CHECK-NEXT: /* 500 */ GIM_Reject, -// CHECK-NEXT: /* 501 */ }; // Size: 501 bytes +// CHECK-NEXT: /* 0 */ GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(99), GIMT_Encode2(207), /*)*//*default:*//*Label 5*/ GIMT_Encode4(508), +// CHECK-NEXT: /* 10 */ /*TargetOpcode::G_STORE*//*Label 0*/ GIMT_Encode4(442), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /* 170 */ /*TargetOpcode::G_SEXT*//*Label 1*/ GIMT_Encode4(460), GIMT_Encode4(0), +// CHECK-NEXT: /* 178 */ /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(472), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /* 402 */ /*TargetOpcode::G_FNEG*//*Label 3*/ GIMT_Encode4(484), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /* 438 */ /*TargetOpcode::G_FABS*//*Label 4*/ GIMT_Encode4(496), +// CHECK-NEXT: /* 442 */ // Label 0: @442 +// CHECK-NEXT: /* 442 */ GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(459), // Rule ID 2 // +// CHECK-NEXT: /* 447 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), +// CHECK-NEXT: /* 450 */ // MIs[0] x +// CHECK-NEXT: /* 450 */ // No operand predicates +// CHECK-NEXT: /* 450 */ // MIs[0] y +// CHECK-NEXT: /* 450 */ // No operand predicates +// CHECK-NEXT: /* 450 */ GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIMT_Encode2(GICXXPred_MI_Predicate_GICombiner0), +// CHECK-NEXT: /* 454 */ GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/GIMT_Encode2(GICXXPred_MI_Predicate_GICombiner1), +// CHECK-NEXT: /* 458 */ // Combiner Rule #2: TwoMatchNoApply +// CHECK-NEXT: /* 458 */ GIR_EraseRootFromParent_Done, +// CHECK-NEXT: /* 459 */ // Label 6: @459 +// CHECK-NEXT: /* 459 */ GIM_Reject, +// CHECK-NEXT: /* 460 */ // Label 1: @460 +// CHECK-NEXT: /* 460 */ GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(471), // Rule ID 3 // +// CHECK-NEXT: /* 465 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled), +// CHECK-NEXT: /* 468 */ // MIs[0] a +// 
CHECK-NEXT: /* 468 */ // No operand predicates +// CHECK-NEXT: /* 468 */ // MIs[0] y +// CHECK-NEXT: /* 468 */ // No operand predicates +// CHECK-NEXT: /* 468 */ // Combiner Rule #3: NoMatchTwoApply +// CHECK-NEXT: /* 468 */ GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner2), +// CHECK-NEXT: /* 471 */ // Label 7: @471 +// CHECK-NEXT: /* 471 */ GIM_Reject, +// CHECK-NEXT: /* 472 */ // Label 2: @472 +// CHECK-NEXT: /* 472 */ GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(483), // Rule ID 4 // +// CHECK-NEXT: /* 477 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled), +// CHECK-NEXT: /* 480 */ // MIs[0] a +// CHECK-NEXT: /* 480 */ // No operand predicates +// CHECK-NEXT: /* 480 */ // MIs[0] y +// CHECK-NEXT: /* 480 */ // No operand predicates +// CHECK-NEXT: /* 480 */ // Combiner Rule #4: CombineCXXOrder +// CHECK-NEXT: /* 480 */ GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner3), +// CHECK-NEXT: /* 483 */ // Label 8: @483 +// CHECK-NEXT: /* 483 */ GIM_Reject, +// CHECK-NEXT: /* 484 */ // Label 3: @484 +// CHECK-NEXT: /* 484 */ GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(495), // Rule ID 1 // +// CHECK-NEXT: /* 489 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), +// CHECK-NEXT: /* 492 */ // MIs[0] a +// CHECK-NEXT: /* 492 */ // No operand predicates +// CHECK-NEXT: /* 492 */ // MIs[0] b +// CHECK-NEXT: /* 492 */ // No operand predicates +// CHECK-NEXT: /* 492 */ // Combiner Rule #1: TwoMatchTwoApply +// CHECK-NEXT: /* 492 */ GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner1), +// CHECK-NEXT: /* 495 */ // Label 9: @495 +// CHECK-NEXT: /* 495 */ GIM_Reject, +// CHECK-NEXT: /* 496 */ // Label 4: @496 +// CHECK-NEXT: /* 496 */ GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(507), // Rule ID 0 // +// CHECK-NEXT: /* 501 */ GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), +// CHECK-NEXT: /* 504 */ // MIs[0] a +// 
CHECK-NEXT: /* 504 */ // No operand predicates +// CHECK-NEXT: /* 504 */ // MIs[0] b +// CHECK-NEXT: /* 504 */ // No operand predicates +// CHECK-NEXT: /* 504 */ // Combiner Rule #0: OneMatchOneApply +// CHECK-NEXT: /* 504 */ GIR_DoneWithCustomAction, /*Fn*/GIMT_Encode2(GICXXCustomAction_GICombiner0), +// CHECK-NEXT: /* 507 */ // Label 10: @507 +// CHECK-NEXT: /* 507 */ GIM_Reject, +// CHECK-NEXT: /* 508 */ // Label 5: @508 +// CHECK-NEXT: /* 508 */ GIM_Reject, +// CHECK-NEXT: /* 509 */ }; // Size: 509 bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td index b7132bf2bcd8c..22ec34026768c 100644 --- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td @@ -513,7 +513,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3), // R00O-NEXT: GIM_Reject, // R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]] // R00O-NEXT: GIM_Reject, -// R00O-NEXT: }; // Size: 1840 bytes +// R00O-NEXT: }; // Size: 1848 bytes def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4), [(set GPR32:$dst, diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll index fa2ad60db7c29..8ffacb9bdd5f6 100644 --- a/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll +++ b/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll @@ -43,3 +43,296 @@ define float @test_atomicrmw_fsub_f32(ptr %ptr, float %value) { ret float %res } +define float @atomicrmw_fmin_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fmin_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; 
CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minnum.f32(float [[LOADED]], float [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fmin ptr %ptr, float %value seq_cst + ret float %res +} + +define float @atomicrmw_fmax_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fmax_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maxnum.f32(float [[LOADED]], float [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fmax ptr %ptr, float %value seq_cst + ret float %res +} + +define double @atomicrmw_fmin_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fmin_double( +; CHECK-NEXT: [[TMP1:%.*]] = load 
double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP6]] +; + %res = atomicrmw fmin ptr %ptr, double %value seq_cst + ret double %res +} + +define double @atomicrmw_fmax_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fmax_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret 
double [[TMP6]] +; + %res = atomicrmw fmax ptr %ptr, double %value seq_cst + ret double %res +} + +define float @atomicrmw_fminimum_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fminimum_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.minimum.f32(float [[LOADED]], float [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fminimum ptr %ptr, float %value seq_cst + ret float %res +} + +define float @atomicrmw_fmaximum_float(ptr %ptr, float %value) { +; CHECK-LABEL: @atomicrmw_fmaximum_float( +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.maximum.f32(float [[LOADED]], float [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] 
= extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fmaximum ptr %ptr, float %value seq_cst + ret float %res +} + +define double @atomicrmw_fminimum_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fminimum_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.minimum.f64(double [[LOADED]], double [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP6]] +; + %res = atomicrmw fminimum ptr %ptr, double %value seq_cst + ret double %res +} + +define double @atomicrmw_fmaximum_double(ptr %ptr, double %value) { +; CHECK-LABEL: @atomicrmw_fmaximum_double( +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[PTR:%.*]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.maximum.f64(double [[LOADED]], double [[VALUE:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 +; 
CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i64 [[TMP4]], i64 [[TMP3]] seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret double [[TMP6]] +; + %res = atomicrmw fmaximum ptr %ptr, double %value seq_cst + ret double %res +} + +define bfloat @atomicrmw_fmaximum_bfloat(ptr %ptr, bfloat %val) { +; CHECK-LABEL: @atomicrmw_fmaximum_bfloat( +; CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[PTR:%.*]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi bfloat [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call bfloat @llvm.maximum.bf16(bfloat [[LOADED]], bfloat [[VAL:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat [[TMP2]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast bfloat [[LOADED]] to i16 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst, align 2 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i16 [[NEWLOADED]] to bfloat +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret bfloat [[TMP6]] +; + %res = atomicrmw fmaximum ptr %ptr, bfloat %val seq_cst + ret bfloat %res +} + +define half @atomicrmw_fmaximum_half(ptr %ptr, half %val) { +; CHECK-LABEL: @atomicrmw_fmaximum_half( +; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[PTR:%.*]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: 
[[LOADED:%.*]] = phi half [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.maximum.f16(half [[LOADED]], half [[VAL:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[LOADED]] to i16 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst, align 2 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i16 [[NEWLOADED]] to half +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret half [[TMP6]] +; + %res = atomicrmw fmaximum ptr %ptr, half %val seq_cst + ret half %res +} + +define <2 x half> @atomicrmw_fmaximum_2_x_half(ptr %ptr, <2 x half> %val) { +; CHECK-LABEL: @atomicrmw_fmaximum_2_x_half( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.maximum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VAL:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[TMP6]] +; + %res = atomicrmw fmaximum ptr %ptr, <2 x half> %val seq_cst + ret 
<2 x half> %res +} + +define bfloat @atomicrmw_fminimum_bfloat(ptr %ptr, bfloat %val) { +; CHECK-LABEL: @atomicrmw_fminimum_bfloat( +; CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[PTR:%.*]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi bfloat [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call bfloat @llvm.minimum.bf16(bfloat [[LOADED]], bfloat [[VAL:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat [[TMP2]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast bfloat [[LOADED]] to i16 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst, align 2 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i16 [[NEWLOADED]] to bfloat +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret bfloat [[TMP6]] +; + %res = atomicrmw fminimum ptr %ptr, bfloat %val seq_cst + ret bfloat %res +} + +define half @atomicrmw_fminimum_half(ptr %ptr, half %val) { +; CHECK-LABEL: @atomicrmw_fminimum_half( +; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[PTR:%.*]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi half [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call half @llvm.minimum.f16(half [[LOADED]], half [[VAL:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast half [[TMP2]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[LOADED]] to i16 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst, align 2 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i16 
[[NEWLOADED]] to half +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret half [[TMP6]] +; + %res = atomicrmw fminimum ptr %ptr, half %val seq_cst + ret half %res +} + +define <2 x half> @atomicrmw_fminimum_2_x_half(ptr %ptr, <2 x half> %val) { +; CHECK-LABEL: @atomicrmw_fminimum_2_x_half( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.minimum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VAL:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[TMP6]] +; + %res = atomicrmw fminimum ptr %ptr, <2 x half> %val seq_cst + ret <2 x half> %res +} diff --git a/llvm/test/Transforms/InstCombine/atomicrmw.ll b/llvm/test/Transforms/InstCombine/atomicrmw.ll index 763b234698cc5..ca5ffd110ad61 100644 --- a/llvm/test/Transforms/InstCombine/atomicrmw.ll +++ b/llvm/test/Transforms/InstCombine/atomicrmw.ll @@ -397,6 +397,42 @@ define double @no_sat_fmin_inf(ptr %addr) { ret double %res } +define double @sat_fmaximum_inf(ptr %addr) { +; CHECK-LABEL: @sat_fmaximum_inf( +; CHECK-NEXT: [[RES:%.*]] = atomicrmw fmaximum ptr [[ADDR:%.*]], double 0x7FF0000000000000 monotonic, align 8 +; CHECK-NEXT: ret double [[RES]] +; + 
%res = atomicrmw fmaximum ptr %addr, double 0x7FF0000000000000 monotonic + ret double %res +} + +define double @no_sat_fmaximum_inf(ptr %addr) { +; CHECK-LABEL: @no_sat_fmaximum_inf( +; CHECK-NEXT: [[RES:%.*]] = atomicrmw fmaximum ptr [[ADDR:%.*]], double 1.000000e-01 monotonic, align 8 +; CHECK-NEXT: ret double [[RES]] +; + %res = atomicrmw fmaximum ptr %addr, double 1.000000e-01 monotonic + ret double %res +} + +define double @sat_fminimum_inf(ptr %addr) { +; CHECK-LABEL: @sat_fminimum_inf( +; CHECK-NEXT: [[RES:%.*]] = atomicrmw fminimum ptr [[ADDR:%.*]], double 0xFFF0000000000000 monotonic, align 8 +; CHECK-NEXT: ret double [[RES]] +; + %res = atomicrmw fminimum ptr %addr, double 0xFFF0000000000000 monotonic + ret double %res +} + +define double @no_sat_fminimum_inf(ptr %addr) { +; CHECK-LABEL: @no_sat_fminimum_inf( +; CHECK-NEXT: [[RES:%.*]] = atomicrmw fminimum ptr [[ADDR:%.*]], double 1.000000e-01 monotonic, align 8 +; CHECK-NEXT: ret double [[RES]] +; + %res = atomicrmw fminimum ptr %addr, double 1.000000e-01 monotonic + ret double %res +} + ; Idempotent atomicrmw are still canonicalized. 
define i32 @atomic_add_zero_preserve_md(ptr %addr) { ; CHECK-LABEL: @atomic_add_zero_preserve_md( @@ -788,5 +824,41 @@ define double @no_sat_fmin_inf_preserve_md(ptr %addr) { ret double %res } +define double @sat_fmaximum_inf_preserve_md(ptr %addr) { +; CHECK-LABEL: @sat_fmaximum_inf_preserve_md( +; CHECK-NEXT: [[RES:%.*]] = atomicrmw fmaximum ptr [[ADDR:%.*]], double 0x7FF0000000000000 syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]] +; CHECK-NEXT: ret double [[RES]] +; + %res = atomicrmw fmaximum ptr %addr, double 0x7FF0000000000000 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1 + ret double %res +} + +define double @no_sat_fmaximum_inf_preserve_md(ptr %addr) { +; CHECK-LABEL: @no_sat_fmaximum_inf_preserve_md( +; CHECK-NEXT: [[RES:%.*]] = atomicrmw fmaximum ptr [[ADDR:%.*]], double 1.000000e-01 syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]] +; CHECK-NEXT: ret double [[RES]] +; + %res = atomicrmw fmaximum ptr %addr, double 1.000000e-01 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1 + ret double %res +} + +define double @sat_fminimum_inf_preserve_md(ptr %addr) { +; CHECK-LABEL: @sat_fminimum_inf_preserve_md( +; CHECK-NEXT: [[RES:%.*]] = atomicrmw fminimum ptr [[ADDR:%.*]], double 0xFFF0000000000000 syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]] +; CHECK-NEXT: ret double [[RES]] +; + %res = atomicrmw fminimum ptr %addr, double 0xFFF0000000000000 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1 + ret double %res +} + +define double @no_sat_fminimum_inf_preserve_md(ptr %addr) { +; 
CHECK-LABEL: @no_sat_fminimum_inf_preserve_md( +; CHECK-NEXT: [[RES:%.*]] = atomicrmw fminimum ptr [[ADDR:%.*]], double 1.000000e-01 syncscope("agent") monotonic, align 8, !mmra [[META0]], !amdgpu.no.fine.grained.host.memory [[META1]], !amdgpu.no.remote.memory.access [[META1]] +; CHECK-NEXT: ret double [[RES]] +; + %res = atomicrmw fminimum ptr %addr, double 1.000000e-01 syncscope("agent") monotonic, !amdgpu.no.fine.grained.host.memory !0, !amdgpu.no.remote.memory.access !0, !mmra !1 + ret double %res +} + !0 = !{} !1 = !{!"foo", !"bar"} diff --git a/llvm/test/Transforms/LowerAtomic/atomic-load.ll b/llvm/test/Transforms/LowerAtomic/atomic-load.ll index 4ab8fbb86cb60..cf41d14547d2e 100644 --- a/llvm/test/Transforms/LowerAtomic/atomic-load.ll +++ b/llvm/test/Transforms/LowerAtomic/atomic-load.ll @@ -78,3 +78,46 @@ define float @fmin() { ret float %j ; CHECK: ret float [[INST]] } + +define float @fmaximum() { +; CHECK-LABEL: @fmaximum( + %i = alloca float + %j = atomicrmw fmaximum ptr %i, float 42.0 monotonic +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: call float @llvm.maximum.f32 +; CHECK-NEXT: store + ret float %j +; CHECK: ret float [[INST]] +} + +define float @fminimum() { +; CHECK-LABEL: @fminimum( + %i = alloca float + %j = atomicrmw fminimum ptr %i, float 42.0 monotonic +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: call float @llvm.minimum.f32 +; CHECK-NEXT: store + ret float %j +; CHECK: ret float [[INST]] +} + +define <2 x half> @fmaximum_2xhalf(<2 x half> %val) { +; CHECK-LABEL: @fmaximum_2xhalf( + %i = alloca <2 x half>, align 4 + %j = atomicrmw fmaximum ptr %i, <2 x half> %val monotonic +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: call <2 x half> @llvm.maximum.v2f16 +; CHECK-NEXT: store + ret <2 x half> %j +} + +define <2 x half> @fminimum_2xhalf(<2 x half> %val) { +; CHECK-LABEL: @fminimum_2xhalf( + %i = alloca <2 x half>, align 4 + %j = atomicrmw fminimum ptr %i, <2 x half> %val monotonic +; CHECK: [[INST:%[a-z0-9]+]] = load +; 
CHECK-NEXT: call <2 x half> @llvm.minimum.v2f16 +; CHECK-NEXT: store + ret <2 x half> %j +} +