diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -727,7 +727,10 @@ Generic atomic cmpxchg. Expects a MachineMemOperand in addition to explicit operands. -G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_NAND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_FADD, G_ATOMICRMW_FSUB +G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, +G_ATOMICRMW_NAND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MAX, +G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_FADD, +G_ATOMICRMW_FSUB, G_ATOMICRMW_FMAX, G_ATOMICRMW_FMIN ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Generic atomicrmw. Expects a MachineMemOperand in addition to explicit diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1577,9 +1577,9 @@ Specify the desired alignment, which must be a power of two, in parentheses. ``"alloc-family"="FAMILY"`` - This indicates which "family" an allocator function is part of. To avoid - collisions, the family name should match the mangled name of the primary - allocator function, that is "malloc" for malloc/calloc/realloc/free, + This indicates which "family" an allocator function is part of. To avoid + collisions, the family name should match the mangled name of the primary + allocator function, that is "malloc" for malloc/calloc/realloc/free, "_Znwm" for ``::operator::new`` and ``::operator::delete``, and "_ZnwmSt11align_val_t" for aligned ``::operator::new`` and ``::operator::delete``. 
Matching malloc/realloc/free calls within a family @@ -1595,13 +1595,13 @@ will match that of the ``allocptr`` argument and the ``allocptr`` argument is invalidated, even if the function returns the same address. * "free": the function frees the block of memory specified by ``allocptr``. - * "uninitialized": Any newly-allocated memory (either a new block from + * "uninitialized": Any newly-allocated memory (either a new block from a "alloc" function or the enlarged capacity from a "realloc" function) will be uninitialized. * "zeroed": Any newly-allocated memory (either a new block from a "alloc" function or the enlarged capacity from a "realloc" function) will be zeroed. - * "aligned": the function returns memory aligned according to the + * "aligned": the function returns memory aligned according to the ``allocalign`` parameter. The first three options are mutually exclusive, and the remaining options @@ -10289,12 +10289,14 @@ - umin - fadd - fsub +- fmax +- fmin For most of these operations, the type of '<value>' must be an integer type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. For xchg, this may also be a floating point or a pointer type with the same size constraints -as integers. For fadd/fsub, this must be a floating point type. The +as integers. For fadd/fsub/fmax/fmin, this must be a floating point type. The type of the '``<pointer>``' operand must be a pointer to that type. If the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not allowed to modify the number or order of execution of this @@ -10331,6 +10333,8 @@ - umin: ``*ptr = *ptr < val ? 
*ptr : val`` (using an unsigned comparison) - fadd: ``*ptr = *ptr + val`` (using floating point arithmetic) - fsub: ``*ptr = *ptr - val`` (using floating point arithmetic) +- fmax: ``*ptr = llvm.maxnum(*ptr, val)`` (match the ``llvm.maxnum.*`` intrinsic) +- fmin: ``*ptr = llvm.minnum(*ptr, val)`` (match the ``llvm.minnum.*`` intrinsic) Example: """""""" @@ -20491,7 +20495,7 @@ The '``llvm.vp.fpext``' intrinsic extends the ``value`` from a smaller :ref:`floating-point <t_floating>` type to a larger :ref:`floating-point <t_floating>` type. The '``llvm.vp.fpext``' cannot be used to make a -*no-op cast* because it always changes bits. Use ``bitcast`` to make a +*no-op cast* because it always changes bits. Use ``bitcast`` to make a *no-op cast* for a floating-point cast. The conversion is performed on lane positions below the explicit vector length and where the vector mask is true. Masked-off lanes are undefined. diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -381,8 +381,14 @@ the old one */ LLVMAtomicRMWBinOpFAdd, /**< Add a floating point value and return the old one */ - LLVMAtomicRMWBinOpFSub /**< Subtract a floating point value and return the - old one */ + LLVMAtomicRMWBinOpFSub, /**< Subtract a floating point value and return the + old one */ + LLVMAtomicRMWBinOpFMax, /**< Sets the value if it's greater than the + original using a floating point comparison and + return the old one */ + LLVMAtomicRMWBinOpFMin, /**< Sets the value if it's smaller than the + original using a floating point comparison and + return the old one */ } LLVMAtomicRMWBinOp; typedef enum { diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -221,6 +221,8 @@ kw_min, kw_umax, kw_umin, + kw_fmax, + kw_fmin, // Instruction Opcodes (Opcode in UIntVal). 
kw_fneg, diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -458,7 +458,9 @@ RMW_UMAX = 9, RMW_UMIN = 10, RMW_FADD = 11, - RMW_FSUB = 12 + RMW_FSUB = 12, + RMW_FMAX = 13, + RMW_FMIN = 14 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1417,6 +1417,40 @@ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, MachineMemOperand &MMO); + /// Build and insert `OldValRes = G_ATOMICRMW_FMAX Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the floating point maximum of + /// \p Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWFMax( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes = G_ATOMICRMW_FMIN Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the floating point minimum of + /// \p Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. 
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWFMin( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + /// Build and insert `G_FENCE Ordering, Scope`. MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope); diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1188,6 +1188,8 @@ ATOMIC_LOAD_UMAX, ATOMIC_LOAD_FADD, ATOMIC_LOAD_FSUB, + ATOMIC_LOAD_FMAX, + ATOMIC_LOAD_FMIN, // Masked load and store - consecutive vector load and store operations // with additional mask operand that prevents memory accesses to the diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1403,6 +1403,8 @@ case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_LOAD_FADD: case ISD::ATOMIC_LOAD_FSUB: + case ISD::ATOMIC_LOAD_FMAX: + case ISD::ATOMIC_LOAD_FMIN: case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: case ISD::MLOAD: @@ -1468,6 +1470,8 @@ N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || N->getOpcode() == ISD::ATOMIC_LOAD_FADD || N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || + N->getOpcode() == ISD::ATOMIC_LOAD_FMAX || + N->getOpcode() == ISD::ATOMIC_LOAD_FMIN || N->getOpcode() == ISD::ATOMIC_LOAD || N->getOpcode() == ISD::ATOMIC_STORE; } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -766,8 +766,16 @@ /// *p = old - v FSub, + /// *p = maxnum(old, v) + /// \p maxnum matches the behavior of \p llvm.maxnum.*. 
+ FMax, + + /// *p = minnum(old, v) + /// \p minnum matches the behavior of \p llvm.minnum.*. + FMin, + FIRST_BINOP = Xchg, - LAST_BINOP = FSub, + LAST_BINOP = FMin, BAD_BINOP }; @@ -810,6 +818,8 @@ switch (Op) { case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: return true; default: return false; diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -384,6 +384,8 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FADD) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN) // Generic atomic fence HANDLE_TARGET_OPCODE(G_FENCE) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1126,6 +1126,8 @@ def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP; def G_ATOMICRMW_FADD : G_ATOMICRMW_OP; def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; +def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP; +def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP; def G_FENCE : GenericInstruction { let OutOperandList = (outs); diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -206,6 +206,8 @@ def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin>; def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd>; def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>; +def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>; +def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>; def : GINodeEquiv<G_FENCE, atomic_fence>; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. 
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -651,6 +651,10 @@ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fsub : SDNode<"ISD::ATOMIC_LOAD_FSUB" , SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_fmax : SDNode<"ISD::ATOMIC_LOAD_FMAX", SDTFPAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_fmin : SDNode<"ISD::ATOMIC_LOAD_FMIN", SDTFPAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -656,7 +656,7 @@ KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); - KEYWORD(umin); + KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin); KEYWORD(vscale); KEYWORD(x); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7457,6 +7457,14 @@ Operation = AtomicRMWInst::FSub; IsFP = true; break; + case lltok::kw_fmax: + Operation = AtomicRMWInst::FMax; + IsFP = true; + break; + case lltok::kw_fmin: + Operation = AtomicRMWInst::FMin; + IsFP = true; + break; } Lex.Lex(); // Eat the operation. 
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1137,6 +1137,8 @@ case bitc::RMW_UMIN: return AtomicRMWInst::UMin; case bitc::RMW_FADD: return AtomicRMWInst::FAdd; case bitc::RMW_FSUB: return AtomicRMWInst::FSub; + case bitc::RMW_FMAX: return AtomicRMWInst::FMax; + case bitc::RMW_FMIN: return AtomicRMWInst::FMin; } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -577,6 +577,8 @@ case AtomicRMWInst::UMin: return bitc::RMW_UMIN; case AtomicRMWInst::FAdd: return bitc::RMW_FADD; case AtomicRMWInst::FSub: return bitc::RMW_FSUB; + case AtomicRMWInst::FMax: return bitc::RMW_FMAX; + case AtomicRMWInst::FMin: return bitc::RMW_FMIN; } } diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1646,6 +1646,8 @@ case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: // No atomic libcalls are available for max/min/umax/umin. 
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2883,6 +2883,12 @@ case AtomicRMWInst::FSub: Opcode = TargetOpcode::G_ATOMICRMW_FSUB; break; + case AtomicRMWInst::FMax: + Opcode = TargetOpcode::G_ATOMICRMW_FMAX; + break; + case AtomicRMWInst::FMin: + Opcode = TargetOpcode::G_ATOMICRMW_FMIN; + break; } MIRBuilder.buildAtomicRMW( diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -978,6 +978,20 @@ MMO); } +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFMax(const DstOp &OldValRes, const SrcOp &Addr, + const SrcOp &Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMAX, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFMin(const DstOp &OldValRes, const SrcOp &Addr, + const SrcOp &Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMIN, OldValRes, Addr, Val, + MMO); +} + MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { return buildInstr(TargetOpcode::G_FENCE) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7456,6 +7456,8 @@ Opcode == ISD::ATOMIC_LOAD_UMAX || Opcode == ISD::ATOMIC_LOAD_FADD || Opcode == ISD::ATOMIC_LOAD_FSUB || + Opcode == ISD::ATOMIC_LOAD_FMAX || + Opcode == ISD::ATOMIC_LOAD_FMIN || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_STORE) && "Invalid Atomic Op"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- 
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4621,6 +4621,8 @@ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break; case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; + case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; + case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; } AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3954,6 +3954,8 @@ case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: llvm_unreachable("Unsupported atomic update operation"); } llvm_unreachable("Unsupported atomic update operation"); diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -3666,6 +3666,8 @@ case LLVMAtomicRMWBinOpUMin: return AtomicRMWInst::UMin; case LLVMAtomicRMWBinOpFAdd: return AtomicRMWInst::FAdd; case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub; + case LLVMAtomicRMWBinOpFMax: return AtomicRMWInst::FMax; + case LLVMAtomicRMWBinOpFMin: return AtomicRMWInst::FMin; } llvm_unreachable("Invalid LLVMAtomicRMWBinOp value!"); @@ -3686,6 +3688,8 @@ case AtomicRMWInst::UMin: return LLVMAtomicRMWBinOpUMin; case AtomicRMWInst::FAdd: return LLVMAtomicRMWBinOpFAdd; case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub; + case AtomicRMWInst::FMax: return LLVMAtomicRMWBinOpFMax; + case AtomicRMWInst::FMin: return LLVMAtomicRMWBinOpFMin; default: break; } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1696,6 +1696,10 @@ return "fadd"; case 
AtomicRMWInst::FSub: return "fsub"; + case AtomicRMWInst::FMax: + return "fmax"; + case AtomicRMWInst::FMin: + return "fmin"; case AtomicRMWInst::BAD_BINOP: return ""; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4799,6 +4799,8 @@ case AtomicRMWInst::Nand: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: return AtomicExpansionKind::CmpXChg; default: return AtomicExpansionKind::None; diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -562,6 +562,10 @@ return bitc::RMW_FADD; case AtomicRMWInst::FSub: return bitc::RMW_FSUB; + case AtomicRMWInst::FMax: + return bitc::RMW_FMAX; + case AtomicRMWInst::FMin: + return bitc::RMW_FMIN; } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30596,6 +30596,8 @@ case AtomicRMWInst::UMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: // These always require a non-trivial set of data operations on x86. We must // use a cmpxchg loop. return AtomicExpansionKind::CmpXChg; @@ -44385,7 +44387,7 @@ // Attempt to convert a (vXi1 bitcast(iX Cond)) selection mask before it might // get split by legalization. 
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST && - CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() && + CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() && TLI.isTypeLegal(VT.getScalarType())) { EVT ExtCondVT = VT.changeVectorElementTypeToInteger(); if (SDValue ExtCond = combineToExtendBoolVectorInReg( diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp @@ -64,6 +64,8 @@ switch(RMWI.getOperation()) { case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: return CF->isNaN(); default: return false; diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp --- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp +++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -74,6 +74,12 @@ return Builder.CreateFAdd(Loaded, Inc, "new"); case AtomicRMWInst::FSub: return Builder.CreateFSub(Loaded, Inc, "new"); + case AtomicRMWInst::FMax: + NewVal = Builder.CreateFCmpUGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::FMin: + NewVal = Builder.CreateFCmpULT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); default: llvm_unreachable("Unknown atomic op"); } diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll --- a/llvm/test/Assembler/atomic.ll +++ b/llvm/test/Assembler/atomic.ll @@ -47,5 +47,11 @@ ; CHECK: atomicrmw volatile fadd float* %x, float 1.000000e+00 seq_cst atomicrmw volatile fadd float* %x, float 1.0 seq_cst +; CHECK: atomicrmw fmax float* %x, float 1.000000e+00 seq_cst + atomicrmw fmax float* %x, float 1.0 seq_cst + + ; CHECK: atomicrmw volatile fmax float* %x, float 1.000000e+00 seq_cst + atomicrmw volatile fmax float* %x, float 1.0 seq_cst + ret void 
} diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -851,6 +851,12 @@ ; CHECK: %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.000000e+00 monotonic %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.0 monotonic +; CHECK: %atomicrmw.fmax = atomicrmw fmax float* %word, float 1.000000e+00 monotonic + %atomicrmw.fmax = atomicrmw fmax float* %word, float 1.0 monotonic + +; CHECK: %atomicrmw.fmin = atomicrmw fmin float* %word, float 1.000000e+00 monotonic + %atomicrmw.fmin = atomicrmw fmin float* %word, float 1.0 monotonic + ret void } diff --git a/llvm/test/Transforms/LowerAtomic/atomic-load.ll b/llvm/test/Transforms/LowerAtomic/atomic-load.ll --- a/llvm/test/Transforms/LowerAtomic/atomic-load.ll +++ b/llvm/test/Transforms/LowerAtomic/atomic-load.ll @@ -57,3 +57,27 @@ ret float %j ; CHECK: ret float [[INST]] } + +define float @fmax() { +; CHECK-LABEL: @fmax( + %i = alloca float + %j = atomicrmw fmax float* %i, float 42.0 monotonic +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: fcmp ugt +; CHECK-NEXT: select +; CHECK-NEXT: store + ret float %j +; CHECK: ret float [[INST]] +} + +define float @fmin() { +; CHECK-LABEL: @fmin( + %i = alloca float + %j = atomicrmw fmin float* %i, float 42.0 monotonic +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: fcmp ult +; CHECK-NEXT: select +; CHECK-NEXT: store + ret float %j +; CHECK: ret float [[INST]] +}