Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -10534,6 +10534,8 @@ - fsub - fmax - fmin +- uinc_wrap +- udec_wrap For most of these operations, the type of '<value>' must be an integer type whose bit width is a power of two greater than or equal to eight @@ -10578,6 +10580,9 @@ - fsub: ``*ptr = *ptr - val`` (using floating point arithmetic) - fmax: ``*ptr = maxnum(*ptr, val)`` (match the `llvm.maxnum.*`` intrinsic) - fmin: ``*ptr = minnum(*ptr, val)`` (match the `llvm.minnum.*`` intrinsic) +- uinc_wrap: ``*ptr = (*ptr u>= val) ? 0 : (*ptr + 1)`` (increment value with wraparound to zero when incremented above input value) +- udec_wrap: ``*ptr = ((*ptr == 0) || (*ptr u> val)) ? val : (*ptr - 1)`` (decrement with wraparound to input value when decremented below zero). + Example: """""""" Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -108,6 +108,8 @@ types that need to be preserved through the optimizer, but otherwise are not introspectable by target-independent optimizations. +* Added ``uinc_wrap`` and ``udec_wrap`` operations to ``atomicrmw`` + Changes to building LLVM ------------------------ Index: llvm/include/llvm/AsmParser/LLToken.h =================================================================== --- llvm/include/llvm/AsmParser/LLToken.h +++ llvm/include/llvm/AsmParser/LLToken.h @@ -237,6 +237,8 @@ kw_umin, kw_fmax, kw_fmin, + kw_uinc_wrap, + kw_udec_wrap, // Instruction Opcodes (Opcode in UIntVal). kw_fneg, Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -479,7 +479,9 @@ RMW_FADD = 11, RMW_FSUB = 12, RMW_FMAX = 13, - RMW_FMIN = 14 + RMW_FMIN = 14, + RMW_UINC_WRAP = 15, + RMW_UDEC_WRAP = 16 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing Index: llvm/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1199,6 +1199,8 @@ ATOMIC_LOAD_FSUB, ATOMIC_LOAD_FMAX, ATOMIC_LOAD_FMIN, + ATOMIC_LOAD_UINC_WRAP, + ATOMIC_LOAD_UDEC_WRAP, // Masked load and store - consecutive vector load and store operations // with additional mask operand that prevents memory accesses to the Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1419,6 +1419,8 @@ case ISD::ATOMIC_LOAD_FSUB: case ISD::ATOMIC_LOAD_FMAX: case ISD::ATOMIC_LOAD_FMIN: + case ISD::ATOMIC_LOAD_UINC_WRAP: + case ISD::ATOMIC_LOAD_UDEC_WRAP: case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: case ISD::MLOAD: @@ -1486,6 +1488,8 @@ N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || N->getOpcode() == ISD::ATOMIC_LOAD_FMAX || N->getOpcode() == ISD::ATOMIC_LOAD_FMIN || + N->getOpcode() == ISD::ATOMIC_LOAD_UINC_WRAP || + N->getOpcode() == ISD::ATOMIC_LOAD_UDEC_WRAP || N->getOpcode() == ISD::ATOMIC_LOAD || N->getOpcode() == ISD::ATOMIC_STORE; } Index: llvm/include/llvm/IR/Instructions.h =================================================================== --- llvm/include/llvm/IR/Instructions.h +++ llvm/include/llvm/IR/Instructions.h @@ -765,8 +765,16 @@ /// \p
minnum matches the behavior of \p llvm.minnum.*. FMin, + /// Increment one up to a maximum value. + /// *p = (old u>= v) ? 0 : (old + 1) + UIncWrap, + + /// Decrement one until a minimum value or zero. + /// *p = ((old == 0) || (old u> v)) ? v : (old - 1) + UDecWrap, + FIRST_BINOP = Xchg, - LAST_BINOP = FMin, + LAST_BINOP = UDecWrap, BAD_BINOP }; @@ -778,7 +786,7 @@ template <unsigned Offset> using BinOpBitfieldElement = - typename Bitfield::Element<BinOp, Offset, 4, BinOp::LAST_BINOP>; + typename Bitfield::Element<BinOp, Offset, 5, BinOp::LAST_BINOP>; public: AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, Align Alignment, Index: llvm/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/include/llvm/Support/TargetOpcodes.def +++ llvm/include/llvm/Support/TargetOpcodes.def @@ -392,12 +392,14 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UINC_WRAP) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UDEC_WRAP) // Marker for start of Generic AtomicRMW opcodes HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_START, G_ATOMICRMW_XCHG) // Marker for end of Generic AtomicRMW opcodes -HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_FMIN) +HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_UDEC_WRAP) // Generic atomic fence HANDLE_TARGET_OPCODE(G_FENCE) Index: llvm/include/llvm/Target/GenericOpcodes.td =================================================================== --- llvm/include/llvm/Target/GenericOpcodes.td +++ llvm/include/llvm/Target/GenericOpcodes.td @@ -1128,6 +1128,8 @@ def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP; def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP; +def G_ATOMICRMW_UINC_WRAP : G_ATOMICRMW_OP; +def G_ATOMICRMW_UDEC_WRAP : G_ATOMICRMW_OP; def G_FENCE : GenericInstruction { let OutOperandList = (outs); Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -214,6 +214,8 @@ def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>; def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>; def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>; +def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap>; +def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap>; def : GINodeEquiv<G_FENCE, atomic_fence>; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. Index: llvm/include/llvm/Target/TargetSelectionDAG.td =================================================================== --- llvm/include/llvm/Target/TargetSelectionDAG.td +++ llvm/include/llvm/Target/TargetSelectionDAG.td @@ -667,6 +667,10 @@ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fmin : SDNode<"ISD::ATOMIC_LOAD_FMIN", SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_uinc_wrap : SDNode<"ISD::ATOMIC_LOAD_UINC_WRAP", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_udec_wrap : SDNode<"ISD::ATOMIC_LOAD_UDEC_WRAP", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; Index: llvm/include/llvm/Transforms/Utils/LowerAtomic.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LowerAtomic.h +++ llvm/include/llvm/Transforms/Utils/LowerAtomic.h @@ -31,7 +31,7 @@ /// Emit IR to implement the given atomicrmw operation on values in registers, /// returning the new value.
Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, - Value *Loaded, Value *Inc); + Value *Loaded, Value *Val); } #endif // LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H Index: llvm/lib/AsmParser/LLLexer.cpp =================================================================== --- llvm/lib/AsmParser/LLLexer.cpp +++ llvm/lib/AsmParser/LLLexer.cpp @@ -672,6 +672,8 @@ KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin); + KEYWORD(uinc_wrap); + KEYWORD(udec_wrap); KEYWORD(vscale); KEYWORD(x); Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -7758,6 +7758,12 @@ case lltok::kw_min: Operation = AtomicRMWInst::Min; break; case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break; case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break; + case lltok::kw_uinc_wrap: + Operation = AtomicRMWInst::UIncWrap; + break; + case lltok::kw_udec_wrap: + Operation = AtomicRMWInst::UDecWrap; + break; case lltok::kw_fadd: Operation = AtomicRMWInst::FAdd; IsFP = true; Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1257,6 +1257,10 @@ case bitc::RMW_FSUB: return AtomicRMWInst::FSub; case bitc::RMW_FMAX: return AtomicRMWInst::FMax; case bitc::RMW_FMIN: return AtomicRMWInst::FMin; + case bitc::RMW_UINC_WRAP: + return AtomicRMWInst::UIncWrap; + case bitc::RMW_UDEC_WRAP: + return AtomicRMWInst::UDecWrap; } } Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -604,6 +604,10 @@ case AtomicRMWInst::FSub: return bitc::RMW_FSUB; case AtomicRMWInst::FMax: return bitc::RMW_FMAX; case AtomicRMWInst::FMin: return bitc::RMW_FMIN; + case AtomicRMWInst::UIncWrap: + return bitc::RMW_UINC_WRAP; + case AtomicRMWInst::UDecWrap: + return bitc::RMW_UDEC_WRAP; } } Index: llvm/lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- llvm/lib/CodeGen/AtomicExpandPass.cpp +++ llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -815,7 +815,9 @@ case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: case AtomicRMWInst::FMin: - case AtomicRMWInst::FMax: { + case AtomicRMWInst::FMax: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: { // Finally, other ops will operate on the full value, so truncate down to // the original size, and expand out again after doing the // operation. Bitcasts will be inserted for FP values. @@ -1685,6 +1687,8 @@ case AtomicRMWInst::FMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: // No atomic libcalls are available for max/min/umax/umin. 
return {}; } Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2957,6 +2957,12 @@ case AtomicRMWInst::FMin: Opcode = TargetOpcode::G_ATOMICRMW_FMIN; break; + case AtomicRMWInst::UIncWrap: + Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP; + break; + case AtomicRMWInst::UDecWrap: + Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP; + break; } MIRBuilder.buildAtomicRMW( Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7802,6 +7802,8 @@ Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX || Opcode == ISD::ATOMIC_LOAD_FMIN || + Opcode == ISD::ATOMIC_LOAD_UINC_WRAP || + Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_STORE) && "Invalid Atomic Op"); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4676,6 +4676,12 @@ case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; + case AtomicRMWInst::UIncWrap: + NT = ISD::ATOMIC_LOAD_UINC_WRAP; + break; + case AtomicRMWInst::UDecWrap: + NT = ISD::ATOMIC_LOAD_UDEC_WRAP; + break; } AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -96,6 +96,10 @@ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd"; + case ISD::ATOMIC_LOAD_UINC_WRAP: + return "AtomicLoadUIncWrap"; + case ISD::ATOMIC_LOAD_UDEC_WRAP: + return "AtomicLoadUDecWrap"; case ISD::ATOMIC_LOAD: return "AtomicLoad"; case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4334,6 +4334,7 @@ return Builder.saveIP(); } +// FIXME: Duplicating AtomicExpand Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, AtomicRMWInst::BinOp RMWOp) { switch (RMWOp) { @@ -4359,6 +4360,8 @@ case AtomicRMWInst::UMin: case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: llvm_unreachable("Unsupported atomic update operation"); } llvm_unreachable("Unsupported atomic update operation"); Index: llvm/lib/IR/Instructions.cpp =================================================================== --- llvm/lib/IR/Instructions.cpp +++ llvm/lib/IR/Instructions.cpp @@ -1801,6 +1801,10 @@ return "fmax"; case AtomicRMWInst::FMin: return "fmin"; + case AtomicRMWInst::UIncWrap: + return "uinc_wrap"; + case AtomicRMWInst::UDecWrap: + return "udec_wrap"; case AtomicRMWInst::BAD_BINOP: return ""; } Index: 
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp =================================================================== --- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2753,7 +2753,9 @@ // Since floating-point operation requires a non-trivial set of data // operations, use CmpXChg to expand. - if (AI->isFloatingPointOperation()) + if (AI->isFloatingPointOperation() || + AI->getOperation() == AtomicRMWInst::UIncWrap || + AI->getOperation() == AtomicRMWInst::UDecWrap) return AtomicExpansionKind::CmpXChg; unsigned Size = AI->getType()->getPrimitiveSizeInBits(); Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -18343,7 +18343,16 @@ unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (shouldInlineQuadwordAtomics() && Size == 128) return AtomicExpansionKind::MaskedIntrinsic; - return TargetLowering::shouldExpandAtomicRMWInIR(AI); + + switch (AI->getOperation()) { + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: + return AtomicExpansionKind::CmpXChg; + default: + return TargetLowering::shouldExpandAtomicRMWInIR(AI); + } + + llvm_unreachable("unreachable atomicrmw operation"); } TargetLowering::AtomicExpansionKind Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13572,7 +13572,9 @@ // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating // point operations can't be used in an lr/sc sequence without breaking the // forward-progress guarantee. - if (AI->isFloatingPointOperation()) + if (AI->isFloatingPointOperation() || + AI->getOperation() == AtomicRMWInst::UIncWrap || + AI->getOperation() == AtomicRMWInst::UDecWrap) return AtomicExpansionKind::CmpXChg; // Don't expand forced atomics, we want to have __sync libcalls instead. Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31588,8 +31588,6 @@ AtomicRMWInst::BinOp Op = AI->getOperation(); switch (Op) { - default: - llvm_unreachable("Unknown atomic operation"); case AtomicRMWInst::Xchg: return AtomicExpansionKind::None; case AtomicRMWInst::Add: @@ -31613,6 +31611,9 @@ case AtomicRMWInst::FSub: case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: + default: // These always require a non-trivial set of data operations on x86. We must // use a cmpxchg loop. 
return AtomicExpansionKind::CmpXChg; Index: llvm/lib/Transforms/Utils/LowerAtomic.cpp =================================================================== --- llvm/lib/Transforms/Utils/LowerAtomic.cpp +++ llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -41,43 +41,60 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, - Value *Inc) { + Value *Val) { Value *NewVal; switch (Op) { case AtomicRMWInst::Xchg: - return Inc; + return Val; case AtomicRMWInst::Add: - return Builder.CreateAdd(Loaded, Inc, "new"); + return Builder.CreateAdd(Loaded, Val, "new"); case AtomicRMWInst::Sub: - return Builder.CreateSub(Loaded, Inc, "new"); + return Builder.CreateSub(Loaded, Val, "new"); case AtomicRMWInst::And: - return Builder.CreateAnd(Loaded, Inc, "new"); + return Builder.CreateAnd(Loaded, Val, "new"); case AtomicRMWInst::Nand: - return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); + return Builder.CreateNot(Builder.CreateAnd(Loaded, Val), "new"); case AtomicRMWInst::Or: - return Builder.CreateOr(Loaded, Inc, "new"); + return Builder.CreateOr(Loaded, Val, "new"); case AtomicRMWInst::Xor: - return Builder.CreateXor(Loaded, Inc, "new"); + return Builder.CreateXor(Loaded, Val, "new"); case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + NewVal = Builder.CreateICmpSGT(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + NewVal = Builder.CreateICmpSLE(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + NewVal = Builder.CreateICmpUGT(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + NewVal = Builder.CreateICmpULE(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); case AtomicRMWInst::FAdd: - return Builder.CreateFAdd(Loaded, Inc, "new"); + return Builder.CreateFAdd(Loaded, Val, "new"); case AtomicRMWInst::FSub: - return Builder.CreateFSub(Loaded, Inc, "new"); + return Builder.CreateFSub(Loaded, Val, "new"); case AtomicRMWInst::FMax: - return Builder.CreateMaxNum(Loaded, Inc); + return Builder.CreateMaxNum(Loaded, Val); case AtomicRMWInst::FMin: - return Builder.CreateMinNum(Loaded, Inc); + return Builder.CreateMinNum(Loaded, Val); + case AtomicRMWInst::UIncWrap: { + Constant *One = ConstantInt::get(Loaded->getType(), 1); + Value *Inc = Builder.CreateAdd(Loaded, One); + Value *Cmp = Builder.CreateICmpUGE(Loaded, Val); + Constant *Zero = ConstantInt::get(Loaded->getType(), 0); + return Builder.CreateSelect(Cmp, Zero, Inc, "new"); + } + case AtomicRMWInst::UDecWrap: { + Constant *Zero = ConstantInt::get(Loaded->getType(), 0); + Constant *One = ConstantInt::get(Loaded->getType(), 1); + + Value *Dec = Builder.CreateSub(Loaded, One); + Value *CmpEq0 = Builder.CreateICmpEQ(Loaded, Zero); + Value *CmpOldGtVal = Builder.CreateICmpUGT(Loaded, Val); + Value *Or = Builder.CreateOr(CmpEq0, CmpOldGtVal); + return Builder.CreateSelect(Or, Val, Dec, "new"); + } default: llvm_unreachable("Unknown atomic op"); } Index: llvm/test/Assembler/atomic.ll =================================================================== --- 
llvm/test/Assembler/atomic.ll +++ llvm/test/Assembler/atomic.ll @@ -31,6 +31,17 @@ atomicrmw volatile xchg ptr %x, i32 10 monotonic ; CHECK: atomicrmw volatile xchg ptr %x, i32 10 syncscope("agent") monotonic atomicrmw volatile xchg ptr %x, i32 10 syncscope("agent") monotonic + + ; CHECK: atomicrmw volatile uinc_wrap ptr %x, i32 10 monotonic + atomicrmw volatile uinc_wrap ptr %x, i32 10 monotonic + ; CHECK: atomicrmw volatile uinc_wrap ptr %x, i32 10 syncscope("agent") monotonic + atomicrmw volatile uinc_wrap ptr %x, i32 10 syncscope("agent") monotonic + + ; CHECK: atomicrmw volatile udec_wrap ptr %x, i32 10 monotonic + atomicrmw volatile udec_wrap ptr %x, i32 10 monotonic + ; CHECK: atomicrmw volatile udec_wrap ptr %x, i32 10 syncscope("agent") monotonic + atomicrmw volatile udec_wrap ptr %x, i32 10 syncscope("agent") monotonic + ; CHECK: fence syncscope("singlethread") release fence syncscope("singlethread") release ; CHECK: fence seq_cst Index: llvm/test/Bitcode/compatibility.ll =================================================================== --- llvm/test/Bitcode/compatibility.ll +++ llvm/test/Bitcode/compatibility.ll @@ -874,6 +874,34 @@ ret void } +define void @uinc_udec_wrap_atomics(ptr %word) { +; CHECK: %atomicrmw.inc0 = atomicrmw uinc_wrap ptr %word, i32 64 monotonic + %atomicrmw.inc0 = atomicrmw uinc_wrap ptr %word, i32 64 monotonic + +; CHECK: %atomicrmw.inc1 = atomicrmw uinc_wrap ptr %word, i32 128 seq_cst + %atomicrmw.inc1 = atomicrmw uinc_wrap ptr %word, i32 128 seq_cst + +; CHECK: %atomicrmw.inc2 = atomicrmw volatile uinc_wrap ptr %word, i32 128 seq_cst + %atomicrmw.inc2 = atomicrmw volatile uinc_wrap ptr %word, i32 128 seq_cst + +; CHECK: %atomicrmw.inc0.syncscope = atomicrmw uinc_wrap ptr %word, i32 27 syncscope("agent") monotonic + %atomicrmw.inc0.syncscope = atomicrmw uinc_wrap ptr %word, i32 27 syncscope("agent") monotonic + +; CHECK: %atomicrmw.dec0 = atomicrmw udec_wrap ptr %word, i32 99 monotonic + %atomicrmw.dec0 = atomicrmw udec_wrap ptr %word, i32 99 monotonic + +; CHECK: %atomicrmw.dec1 = atomicrmw udec_wrap ptr %word, i32 12 seq_cst + %atomicrmw.dec1 = atomicrmw udec_wrap ptr %word, i32 12 seq_cst + +; CHECK: %atomicrmw.dec2 = atomicrmw volatile udec_wrap ptr %word, i32 12 seq_cst + %atomicrmw.dec2 = atomicrmw volatile udec_wrap ptr %word, i32 12 seq_cst + +; CHECK: %atomicrmw.dec0.syncscope = atomicrmw udec_wrap ptr %word, i32 5 syncscope("system") monotonic + %atomicrmw.dec0.syncscope = atomicrmw udec_wrap ptr %word, i32 5 syncscope("system") monotonic + + ret void +} + define void @pointer_atomics(ptr %word) { ; CHECK: %atomicrmw.xchg = atomicrmw xchg ptr %word, ptr null monotonic %atomicrmw.xchg = atomicrmw xchg ptr %word, ptr null monotonic Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -239,6 +239,12 @@ # DEBUG-NEXT: G_ATOMICRMW_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ATOMICRMW_UINC_WRAP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. 
imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ATOMICRMW_UDEC_WRAP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FENCE (opcode {{[0-9]+}}): 0 type indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined Index: llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: .LBB0_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w8, [x0] +; CHECK-NEXT: cmp w8, w1, uxtb +; CHECK-NEXT: csinc w9, wzr, w8, hs +; CHECK-NEXT: stlxrb w10, w9, [x0] +; CHECK-NEXT: cbnz w10, .LBB0_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: .LBB1_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrh w8, [x0] +; CHECK-NEXT: cmp w8, w1, uxth +; CHECK-NEXT: csinc w9, wzr, w8, hs +; CHECK-NEXT: stlxrh w10, w9, [x0] +; CHECK-NEXT: cbnz w10, .LBB1_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: .LBB2_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr w8, [x0] +; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: csinc w9, wzr, w8, hs +; CHECK-NEXT: stlxr w10, w9, [x0] +; CHECK-NEXT: cbnz w10, .LBB2_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: .LBB3_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr x0, [x8] +; CHECK-NEXT: cmp x0, x1 +; CHECK-NEXT: csinc x9, xzr, x0, hs +; CHECK-NEXT: stlxr w10, x9, [x8] +; CHECK-NEXT: cbnz w10, .LBB3_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: .LBB4_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrb w8, [x0] +; CHECK-NEXT: cmp w8, w1, uxtb +; CHECK-NEXT: sub w9, w8, #1 +; CHECK-NEXT: ccmp w8, #0, #4, ls +; CHECK-NEXT: csel w9, w1, w9, eq +; CHECK-NEXT: stlxrb w10, w9, [x0] +; CHECK-NEXT: cbnz w10, .LBB4_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; 
CHECK-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: .LBB5_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxrh w8, [x0] +; CHECK-NEXT: cmp w8, w1, uxth +; CHECK-NEXT: sub w9, w8, #1 +; CHECK-NEXT: ccmp w8, #0, #4, ls +; CHECK-NEXT: csel w9, w1, w9, eq +; CHECK-NEXT: stlxrh w10, w9, [x0] +; CHECK-NEXT: cbnz w10, .LBB5_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: .LBB6_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr w8, [x0] +; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: sub w9, w8, #1 +; CHECK-NEXT: ccmp w8, #0, #4, ls +; CHECK-NEXT: csel w9, w1, w9, eq +; CHECK-NEXT: stlxr w10, w9, [x0] +; CHECK-NEXT: cbnz w10, .LBB6_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: .LBB7_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaxr x0, [x8] +; CHECK-NEXT: cmp x0, x1 +; CHECK-NEXT: sub x9, x0, #1 +; CHECK-NEXT: ccmp x0, #0, #4, ls +; CHECK-NEXT: csel x9, x1, x9, eq +; CHECK-NEXT: stlxr w10, x9, [x8] +; CHECK-NEXT: cbnz w10, .LBB7_1 +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll +++ llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll @@ -529,3 +529,57 @@ store i32 %val, ptr addrspace(1) %out ret void } + +define i32 @atomicrmw_inc_private_i32(ptr addrspace(5) %ptr) { +; IR-LABEL: @atomicrmw_inc_private_i32( +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; IR-NEXT: [[TMP3:%.*]] = icmp uge i32 [[TMP1]], 4 +; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]] +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 +; IR-NEXT: ret i32 [[TMP1]] +; +; GCN-LABEL: atomicrmw_inc_private_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_i32_e32 v2, vcc, 1, v1 +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 4, v1 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw uinc_wrap ptr addrspace(5) %ptr, i32 4 seq_cst + ret i32 %result +} + +define i32 @atomicrmw_dec_private_i32(ptr addrspace(5) %ptr) { +; IR-LABEL: @atomicrmw_dec_private_i32( +; IR-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1 +; IR-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0 +; 
IR-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP1]], 4 +; IR-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 4, i32 [[TMP2]] +; IR-NEXT: store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4 +; IR-NEXT: ret i32 [[TMP1]] +; +; GCN-LABEL: atomicrmw_dec_private_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_i32_e32 v2, vcc, -1, v1 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GCN-NEXT: v_cmp_lt_u32_e64 s[4:5], 4, v1 +; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 4, s[4:5] +; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw udec_wrap ptr addrspace(5) %ptr, i32 4 seq_cst + ret i32 %result +} Index: llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,202 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 < %s | FileCheck %s + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r12, r1 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: .LBB0_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r0] +; CHECK-NEXT: cmp r1, r12 +; CHECK-NEXT: add r3, r1, #1 +; CHECK-NEXT: movwhs r3, #0 +; CHECK-NEXT: strexb r2, r3, [r0] +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: bx lr + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxth r12, r1 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: .LBB1_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r0] +; CHECK-NEXT: cmp r1, r12 +; CHECK-NEXT: add r3, r1, #1 +; CHECK-NEXT: movwhs r3, #0 +; CHECK-NEXT: strexh r2, r3, [r0] +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: bne .LBB1_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: bx lr + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: dmb ish +; CHECK-NEXT: .LBB2_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r2, [r0] +; CHECK-NEXT: cmp r2, r1 +; CHECK-NEXT: add r12, r2, #1 +; CHECK-NEXT: movwhs r12, #0 +; CHECK-NEXT: strex r3, r12, [r0] +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: bne .LBB2_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: bx lr + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: dmb ish +; CHECK-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner 
Loop Header: Depth=1 +; CHECK-NEXT: ldrexd r4, r5, [r0] +; CHECK-NEXT: adds r6, r4, #1 +; CHECK-NEXT: adc r7, r5, #0 +; CHECK-NEXT: subs r1, r4, r2 +; CHECK-NEXT: sbcs r1, r5, r3 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwhs r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movwne r7, #0 +; CHECK-NEXT: movwne r6, #0 +; CHECK-NEXT: strexd r1, r6, r7, [r0] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB3_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: dmb ish +; CHECK-NEXT: .LBB4_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: uxtb r3, r1 +; CHECK-NEXT: ldrexb r12, [r0] +; CHECK-NEXT: cmp r12, r3 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: subls r3, r12, #1 +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: strexb r2, r3, [r0] +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: bne .LBB4_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: bx lr + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: dmb ish +; CHECK-NEXT: .LBB5_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: uxth r3, r1 +; CHECK-NEXT: ldrexh r12, [r0] +; CHECK-NEXT: cmp r12, r3 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: subls r3, r12, #1 +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: strexh r2, r3, [r0] +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: bne .LBB5_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: bx lr + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: dmb ish +; CHECK-NEXT: .LBB6_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r12, [r0] +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: cmp r12, r1 +; CHECK-NEXT: subls r3, r12, #1 +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: strex r2, r3, [r0] +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: bne .LBB6_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: bx lr + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: dmb ish +; CHECK-NEXT: .LBB7_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexd r4, r5, [r0] +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: subs r1, r2, r4 +; CHECK-NEXT: sbcs r1, r3, r5 +; CHECK-NEXT: orr r1, r4, r5 +; CHECK-NEXT: clz r1, r1 +; CHECK-NEXT: movwlo r12, #1 +; CHECK-NEXT: lsr r1, r1, #5 +; CHECK-NEXT: subs r6, r4, #1 +; CHECK-NEXT: sbc r7, r5, #0 +; CHECK-NEXT: orr r1, r1, r12 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movne r7, r3 +; CHECK-NEXT: movne r6, r2 +; CHECK-NEXT: strexd r1, r6, r7, [r0] 
+; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB7_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/CodeGen/Hexagon/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Hexagon/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,377 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i8: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = and(#24,asl(r0,#3)) +; CHECK-NEXT: r2 = and(r0,#-4) +; CHECK-NEXT: r3 = #255 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = asl(r3,r0) +; CHECK-NEXT: r3 = and(r1,#255) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = sub(#-1,r4) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r5 = memw_locked(r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = lsr(r5,r0) +; CHECK-NEXT: r5 = and(r5,r4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 = and(r1,#255) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gtu(r3,r6) +; CHECK-NEXT: if (p0.new) r6 = add(r1,#1) +; CHECK-NEXT: if (!p0.new) r6 = #0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 = and(r6,#255) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 |= asl(r6,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memw_locked(r2,p0) = r5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB0_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i16: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = and(#24,asl(r0,#3)) +; CHECK-NEXT: r2 = and(r0,#-4) +; CHECK-NEXT: r3 = ##65535 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = asl(r3,r0) +; CHECK-NEXT: r3 = zxth(r1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = sub(#-1,r4) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB1_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r5 = memw_locked(r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = lsr(r5,r0) +; CHECK-NEXT: r5 = and(r5,r4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 = zxth(r1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gtu(r3,r6) +; CHECK-NEXT: if (p0.new) r6 = add(r1,#1) +; CHECK-NEXT: if (!p0.new) r6 = #0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 = zxth(r6) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 |= asl(r6,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memw_locked(r2,p0) = r5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB1_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { 
+; CHECK-LABEL: atomicrmw_uinc_wrap_i32: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB2_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = memw_locked(r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gtu(r1,r2) +; CHECK-NEXT: if (p0.new) r3 = add(r2,#1) +; CHECK-NEXT: if (!p0.new) r3 = #0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memw_locked(r0,p0) = r3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB2_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i64: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r1 = #0 +; CHECK-NEXT: r7:6 = combine(#0,#1) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB3_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r5:4 = memd_locked(r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r9:8 = add(r5:4,r7:6) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gtu(r3:2,r5:4) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r8 = mux(p0,r8,r1) +; CHECK-NEXT: r9 = mux(p0,r9,r1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memd_locked(r0,p0) = r9:8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB3_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r1:0 = combine(r5,r4) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i8: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = and(#24,asl(r0,#3)) +; CHECK-NEXT: r3 = and(r0,#-4) +; CHECK-NEXT: r4 = #255 +; CHECK-NEXT: r5 = and(r1,#255) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = asl(r4,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 = sub(#-1,r2) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB4_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r7 = memw_locked(r3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = lsr(r7,r0) +; CHECK-NEXT: r7 = and(r7,r6) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = bitsclr(r2,r4) +; CHECK-NEXT: r8 = and(r2,#255) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p1 = cmp.gtu(r8,r5) +; CHECK-NEXT: if (p1.new) r8 = add(r1,#0) +; CHECK-NEXT: if (!p1.new) r8 = add(r2,#-1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) r8 = add(r1,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r8 = and(r8,#255) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r7 |= asl(r8,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memw_locked(r3,p0) = r7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB4_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i16: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; 
CHECK-NEXT: r0 = and(#24,asl(r0,#3)) +; CHECK-NEXT: r3 = and(r0,#-4) +; CHECK-NEXT: r4 = ##65535 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = asl(r4,r0) +; CHECK-NEXT: r5 = zxth(r1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r6 = sub(#-1,r2) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB5_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r7 = memw_locked(r3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = lsr(r7,r0) +; CHECK-NEXT: r7 = and(r7,r6) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = bitsclr(r2,r4) +; CHECK-NEXT: r8 = zxth(r2) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p1 = cmp.gtu(r8,r5) +; CHECK-NEXT: if (p1.new) r8 = add(r1,#0) +; CHECK-NEXT: if (!p1.new) r8 = add(r2,#-1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) r8 = add(r1,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r8 = zxth(r8) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r7 |= asl(r8,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memw_locked(r3,p0) = r7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB5_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i32: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB6_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = memw_locked(r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gtu(r2,r1) +; CHECK-NEXT: p1 = cmp.eq(r2,#0) +; CHECK-NEXT: if (p0.new) r3 = add(r1,#0) +; CHECK-NEXT: if (!p0.new) r3 = add(r2,#-1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p1) r3 = add(r1,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memw_locked(r0,p0) = r3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB6_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i64: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r7:6 = combine(#-1,#-1) +; CHECK-NEXT: r9:8 = combine(#0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB7_1: // %atomicrmw.start +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: r5:4 = memd_locked(r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r13:12 = add(r5:4,r7:6) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.gtu(r5:4,r3:2) +; CHECK-NEXT: p1 = cmp.eq(r5:4,r9:8) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = mux(p0,r2,r12) +; CHECK-NEXT: r14 = mux(p0,r3,r13) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r10 = mux(p1,r2,r1) +; CHECK-NEXT: r11 = mux(p1,r3,r14) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: memd_locked(r0,p0) = r11:10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) jump:nt .LBB7_1 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %atomicrmw.end +; CHECK-NEXT: { +; CHECK-NEXT: r1:0 = combine(r5,r4) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} 
Index: llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,394 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA64 %s + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; LA64-LABEL: atomicrmw_uinc_wrap_i8: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: ld.w $a3, $a2, 0 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: andi $a0, $a0, 24 +; LA64-NEXT: nor $a4, $a4, $zero +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: .LBB0_1: # %atomicrmw.start +; LA64-NEXT: # =>This Loop Header: Depth=1 +; LA64-NEXT: # Child Loop BB0_3 Depth 2 +; LA64-NEXT: srl.w $a5, $a3, $a0 +; LA64-NEXT: andi $a6, $a5, 255 +; LA64-NEXT: sltu $a6, $a6, $a1 +; LA64-NEXT: addi.d $a5, $a5, 1 +; LA64-NEXT: xori $a6, $a6, 1 +; LA64-NEXT: masknez $a5, $a5, $a6 +; LA64-NEXT: maskeqz $a6, $zero, $a6 +; LA64-NEXT: or $a5, $a6, $a5 +; LA64-NEXT: andi $a5, $a5, 255 +; LA64-NEXT: sll.w $a5, $a5, $a0 +; LA64-NEXT: and $a6, $a3, $a4 +; LA64-NEXT: or $a6, $a6, $a5 +; LA64-NEXT: .LBB0_3: # %atomicrmw.start +; LA64-NEXT: # Parent Loop BB0_1 Depth=1 +; LA64-NEXT: # => This Inner Loop Header: Depth=2 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: bne $a5, $a3, .LBB0_5 +; LA64-NEXT: # %bb.4: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a7, $a6 +; LA64-NEXT: sc.w $a7, $a2, 0 +; LA64-NEXT: beqz $a7, .LBB0_3 +; LA64-NEXT: b .LBB0_6 +; LA64-NEXT: .LBB0_5: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB0_6: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LA64-NEXT: addi.w $a6, $a3, 0 +; LA64-NEXT: move $a3, $a5 +; LA64-NEXT: bne $a5, $a6, .LBB0_1 +; LA64-NEXT: # %bb.2: # %atomicrmw.end +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; LA64-LABEL: atomicrmw_uinc_wrap_i16: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: ld.w $a3, $a2, 0 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: andi $a0, $a0, 24 +; LA64-NEXT: nor $a4, $a4, $zero +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: .LBB1_1: # %atomicrmw.start +; LA64-NEXT: # =>This Loop Header: Depth=1 +; LA64-NEXT: # Child Loop BB1_3 Depth 2 +; LA64-NEXT: srl.w $a5, $a3, $a0 +; LA64-NEXT: bstrpick.d $a6, $a5, 15, 0 +; LA64-NEXT: sltu $a6, $a6, $a1 +; LA64-NEXT: addi.d $a5, $a5, 1 +; LA64-NEXT: xori $a6, $a6, 1 +; LA64-NEXT: masknez $a5, $a5, $a6 +; LA64-NEXT: maskeqz $a6, $zero, $a6 +; LA64-NEXT: or $a5, $a6, $a5 +; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0 +; LA64-NEXT: sll.w $a5, $a5, $a0 +; LA64-NEXT: and $a6, $a3, $a4 +; LA64-NEXT: or $a6, $a6, $a5 +; LA64-NEXT: .LBB1_3: # %atomicrmw.start +; LA64-NEXT: # Parent Loop BB1_1 Depth=1 +; LA64-NEXT: # => This Inner Loop Header: Depth=2 +; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: bne $a5, $a3, .LBB1_5 +; LA64-NEXT: # %bb.4: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a7, $a6 +; 
LA64-NEXT: sc.w $a7, $a2, 0 +; LA64-NEXT: beqz $a7, .LBB1_3 +; LA64-NEXT: b .LBB1_6 +; LA64-NEXT: .LBB1_5: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB1_6: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LA64-NEXT: addi.w $a6, $a3, 0 +; LA64-NEXT: move $a3, $a5 +; LA64-NEXT: bne $a5, $a6, .LBB1_1 +; LA64-NEXT: # %bb.2: # %atomicrmw.end +; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; LA64-LABEL: atomicrmw_uinc_wrap_i32: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a3, $a0, 0 +; LA64-NEXT: bstrpick.d $a2, $a1, 31, 0 +; LA64-NEXT: .LBB2_1: # %atomicrmw.start +; LA64-NEXT: # =>This Loop Header: Depth=1 +; LA64-NEXT: # Child Loop BB2_3 Depth 2 +; LA64-NEXT: bstrpick.d $a1, $a3, 31, 0 +; LA64-NEXT: sltu $a1, $a1, $a2 +; LA64-NEXT: xori $a1, $a1, 1 +; LA64-NEXT: addi.d $a4, $a3, 1 +; LA64-NEXT: masknez $a4, $a4, $a1 +; LA64-NEXT: maskeqz $a1, $zero, $a1 +; LA64-NEXT: or $a4, $a1, $a4 +; LA64-NEXT: .LBB2_3: # %atomicrmw.start +; LA64-NEXT: # Parent Loop BB2_1 Depth=1 +; LA64-NEXT: # => This Inner Loop Header: Depth=2 +; LA64-NEXT: ll.w $a1, $a0, 0 +; LA64-NEXT: bne $a1, $a3, .LBB2_5 +; LA64-NEXT: # %bb.4: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB2_3 +; LA64-NEXT: b .LBB2_6 +; LA64-NEXT: .LBB2_5: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB2_6: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LA64-NEXT: addi.w $a4, $a3, 0 +; LA64-NEXT: move $a3, $a1 +; LA64-NEXT: bne $a1, $a4, .LBB2_1 +; LA64-NEXT: # %bb.2: # %atomicrmw.end +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; LA64-LABEL: atomicrmw_uinc_wrap_i64: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a2, $a0, 0 +; LA64-NEXT: .LBB3_1: # %atomicrmw.start +; LA64-NEXT: # =>This Loop Header: Depth=1 +; LA64-NEXT: # Child Loop BB3_3 Depth 2 +; LA64-NEXT: move $a3, $a2 +; LA64-NEXT: sltu $a2, $a2, $a1 +; LA64-NEXT: xori $a2, $a2, 1 +; LA64-NEXT: addi.d $a4, $a3, 1 +; LA64-NEXT: masknez $a4, $a4, $a2 +; LA64-NEXT: maskeqz $a2, $zero, $a2 +; LA64-NEXT: or $a4, $a2, $a4 +; LA64-NEXT: .LBB3_3: # %atomicrmw.start +; LA64-NEXT: # Parent Loop BB3_1 Depth=1 +; LA64-NEXT: # => This Inner Loop Header: Depth=2 +; LA64-NEXT: ll.d $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a3, .LBB3_5 +; LA64-NEXT: # %bb.4: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: sc.d $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB3_3 +; LA64-NEXT: b .LBB3_6 +; LA64-NEXT: .LBB3_5: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB3_6: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LA64-NEXT: bne $a2, $a3, .LBB3_1 +; LA64-NEXT: # %bb.2: # %atomicrmw.end +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; LA64-LABEL: atomicrmw_udec_wrap_i8: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: ld.w $a3, $a2, 0 +; 
LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: andi $a0, $a0, 24 +; LA64-NEXT: nor $a4, $a4, $zero +; LA64-NEXT: andi $a5, $a1, 255 +; LA64-NEXT: .LBB4_1: # %atomicrmw.start +; LA64-NEXT: # =>This Loop Header: Depth=1 +; LA64-NEXT: # Child Loop BB4_3 Depth 2 +; LA64-NEXT: srl.w $a6, $a3, $a0 +; LA64-NEXT: andi $a7, $a6, 255 +; LA64-NEXT: sltu $t0, $a5, $a7 +; LA64-NEXT: addi.d $a6, $a6, -1 +; LA64-NEXT: masknez $a6, $a6, $t0 +; LA64-NEXT: maskeqz $t0, $a1, $t0 +; LA64-NEXT: or $a6, $t0, $a6 +; LA64-NEXT: sltui $a7, $a7, 1 +; LA64-NEXT: masknez $a6, $a6, $a7 +; LA64-NEXT: maskeqz $a7, $a1, $a7 +; LA64-NEXT: or $a6, $a7, $a6 +; LA64-NEXT: andi $a6, $a6, 255 +; LA64-NEXT: sll.w $a6, $a6, $a0 +; LA64-NEXT: and $a7, $a3, $a4 +; LA64-NEXT: or $a7, $a7, $a6 +; LA64-NEXT: .LBB4_3: # %atomicrmw.start +; LA64-NEXT: # Parent Loop BB4_1 Depth=1 +; LA64-NEXT: # => This Inner Loop Header: Depth=2 +; LA64-NEXT: ll.w $a6, $a2, 0 +; LA64-NEXT: bne $a6, $a3, .LBB4_5 +; LA64-NEXT: # %bb.4: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $t0, $a7 +; LA64-NEXT: sc.w $t0, $a2, 0 +; LA64-NEXT: beqz $t0, .LBB4_3 +; LA64-NEXT: b .LBB4_6 +; LA64-NEXT: .LBB4_5: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB4_6: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: addi.w $a7, $a3, 0 +; LA64-NEXT: move $a3, $a6 +; LA64-NEXT: bne $a6, $a7, .LBB4_1 +; LA64-NEXT: # %bb.2: # %atomicrmw.end +; LA64-NEXT: srl.w $a0, $a6, $a0 +; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; LA64-LABEL: atomicrmw_udec_wrap_i16: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -4 +; LA64-NEXT: and $a2, $a0, $a2 +; LA64-NEXT: ld.w $a3, $a2, 0 +; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: lu12i.w $a4, 15 +; LA64-NEXT: ori $a4, $a4, 4095 +; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: andi $a0, $a0, 24 +; LA64-NEXT: nor $a4, $a4, $zero +; LA64-NEXT: bstrpick.d $a5, $a1, 15, 0 +; LA64-NEXT: .LBB5_1: # %atomicrmw.start +; LA64-NEXT: # =>This Loop Header: Depth=1 +; LA64-NEXT: # Child Loop BB5_3 Depth 2 +; LA64-NEXT: srl.w $a6, $a3, $a0 +; LA64-NEXT: bstrpick.d $a7, $a6, 15, 0 +; LA64-NEXT: sltu $t0, $a5, $a7 +; LA64-NEXT: addi.d $a6, $a6, -1 +; LA64-NEXT: masknez $a6, $a6, $t0 +; LA64-NEXT: maskeqz $t0, $a1, $t0 +; LA64-NEXT: or $a6, $t0, $a6 +; LA64-NEXT: sltui $a7, $a7, 1 +; LA64-NEXT: masknez $a6, $a6, $a7 +; LA64-NEXT: maskeqz $a7, $a1, $a7 +; LA64-NEXT: or $a6, $a7, $a6 +; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 +; LA64-NEXT: sll.w $a6, $a6, $a0 +; LA64-NEXT: and $a7, $a3, $a4 +; LA64-NEXT: or $a7, $a7, $a6 +; LA64-NEXT: .LBB5_3: # %atomicrmw.start +; LA64-NEXT: # Parent Loop BB5_1 Depth=1 +; LA64-NEXT: # => This Inner Loop Header: Depth=2 +; LA64-NEXT: ll.w $a6, $a2, 0 +; LA64-NEXT: bne $a6, $a3, .LBB5_5 +; LA64-NEXT: # %bb.4: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $t0, $a7 +; LA64-NEXT: sc.w $t0, $a2, 0 +; LA64-NEXT: beqz $t0, .LBB5_3 +; LA64-NEXT: b .LBB5_6 +; LA64-NEXT: .LBB5_5: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB5_6: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: addi.w $a7, $a3, 0 +; LA64-NEXT: move $a3, $a6 +; LA64-NEXT: bne $a6, $a7, .LBB5_1 +; 
LA64-NEXT: # %bb.2: # %atomicrmw.end +; LA64-NEXT: srl.w $a0, $a6, $a0 +; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; LA64-LABEL: atomicrmw_udec_wrap_i32: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a4, $a0, 0 +; LA64-NEXT: bstrpick.d $a3, $a1, 31, 0 +; LA64-NEXT: .LBB6_1: # %atomicrmw.start +; LA64-NEXT: # =>This Loop Header: Depth=1 +; LA64-NEXT: # Child Loop BB6_3 Depth 2 +; LA64-NEXT: bstrpick.d $a2, $a4, 31, 0 +; LA64-NEXT: sltu $a5, $a3, $a2 +; LA64-NEXT: addi.d $a6, $a4, -1 +; LA64-NEXT: masknez $a6, $a6, $a5 +; LA64-NEXT: maskeqz $a5, $a1, $a5 +; LA64-NEXT: or $a5, $a5, $a6 +; LA64-NEXT: sltui $a2, $a2, 1 +; LA64-NEXT: masknez $a5, $a5, $a2 +; LA64-NEXT: maskeqz $a2, $a1, $a2 +; LA64-NEXT: or $a5, $a2, $a5 +; LA64-NEXT: .LBB6_3: # %atomicrmw.start +; LA64-NEXT: # Parent Loop BB6_1 Depth=1 +; LA64-NEXT: # => This Inner Loop Header: Depth=2 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a4, .LBB6_5 +; LA64-NEXT: # %bb.4: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sc.w $a6, $a0, 0 +; LA64-NEXT: beqz $a6, .LBB6_3 +; LA64-NEXT: b .LBB6_6 +; LA64-NEXT: .LBB6_5: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB6_6: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 +; LA64-NEXT: addi.w $a5, $a4, 0 +; LA64-NEXT: move $a4, $a2 +; LA64-NEXT: bne $a2, $a5, .LBB6_1 +; LA64-NEXT: # %bb.2: # %atomicrmw.end +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; LA64-LABEL: atomicrmw_udec_wrap_i64: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a2, $a0, 0 +; LA64-NEXT: .LBB7_1: # %atomicrmw.start +; LA64-NEXT: # =>This Loop Header: Depth=1 +; LA64-NEXT: # Child Loop BB7_3 Depth 2 +; LA64-NEXT: move $a3, $a2 +; LA64-NEXT: sltu $a2, $a1, $a2 +; LA64-NEXT: addi.d $a4, $a3, -1 +; LA64-NEXT: masknez $a4, $a4, $a2 +; LA64-NEXT: maskeqz $a2, $a1, $a2 +; LA64-NEXT: or $a2, $a2, $a4 +; LA64-NEXT: sltui $a4, $a3, 1 +; LA64-NEXT: masknez $a2, $a2, $a4 +; LA64-NEXT: maskeqz $a4, $a1, $a4 +; LA64-NEXT: or $a4, $a4, $a2 +; LA64-NEXT: .LBB7_3: # %atomicrmw.start +; LA64-NEXT: # Parent Loop BB7_1 Depth=1 +; LA64-NEXT: # => This Inner Loop Header: Depth=2 +; LA64-NEXT: ll.d $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a3, .LBB7_5 +; LA64-NEXT: # %bb.4: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2 +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: sc.d $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB7_3 +; LA64-NEXT: b .LBB7_6 +; LA64-NEXT: .LBB7_5: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 +; LA64-NEXT: dbar 1792 +; LA64-NEXT: .LBB7_6: # %atomicrmw.start +; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1 +; LA64-NEXT: bne $a2, $a3, .LBB7_1 +; LA64-NEXT: # %bb.2: # %atomicrmw.end +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,421 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s + +define i8 
@atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: rlwinm 7, 5, 3, 27, 28 +; CHECK-NEXT: lbz 3, 0(3) +; CHECK-NEXT: xori 7, 7, 24 +; CHECK-NEXT: li 8, 255 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: clrlwi 4, 4, 24 +; CHECK-NEXT: rldicr 5, 5, 0, 61 +; CHECK-NEXT: slw 8, 8, 7 +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: srw 3, 11, 7 +; CHECK-NEXT: cmplw 3, 9 +; CHECK-NEXT: beq 0, .LBB0_8 +; CHECK-NEXT: .LBB0_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_6 Depth 2 +; CHECK-NEXT: clrlwi 9, 3, 24 +; CHECK-NEXT: addi 10, 3, 1 +; CHECK-NEXT: cmplw 9, 4 +; CHECK-NEXT: bc 12, 0, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: ori 3, 6, 0 +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: addi 3, 10, 0 +; CHECK-NEXT: .LBB0_5: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: slw 11, 9, 7 +; CHECK-NEXT: slw 3, 3, 7 +; CHECK-NEXT: and 3, 3, 8 +; CHECK-NEXT: and 10, 11, 8 +; CHECK-NEXT: .LBB0_6: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: lwarx 12, 0, 5 +; CHECK-NEXT: and 11, 12, 8 +; CHECK-NEXT: cmpw 11, 10 +; CHECK-NEXT: bne 0, .LBB0_1 +; CHECK-NEXT: # %bb.7: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: andc 12, 12, 8 +; CHECK-NEXT: or 12, 12, 3 +; CHECK-NEXT: stwcx. 12, 0, 5 +; CHECK-NEXT: bne 0, .LBB0_6 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_8: # %atomicrmw.end +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: lhz 3, 0(3) +; CHECK-NEXT: rlwinm 7, 5, 3, 27, 27 +; CHECK-NEXT: xori 7, 7, 16 +; CHECK-NEXT: ori 8, 6, 65535 +; CHECK-NEXT: clrlwi 4, 4, 16 +; CHECK-NEXT: rldicr 5, 5, 0, 61 +; CHECK-NEXT: slw 8, 8, 7 +; CHECK-NEXT: b .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: srw 3, 11, 7 +; CHECK-NEXT: cmplw 3, 9 +; CHECK-NEXT: beq 0, .LBB1_8 +; CHECK-NEXT: .LBB1_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB1_6 Depth 2 +; CHECK-NEXT: clrlwi 9, 3, 16 +; CHECK-NEXT: addi 10, 3, 1 +; CHECK-NEXT: cmplw 9, 4 +; CHECK-NEXT: bc 12, 0, .LBB1_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: ori 3, 6, 0 +; CHECK-NEXT: b .LBB1_5 +; CHECK-NEXT: .LBB1_4: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: addi 3, 10, 0 +; CHECK-NEXT: .LBB1_5: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: slw 11, 9, 7 +; CHECK-NEXT: slw 3, 3, 7 +; CHECK-NEXT: and 3, 3, 8 +; CHECK-NEXT: and 10, 11, 8 +; CHECK-NEXT: .LBB1_6: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: lwarx 12, 0, 5 +; CHECK-NEXT: and 11, 12, 8 +; CHECK-NEXT: cmpw 11, 10 +; CHECK-NEXT: bne 0, .LBB1_1 +; CHECK-NEXT: # %bb.7: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: andc 12, 12, 8 +; CHECK-NEXT: or 12, 12, 3 +; CHECK-NEXT: stwcx. 
12, 0, 5 +; CHECK-NEXT: bne 0, .LBB1_6 +; CHECK-NEXT: b .LBB1_1 +; CHECK-NEXT: .LBB1_8: # %atomicrmw.end +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: lwz 7, 0(3) +; CHECK-NEXT: b .LBB2_2 +; CHECK-NEXT: .LBB2_1: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: cmplw 5, 7 +; CHECK-NEXT: mr 7, 5 +; CHECK-NEXT: beq 0, .LBB2_7 +; CHECK-NEXT: .LBB2_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB2_5 Depth 2 +; CHECK-NEXT: addi 5, 7, 1 +; CHECK-NEXT: cmplw 7, 4 +; CHECK-NEXT: bc 12, 0, .LBB2_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: ori 8, 6, 0 +; CHECK-NEXT: b .LBB2_5 +; CHECK-NEXT: .LBB2_4: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: addi 8, 5, 0 +; CHECK-NEXT: .LBB2_5: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB2_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: lwarx 5, 0, 3 +; CHECK-NEXT: cmpw 5, 7 +; CHECK-NEXT: bne 0, .LBB2_1 +; CHECK-NEXT: # %bb.6: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: stwcx. 8, 0, 3 +; CHECK-NEXT: bne 0, .LBB2_5 +; CHECK-NEXT: b .LBB2_1 +; CHECK-NEXT: .LBB2_7: # %atomicrmw.end +; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: ld 7, 0(3) +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: b .LBB3_2 +; CHECK-NEXT: .LBB3_1: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: cmpld 5, 7 +; CHECK-NEXT: mr 7, 5 +; CHECK-NEXT: beq 0, .LBB3_7 +; CHECK-NEXT: .LBB3_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB3_5 Depth 2 +; CHECK-NEXT: addi 5, 7, 1 +; CHECK-NEXT: cmpld 7, 4 +; CHECK-NEXT: bc 12, 0, .LBB3_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: ori 8, 6, 0 +; CHECK-NEXT: b .LBB3_5 +; CHECK-NEXT: .LBB3_4: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: addi 8, 5, 0 +; CHECK-NEXT: .LBB3_5: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB3_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ldarx 5, 0, 3 +; CHECK-NEXT: cmpd 5, 7 +; CHECK-NEXT: bne 0, .LBB3_1 +; CHECK-NEXT: # %bb.6: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: stdcx. 
8, 0, 3 +; CHECK-NEXT: bne 0, .LBB3_5 +; CHECK-NEXT: b .LBB3_1 +; CHECK-NEXT: .LBB3_7: # %atomicrmw.end +; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: rlwinm 7, 5, 3, 27, 28 +; CHECK-NEXT: lbz 3, 0(3) +; CHECK-NEXT: xori 7, 7, 24 +; CHECK-NEXT: li 8, 255 +; CHECK-NEXT: clrlwi 6, 4, 24 +; CHECK-NEXT: rldicr 5, 5, 0, 61 +; CHECK-NEXT: slw 8, 8, 7 +; CHECK-NEXT: b .LBB4_2 +; CHECK-NEXT: .LBB4_1: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: srw 3, 11, 7 +; CHECK-NEXT: cmplw 3, 9 +; CHECK-NEXT: beq 0, .LBB4_8 +; CHECK-NEXT: .LBB4_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB4_6 Depth 2 +; CHECK-NEXT: andi. 9, 3, 255 +; CHECK-NEXT: cmplw 1, 9, 6 +; CHECK-NEXT: addi 10, 3, -1 +; CHECK-NEXT: cror 20, 2, 5 +; CHECK-NEXT: bc 12, 20, .LBB4_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: ori 3, 10, 0 +; CHECK-NEXT: b .LBB4_5 +; CHECK-NEXT: .LBB4_4: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: addi 3, 4, 0 +; CHECK-NEXT: .LBB4_5: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: slw 11, 9, 7 +; CHECK-NEXT: slw 3, 3, 7 +; CHECK-NEXT: and 3, 3, 8 +; CHECK-NEXT: and 10, 11, 8 +; CHECK-NEXT: .LBB4_6: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB4_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: lwarx 12, 0, 5 +; CHECK-NEXT: and 11, 12, 8 +; CHECK-NEXT: cmpw 11, 10 +; CHECK-NEXT: bne 0, .LBB4_1 +; CHECK-NEXT: # %bb.7: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: andc 12, 12, 8 +; CHECK-NEXT: or 12, 12, 3 +; CHECK-NEXT: stwcx. 12, 0, 5 +; CHECK-NEXT: bne 0, .LBB4_6 +; CHECK-NEXT: b .LBB4_1 +; CHECK-NEXT: .LBB4_8: # %atomicrmw.end +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: li 8, 0 +; CHECK-NEXT: lhz 3, 0(3) +; CHECK-NEXT: rlwinm 7, 5, 3, 27, 27 +; CHECK-NEXT: xori 7, 7, 16 +; CHECK-NEXT: ori 8, 8, 65535 +; CHECK-NEXT: clrlwi 6, 4, 16 +; CHECK-NEXT: rldicr 5, 5, 0, 61 +; CHECK-NEXT: slw 8, 8, 7 +; CHECK-NEXT: b .LBB5_2 +; CHECK-NEXT: .LBB5_1: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: srw 3, 11, 7 +; CHECK-NEXT: cmplw 3, 9 +; CHECK-NEXT: beq 0, .LBB5_8 +; CHECK-NEXT: .LBB5_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB5_6 Depth 2 +; CHECK-NEXT: andi. 
9, 3, 65535 +; CHECK-NEXT: cmplw 1, 9, 6 +; CHECK-NEXT: addi 10, 3, -1 +; CHECK-NEXT: cror 20, 2, 5 +; CHECK-NEXT: bc 12, 20, .LBB5_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: ori 3, 10, 0 +; CHECK-NEXT: b .LBB5_5 +; CHECK-NEXT: .LBB5_4: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: addi 3, 4, 0 +; CHECK-NEXT: .LBB5_5: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: slw 11, 9, 7 +; CHECK-NEXT: slw 3, 3, 7 +; CHECK-NEXT: and 3, 3, 8 +; CHECK-NEXT: and 10, 11, 8 +; CHECK-NEXT: .LBB5_6: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB5_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: lwarx 12, 0, 5 +; CHECK-NEXT: and 11, 12, 8 +; CHECK-NEXT: cmpw 11, 10 +; CHECK-NEXT: bne 0, .LBB5_1 +; CHECK-NEXT: # %bb.7: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: andc 12, 12, 8 +; CHECK-NEXT: or 12, 12, 3 +; CHECK-NEXT: stwcx. 12, 0, 5 +; CHECK-NEXT: bne 0, .LBB5_6 +; CHECK-NEXT: b .LBB5_1 +; CHECK-NEXT: .LBB5_8: # %atomicrmw.end +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: lwz 6, 0(3) +; CHECK-NEXT: b .LBB6_2 +; CHECK-NEXT: .LBB6_1: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: cmplw 5, 6 +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: beq 0, .LBB6_7 +; CHECK-NEXT: .LBB6_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB6_5 Depth 2 +; CHECK-NEXT: cmpwi 6, 0 +; CHECK-NEXT: cmplw 1, 6, 4 +; CHECK-NEXT: addi 5, 6, -1 +; CHECK-NEXT: cror 20, 2, 5 +; CHECK-NEXT: bc 12, 20, .LBB6_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: ori 7, 5, 0 +; CHECK-NEXT: b .LBB6_5 +; CHECK-NEXT: .LBB6_4: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: addi 7, 4, 0 +; CHECK-NEXT: .LBB6_5: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB6_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: lwarx 5, 0, 3 +; CHECK-NEXT: cmpw 5, 6 +; CHECK-NEXT: bne 0, .LBB6_1 +; CHECK-NEXT: # %bb.6: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: stwcx. 
7, 0, 3 +; CHECK-NEXT: bne 0, .LBB6_5 +; CHECK-NEXT: b .LBB6_1 +; CHECK-NEXT: .LBB6_7: # %atomicrmw.end +; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: sync +; CHECK-NEXT: ld 6, 0(3) +; CHECK-NEXT: b .LBB7_2 +; CHECK-NEXT: .LBB7_1: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: cmpld 5, 6 +; CHECK-NEXT: mr 6, 5 +; CHECK-NEXT: beq 0, .LBB7_7 +; CHECK-NEXT: .LBB7_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB7_5 Depth 2 +; CHECK-NEXT: cmpdi 6, 0 +; CHECK-NEXT: cmpld 1, 6, 4 +; CHECK-NEXT: addi 5, 6, -1 +; CHECK-NEXT: cror 20, 2, 5 +; CHECK-NEXT: bc 12, 20, .LBB7_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: ori 7, 5, 0 +; CHECK-NEXT: b .LBB7_5 +; CHECK-NEXT: .LBB7_4: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: addi 7, 4, 0 +; CHECK-NEXT: .LBB7_5: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB7_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ldarx 5, 0, 3 +; CHECK-NEXT: cmpd 5, 6 +; CHECK-NEXT: bne 0, .LBB7_1 +; CHECK-NEXT: # %bb.6: # %atomicrmw.start +; CHECK-NEXT: # +; CHECK-NEXT: stdcx. 7, 0, 3 +; CHECK-NEXT: bne 0, .LBB7_5 +; CHECK-NEXT: b .LBB7_1 +; CHECK-NEXT: .LBB7_7: # %atomicrmw.end +; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,1424 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32IA %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32IA %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IA %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64IA %s + + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; RV32I-LABEL: atomicrmw_uinc_wrap_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: .LBB0_1: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: addi a0, a3, 1 +; RV32I-NEXT: andi a1, a3, 255 +; RV32I-NEXT: sltu a1, a1, s1 +; RV32I-NEXT: xori a1, a1, 1 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a2, a1, a0 +; RV32I-NEXT: sb a3, 3(sp) +; RV32I-NEXT: addi a1, sp, 3 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call 
__atomic_compare_exchange_1@plt +; RV32I-NEXT: lb a3, 3(sp) +; RV32I-NEXT: beqz a0, .LBB0_1 +; RV32I-NEXT: # %bb.2: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_uinc_wrap_i8: +; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a3, 24 +; RV32IA-NEXT: li a5, 255 +; RV32IA-NEXT: lw a4, 0(a2) +; RV32IA-NEXT: sll a3, a5, a3 +; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: andi a1, a1, 255 +; RV32IA-NEXT: .LBB0_1: # %atomicrmw.start +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB0_3 Depth 2 +; RV32IA-NEXT: mv a5, a4 +; RV32IA-NEXT: srl a4, a4, a0 +; RV32IA-NEXT: andi a6, a4, 255 +; RV32IA-NEXT: addi a4, a4, 1 +; RV32IA-NEXT: sltu a6, a6, a1 +; RV32IA-NEXT: xori a6, a6, 1 +; RV32IA-NEXT: addi a6, a6, -1 +; RV32IA-NEXT: and a4, a6, a4 +; RV32IA-NEXT: andi a4, a4, 255 +; RV32IA-NEXT: sll a4, a4, a0 +; RV32IA-NEXT: and a6, a5, a3 +; RV32IA-NEXT: or a6, a6, a4 +; RV32IA-NEXT: .LBB0_3: # %atomicrmw.start +; RV32IA-NEXT: # Parent Loop BB0_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-NEXT: bne a4, a5, .LBB0_1 +; RV32IA-NEXT: # %bb.4: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB0_3 Depth=2 +; RV32IA-NEXT: sc.w.aqrl a7, a6, (a2) +; RV32IA-NEXT: bnez a7, .LBB0_3 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start +; RV32IA-NEXT: # %bb.2: # %atomicrmw.end +; RV32IA-NEXT: srl a0, a4, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_uinc_wrap_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: .LBB0_1: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: addi a0, a3, 1 +; RV64I-NEXT: andi a1, a3, 255 +; RV64I-NEXT: sltu a1, a1, s1 +; RV64I-NEXT: xori a1, a1, 1 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a2, a1, a0 +; RV64I-NEXT: sb a3, 7(sp) +; RV64I-NEXT: addi a1, sp, 7 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __atomic_compare_exchange_1@plt +; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: beqz a0, .LBB0_1 +; RV64I-NEXT: # %bb.2: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_uinc_wrap_i8: +; RV64IA: # %bb.0: +; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: slli a0, a0, 3 +; RV64IA-NEXT: li a3, 255 +; RV64IA-NEXT: sllw a3, a3, a0 +; RV64IA-NEXT: lw a4, 0(a2) +; RV64IA-NEXT: andi a0, a0, 24 +; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: andi a1, a1, 255 +; RV64IA-NEXT: .LBB0_1: # %atomicrmw.start +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB0_3 Depth 2 +; RV64IA-NEXT: srlw a5, a4, a0 +; RV64IA-NEXT: sext.w a6, a4 +; RV64IA-NEXT: andi a7, a5, 255 +; RV64IA-NEXT: addiw a5, a5, 1 +; RV64IA-NEXT: sltu a7, a7, a1 +; 
RV64IA-NEXT: xori a7, a7, 1 +; RV64IA-NEXT: addiw a7, a7, -1 +; RV64IA-NEXT: and a5, a7, a5 +; RV64IA-NEXT: andi a5, a5, 255 +; RV64IA-NEXT: sllw a5, a5, a0 +; RV64IA-NEXT: and a4, a4, a3 +; RV64IA-NEXT: or a5, a4, a5 +; RV64IA-NEXT: .LBB0_3: # %atomicrmw.start +; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: bne a4, a6, .LBB0_1 +; RV64IA-NEXT: # %bb.4: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2 +; RV64IA-NEXT: sc.w.aqrl a7, a5, (a2) +; RV64IA-NEXT: bnez a7, .LBB0_3 +; RV64IA-NEXT: # %bb.5: # %atomicrmw.start +; RV64IA-NEXT: # %bb.2: # %atomicrmw.end +; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; RV32I-LABEL: atomicrmw_uinc_wrap_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a3, 0(a0) +; RV32I-NEXT: lui s1, 16 +; RV32I-NEXT: addi s1, s1, -1 +; RV32I-NEXT: and s2, a1, s1 +; RV32I-NEXT: .LBB1_1: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a0, a3, s1 +; RV32I-NEXT: addi a1, a3, 1 +; RV32I-NEXT: sltu a0, a0, s2 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a2, a0, a1 +; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: addi a1, sp, 14 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __atomic_compare_exchange_2@plt +; RV32I-NEXT: lh a3, 14(sp) +; RV32I-NEXT: beqz a0, .LBB1_1 +; RV32I-NEXT: # %bb.2: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_uinc_wrap_i16: +; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a0, a4, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: lw a5, 0(a2) +; RV32IA-NEXT: sll a4, a3, a4 +; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: and a1, a1, a3 +; RV32IA-NEXT: .LBB1_1: # %atomicrmw.start +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB1_3 Depth 2 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: srl a5, a5, a0 +; RV32IA-NEXT: and a7, a5, a3 +; RV32IA-NEXT: addi a5, a5, 1 +; RV32IA-NEXT: sltu a7, a7, a1 +; RV32IA-NEXT: xori a7, a7, 1 +; RV32IA-NEXT: addi a7, a7, -1 +; RV32IA-NEXT: and a5, a5, a3 +; RV32IA-NEXT: and a5, a7, a5 +; RV32IA-NEXT: sll a5, a5, a0 +; RV32IA-NEXT: and a7, a6, a4 +; RV32IA-NEXT: or a7, a7, a5 +; RV32IA-NEXT: .LBB1_3: # %atomicrmw.start +; RV32IA-NEXT: # Parent Loop BB1_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: bne a5, a6, .LBB1_1 +; RV32IA-NEXT: # %bb.4: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB1_3 Depth=2 +; RV32IA-NEXT: sc.w.aqrl t0, a7, (a2) +; RV32IA-NEXT: bnez t0, .LBB1_3 +; 
RV32IA-NEXT: # %bb.5: # %atomicrmw.start +; RV32IA-NEXT: # %bb.2: # %atomicrmw.end +; RV32IA-NEXT: srl a0, a5, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_uinc_wrap_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a3, 0(a0) +; RV64I-NEXT: lui s1, 16 +; RV64I-NEXT: addiw s1, s1, -1 +; RV64I-NEXT: and s2, a1, s1 +; RV64I-NEXT: .LBB1_1: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a0, a3, s1 +; RV64I-NEXT: addi a1, a3, 1 +; RV64I-NEXT: sltu a0, a0, s2 +; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a2, a0, a1 +; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: addi a1, sp, 14 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __atomic_compare_exchange_2@plt +; RV64I-NEXT: lh a3, 14(sp) +; RV64I-NEXT: beqz a0, .LBB1_1 +; RV64I-NEXT: # %bb.2: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_uinc_wrap_i16: +; RV64IA: # %bb.0: +; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a0, a4, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: lw a5, 0(a2) +; RV64IA-NEXT: sllw a4, a3, a4 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: and a1, a1, a3 +; RV64IA-NEXT: .LBB1_1: # %atomicrmw.start +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB1_3 Depth 2 +; RV64IA-NEXT: srlw a6, a5, a0 +; RV64IA-NEXT: sext.w a7, a5 +; RV64IA-NEXT: and t0, a6, a3 +; RV64IA-NEXT: addiw a6, a6, 1 +; RV64IA-NEXT: sltu t0, t0, a1 +; RV64IA-NEXT: xori t0, t0, 1 +; RV64IA-NEXT: addiw t0, t0, -1 +; RV64IA-NEXT: and a6, a6, a3 +; RV64IA-NEXT: and a6, t0, a6 +; RV64IA-NEXT: sllw a6, a6, a0 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: or a6, a5, a6 +; RV64IA-NEXT: .LBB1_3: # %atomicrmw.start +; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: bne a5, a7, .LBB1_1 +; RV64IA-NEXT: # %bb.4: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2 +; RV64IA-NEXT: sc.w.aqrl t0, a6, (a2) +; RV64IA-NEXT: bnez t0, .LBB1_3 +; RV64IA-NEXT: # %bb.5: # %atomicrmw.start +; RV64IA-NEXT: # %bb.2: # %atomicrmw.end +; RV64IA-NEXT: srlw a0, a5, a0 +; RV64IA-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; RV32I-LABEL: atomicrmw_uinc_wrap_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: 
mv s1, a1 +; RV32I-NEXT: .LBB2_1: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: addi a0, a3, 1 +; RV32I-NEXT: sltu a1, a3, s1 +; RV32I-NEXT: xori a1, a1, 1 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a2, a1, a0 +; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __atomic_compare_exchange_4@plt +; RV32I-NEXT: lw a3, 0(sp) +; RV32I-NEXT: beqz a0, .LBB2_1 +; RV32I-NEXT: # %bb.2: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_uinc_wrap_i32: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: .LBB2_1: # %atomicrmw.start +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB2_3 Depth 2 +; RV32IA-NEXT: mv a3, a2 +; RV32IA-NEXT: addi a2, a2, 1 +; RV32IA-NEXT: sltu a4, a3, a1 +; RV32IA-NEXT: xori a4, a4, 1 +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: and a4, a4, a2 +; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start +; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a2, (a0) +; RV32IA-NEXT: bne a2, a3, .LBB2_1 +; RV32IA-NEXT: # %bb.4: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB2_3 Depth=2 +; RV32IA-NEXT: sc.w.aqrl a5, a4, (a0) +; RV32IA-NEXT: bnez a5, .LBB2_3 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start +; RV32IA-NEXT: # %bb.2: # %atomicrmw.end +; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_uinc_wrap_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lw a3, 0(a0) +; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: .LBB2_1: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: addiw a0, a3, 1 +; RV64I-NEXT: sltu a1, a3, s1 +; RV64I-NEXT: xori a1, a1, 1 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a2, a1, a0 +; RV64I-NEXT: sw a3, 4(sp) +; RV64I-NEXT: addi a1, sp, 4 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __atomic_compare_exchange_4@plt +; RV64I-NEXT: lw a3, 4(sp) +; RV64I-NEXT: beqz a0, .LBB2_1 +; RV64I-NEXT: # %bb.2: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_uinc_wrap_i32: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a2, 0(a0) +; RV64IA-NEXT: sext.w a1, a1 +; RV64IA-NEXT: .LBB2_1: # %atomicrmw.start +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB2_3 Depth 2 +; RV64IA-NEXT: addiw a3, a2, 1 +; RV64IA-NEXT: sext.w a4, a2 +; RV64IA-NEXT: sltu a2, a4, a1 +; RV64IA-NEXT: xori a2, a2, 1 +; RV64IA-NEXT: addi a2, a2, -1 +; RV64IA-NEXT: and a3, a2, a3 +; RV64IA-NEXT: .LBB2_3: # %atomicrmw.start +; RV64IA-NEXT: # Parent Loop BB2_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a2, (a0) +; RV64IA-NEXT: bne 
a2, a4, .LBB2_1 +; RV64IA-NEXT: # %bb.4: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB2_3 Depth=2 +; RV64IA-NEXT: sc.w.aqrl a5, a3, (a0) +; RV64IA-NEXT: bnez a5, .LBB2_3 +; RV64IA-NEXT: # %bb.5: # %atomicrmw.start +; RV64IA-NEXT: # %bb.2: # %atomicrmw.end +; RV64IA-NEXT: mv a0, a2 +; RV64IA-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; RV32I-LABEL: atomicrmw_uinc_wrap_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: j .LBB3_3 +; RV32I-NEXT: .LBB3_1: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 +; RV32I-NEXT: sltu a0, a5, s1 +; RV32I-NEXT: .LBB3_2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: addi a1, a4, 1 +; RV32I-NEXT: sltu a2, a1, a4 +; RV32I-NEXT: add a3, a5, a2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a2, a0, a1 +; RV32I-NEXT: and a3, a0, a3 +; RV32I-NEXT: sw a4, 8(sp) +; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: li a5, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __atomic_compare_exchange_8@plt +; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: bnez a0, .LBB3_5 +; RV32I-NEXT: .LBB3_3: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: bne a5, s1, .LBB3_1 +; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 +; RV32I-NEXT: sltu a0, a4, s2 +; RV32I-NEXT: j .LBB3_2 +; RV32I-NEXT: .LBB3_5: # %atomicrmw.end +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_uinc_wrap_i64: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -32 +; RV32IA-NEXT: .cfi_def_cfa_offset 32 +; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IA-NEXT: .cfi_offset ra, -4 +; RV32IA-NEXT: .cfi_offset s0, -8 +; RV32IA-NEXT: .cfi_offset s1, -12 +; RV32IA-NEXT: .cfi_offset s2, -16 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 +; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: j .LBB3_3 +; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 +; RV32IA-NEXT: sltu a0, a5, s1 +; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: addi a1, a4, 1 +; RV32IA-NEXT: sltu a2, a1, a4 +; RV32IA-NEXT: add a3, a5, a2 +; RV32IA-NEXT: addi a0, a0, -1 +; RV32IA-NEXT: and a2, a0, a1 +; RV32IA-NEXT: and a3, a0, a3 +; RV32IA-NEXT: sw a4, 8(sp) +; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: 
addi a1, sp, 8 +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: li a5, 5 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: bnez a0, .LBB3_5 +; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start +; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: bne a5, s1, .LBB3_1 +; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 +; RV32IA-NEXT: sltu a0, a4, s2 +; RV32IA-NEXT: j .LBB3_2 +; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 32 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_uinc_wrap_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: .LBB3_1: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: addi a0, a3, 1 +; RV64I-NEXT: sltu a1, a3, s1 +; RV64I-NEXT: xori a1, a1, 1 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a2, a1, a0 +; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __atomic_compare_exchange_8@plt +; RV64I-NEXT: ld a3, 0(sp) +; RV64I-NEXT: beqz a0, .LBB3_1 +; RV64I-NEXT: # %bb.2: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_uinc_wrap_i64: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a2, 0(a0) +; RV64IA-NEXT: .LBB3_1: # %atomicrmw.start +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB3_3 Depth 2 +; RV64IA-NEXT: mv a3, a2 +; RV64IA-NEXT: addi a2, a2, 1 +; RV64IA-NEXT: sltu a4, a3, a1 +; RV64IA-NEXT: xori a4, a4, 1 +; RV64IA-NEXT: addi a4, a4, -1 +; RV64IA-NEXT: and a4, a4, a2 +; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start +; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.d.aqrl a2, (a0) +; RV64IA-NEXT: bne a2, a3, .LBB3_1 +; RV64IA-NEXT: # %bb.4: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB3_3 Depth=2 +; RV64IA-NEXT: sc.d.aqrl a5, a4, (a0) +; RV64IA-NEXT: bnez a5, .LBB3_3 +; RV64IA-NEXT: # %bb.5: # %atomicrmw.start +; RV64IA-NEXT: # %bb.2: # %atomicrmw.end +; RV64IA-NEXT: mv a0, a2 +; RV64IA-NEXT: ret + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; RV32I-LABEL: atomicrmw_udec_wrap_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 
+; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 +; RV32I-NEXT: j .LBB4_2 +; RV32I-NEXT: .LBB4_1: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: addi a1, sp, 15 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __atomic_compare_exchange_1@plt +; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: bnez a0, .LBB4_4 +; RV32I-NEXT: .LBB4_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a0, a3, 255 +; RV32I-NEXT: seqz a1, a0 +; RV32I-NEXT: sltu a0, s2, a0 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: bnez a0, .LBB4_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV32I-NEXT: addi a2, a3, -1 +; RV32I-NEXT: j .LBB4_1 +; RV32I-NEXT: .LBB4_4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_udec_wrap_i8: +; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: andi a0, a3, 24 +; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: lw a6, 0(a2) +; RV32IA-NEXT: sll a3, a4, a3 +; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: andi a4, a1, 255 +; RV32IA-NEXT: j .LBB4_2 +; RV32IA-NEXT: .LBB4_1: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV32IA-NEXT: andi a6, a7, 255 +; RV32IA-NEXT: sll a6, a6, a0 +; RV32IA-NEXT: and a7, a5, a3 +; RV32IA-NEXT: or a7, a7, a6 +; RV32IA-NEXT: .LBB4_5: # %atomicrmw.start +; RV32IA-NEXT: # Parent Loop BB4_2 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a6, (a2) +; RV32IA-NEXT: bne a6, a5, .LBB4_7 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB4_5 Depth=2 +; RV32IA-NEXT: sc.w.aqrl t0, a7, (a2) +; RV32IA-NEXT: bnez t0, .LBB4_5 +; RV32IA-NEXT: .LBB4_7: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV32IA-NEXT: beq a6, a5, .LBB4_4 +; RV32IA-NEXT: .LBB4_2: # %atomicrmw.start +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB4_5 Depth 2 +; RV32IA-NEXT: mv a5, a6 +; RV32IA-NEXT: srl a6, a6, a0 +; RV32IA-NEXT: andi a7, a6, 255 +; RV32IA-NEXT: seqz t0, a7 +; RV32IA-NEXT: sltu a7, a4, a7 +; RV32IA-NEXT: or t0, t0, a7 +; RV32IA-NEXT: mv a7, a1 +; RV32IA-NEXT: bnez t0, .LBB4_1 +; RV32IA-NEXT: # %bb.3: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV32IA-NEXT: addi a7, a6, -1 +; RV32IA-NEXT: j .LBB4_1 +; RV32IA-NEXT: .LBB4_4: # %atomicrmw.end +; RV32IA-NEXT: srl a0, a6, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_udec_wrap_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 +; RV64I-NEXT: j .LBB4_2 +; RV64I-NEXT: .LBB4_1: # 
%atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: addi a1, sp, 15 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __atomic_compare_exchange_1@plt +; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: bnez a0, .LBB4_4 +; RV64I-NEXT: .LBB4_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a0, a3, 255 +; RV64I-NEXT: seqz a1, a0 +; RV64I-NEXT: sltu a0, s2, a0 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: bnez a0, .LBB4_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV64I-NEXT: addi a2, a3, -1 +; RV64I-NEXT: j .LBB4_1 +; RV64I-NEXT: .LBB4_4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_udec_wrap_i8: +; RV64IA: # %bb.0: +; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a0, a3, 24 +; RV64IA-NEXT: li a5, 255 +; RV64IA-NEXT: lw a4, 0(a2) +; RV64IA-NEXT: sllw a3, a5, a3 +; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: andi a5, a1, 255 +; RV64IA-NEXT: j .LBB4_2 +; RV64IA-NEXT: .LBB4_1: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV64IA-NEXT: sext.w a6, a4 +; RV64IA-NEXT: andi a7, a7, 255 +; RV64IA-NEXT: sllw a7, a7, a0 +; RV64IA-NEXT: and a4, a4, a3 +; RV64IA-NEXT: or a7, a4, a7 +; RV64IA-NEXT: .LBB4_5: # %atomicrmw.start +; RV64IA-NEXT: # Parent Loop BB4_2 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: bne a4, a6, .LBB4_7 +; RV64IA-NEXT: # %bb.6: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB4_5 Depth=2 +; RV64IA-NEXT: sc.w.aqrl t0, a7, (a2) +; RV64IA-NEXT: bnez t0, .LBB4_5 +; RV64IA-NEXT: .LBB4_7: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV64IA-NEXT: beq a4, a6, .LBB4_4 +; RV64IA-NEXT: .LBB4_2: # %atomicrmw.start +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB4_5 Depth 2 +; RV64IA-NEXT: srlw a6, a4, a0 +; RV64IA-NEXT: andi a7, a6, 255 +; RV64IA-NEXT: seqz t0, a7 +; RV64IA-NEXT: sltu a7, a5, a7 +; RV64IA-NEXT: or t0, t0, a7 +; RV64IA-NEXT: mv a7, a1 +; RV64IA-NEXT: bnez t0, .LBB4_1 +; RV64IA-NEXT: # %bb.3: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 +; RV64IA-NEXT: addi a7, a6, -1 +; RV64IA-NEXT: j .LBB4_1 +; RV64IA-NEXT: .LBB4_4: # %atomicrmw.end +; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; RV32I-LABEL: atomicrmw_udec_wrap_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: .cfi_offset s3, -20 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lui s2, 16 +; RV32I-NEXT: addi s2, 
s2, -1 +; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: j .LBB5_2 +; RV32I-NEXT: .LBB5_1: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: addi a1, sp, 10 +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __atomic_compare_exchange_2@plt +; RV32I-NEXT: lh a1, 10(sp) +; RV32I-NEXT: bnez a0, .LBB5_4 +; RV32I-NEXT: .LBB5_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a0, a1, s2 +; RV32I-NEXT: seqz a2, a0 +; RV32I-NEXT: sltu a0, s3, a0 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: bnez a0, .LBB5_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV32I-NEXT: addi a2, a1, -1 +; RV32I-NEXT: j .LBB5_1 +; RV32I-NEXT: .LBB5_4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_udec_wrap_i16: +; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a2, a0, -4 +; RV32IA-NEXT: slli a4, a0, 3 +; RV32IA-NEXT: andi a0, a4, 24 +; RV32IA-NEXT: lui a3, 16 +; RV32IA-NEXT: addi a3, a3, -1 +; RV32IA-NEXT: lw a7, 0(a2) +; RV32IA-NEXT: sll a4, a3, a4 +; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: and a5, a1, a3 +; RV32IA-NEXT: j .LBB5_2 +; RV32IA-NEXT: .LBB5_1: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV32IA-NEXT: and a7, t0, a3 +; RV32IA-NEXT: sll a7, a7, a0 +; RV32IA-NEXT: and t0, a6, a4 +; RV32IA-NEXT: or t0, t0, a7 +; RV32IA-NEXT: .LBB5_5: # %atomicrmw.start +; RV32IA-NEXT: # Parent Loop BB5_2 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a7, (a2) +; RV32IA-NEXT: bne a7, a6, .LBB5_7 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB5_5 Depth=2 +; RV32IA-NEXT: sc.w.aqrl t1, t0, (a2) +; RV32IA-NEXT: bnez t1, .LBB5_5 +; RV32IA-NEXT: .LBB5_7: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV32IA-NEXT: beq a7, a6, .LBB5_4 +; RV32IA-NEXT: .LBB5_2: # %atomicrmw.start +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB5_5 Depth 2 +; RV32IA-NEXT: mv a6, a7 +; RV32IA-NEXT: srl a7, a7, a0 +; RV32IA-NEXT: and t0, a7, a3 +; RV32IA-NEXT: seqz t1, t0 +; RV32IA-NEXT: sltu t0, a5, t0 +; RV32IA-NEXT: or t1, t1, t0 +; RV32IA-NEXT: mv t0, a1 +; RV32IA-NEXT: bnez t1, .LBB5_1 +; RV32IA-NEXT: # %bb.3: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV32IA-NEXT: addi t0, a7, -1 +; RV32IA-NEXT: j .LBB5_1 +; RV32IA-NEXT: .LBB5_4: # %atomicrmw.end +; RV32IA-NEXT: srl a0, a7, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_udec_wrap_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: .cfi_offset s3, -40 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lui s2, 16 +; 
RV64I-NEXT: addiw s2, s2, -1 +; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: j .LBB5_2 +; RV64I-NEXT: .LBB5_1: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: addi a1, sp, 6 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __atomic_compare_exchange_2@plt +; RV64I-NEXT: lh a1, 6(sp) +; RV64I-NEXT: bnez a0, .LBB5_4 +; RV64I-NEXT: .LBB5_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a0, a1, s2 +; RV64I-NEXT: seqz a2, a0 +; RV64I-NEXT: sltu a0, s3, a0 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: bnez a0, .LBB5_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV64I-NEXT: addi a2, a1, -1 +; RV64I-NEXT: j .LBB5_1 +; RV64I-NEXT: .LBB5_4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_udec_wrap_i16: +; RV64IA: # %bb.0: +; RV64IA-NEXT: andi a2, a0, -4 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a0, a4, 24 +; RV64IA-NEXT: lui a3, 16 +; RV64IA-NEXT: addiw a3, a3, -1 +; RV64IA-NEXT: lw a5, 0(a2) +; RV64IA-NEXT: sllw a4, a3, a4 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: and a6, a1, a3 +; RV64IA-NEXT: j .LBB5_2 +; RV64IA-NEXT: .LBB5_1: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV64IA-NEXT: sext.w a7, a5 +; RV64IA-NEXT: and t0, t0, a3 +; RV64IA-NEXT: sllw t0, t0, a0 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: or t0, a5, t0 +; RV64IA-NEXT: .LBB5_5: # %atomicrmw.start +; RV64IA-NEXT: # Parent Loop BB5_2 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: bne a5, a7, .LBB5_7 +; RV64IA-NEXT: # %bb.6: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB5_5 Depth=2 +; RV64IA-NEXT: sc.w.aqrl t1, t0, (a2) +; RV64IA-NEXT: bnez t1, .LBB5_5 +; RV64IA-NEXT: .LBB5_7: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV64IA-NEXT: beq a5, a7, .LBB5_4 +; RV64IA-NEXT: .LBB5_2: # %atomicrmw.start +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB5_5 Depth 2 +; RV64IA-NEXT: srlw a7, a5, a0 +; RV64IA-NEXT: and t0, a7, a3 +; RV64IA-NEXT: seqz t1, t0 +; RV64IA-NEXT: sltu t0, a6, t0 +; RV64IA-NEXT: or t1, t1, t0 +; RV64IA-NEXT: mv t0, a1 +; RV64IA-NEXT: bnez t1, .LBB5_1 +; RV64IA-NEXT: # %bb.3: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1 +; RV64IA-NEXT: addi t0, a7, -1 +; RV64IA-NEXT: j .LBB5_1 +; RV64IA-NEXT: .LBB5_4: # %atomicrmw.end +; RV64IA-NEXT: srlw a0, a5, a0 +; RV64IA-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; RV32I-LABEL: atomicrmw_udec_wrap_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: j .LBB6_2 +; RV32I-NEXT: 
.LBB6_1: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: mv a1, sp +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __atomic_compare_exchange_4@plt +; RV32I-NEXT: lw a3, 0(sp) +; RV32I-NEXT: bnez a0, .LBB6_4 +; RV32I-NEXT: .LBB6_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: seqz a0, a3 +; RV32I-NEXT: sltu a1, s1, a3 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: bnez a0, .LBB6_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV32I-NEXT: addi a2, a3, -1 +; RV32I-NEXT: j .LBB6_1 +; RV32I-NEXT: .LBB6_4: # %atomicrmw.end +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_udec_wrap_i32: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a2, 0(a0) +; RV32IA-NEXT: j .LBB6_2 +; RV32IA-NEXT: .LBB6_1: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV32IA-NEXT: .LBB6_5: # %atomicrmw.start +; RV32IA-NEXT: # Parent Loop BB6_2 Depth=1 +; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV32IA-NEXT: lr.w.aqrl a2, (a0) +; RV32IA-NEXT: bne a2, a3, .LBB6_7 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB6_5 Depth=2 +; RV32IA-NEXT: sc.w.aqrl a5, a4, (a0) +; RV32IA-NEXT: bnez a5, .LBB6_5 +; RV32IA-NEXT: .LBB6_7: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV32IA-NEXT: beq a2, a3, .LBB6_4 +; RV32IA-NEXT: .LBB6_2: # %atomicrmw.start +; RV32IA-NEXT: # =>This Loop Header: Depth=1 +; RV32IA-NEXT: # Child Loop BB6_5 Depth 2 +; RV32IA-NEXT: mv a3, a2 +; RV32IA-NEXT: seqz a2, a2 +; RV32IA-NEXT: sltu a4, a1, a3 +; RV32IA-NEXT: or a2, a2, a4 +; RV32IA-NEXT: mv a4, a1 +; RV32IA-NEXT: bnez a2, .LBB6_1 +; RV32IA-NEXT: # %bb.3: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV32IA-NEXT: addi a4, a3, -1 +; RV32IA-NEXT: j .LBB6_1 +; RV32IA-NEXT: .LBB6_4: # %atomicrmw.end +; RV32IA-NEXT: mv a0, a2 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_udec_wrap_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: .cfi_offset s2, -32 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lw a3, 0(a0) +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 +; RV64I-NEXT: j .LBB6_2 +; RV64I-NEXT: .LBB6_1: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: addi a1, sp, 12 +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __atomic_compare_exchange_4@plt +; RV64I-NEXT: lw a3, 12(sp) +; RV64I-NEXT: bnez a0, .LBB6_4 +; RV64I-NEXT: .LBB6_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: seqz a0, a3 +; RV64I-NEXT: sltu a1, s2, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: bnez a0, .LBB6_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV64I-NEXT: addiw a2, a3, -1 +; RV64I-NEXT: j 
.LBB6_1 +; RV64I-NEXT: .LBB6_4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_udec_wrap_i32: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a2, 0(a0) +; RV64IA-NEXT: sext.w a3, a1 +; RV64IA-NEXT: j .LBB6_2 +; RV64IA-NEXT: .LBB6_1: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV64IA-NEXT: .LBB6_5: # %atomicrmw.start +; RV64IA-NEXT: # Parent Loop BB6_2 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.w.aqrl a2, (a0) +; RV64IA-NEXT: bne a2, a4, .LBB6_7 +; RV64IA-NEXT: # %bb.6: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB6_5 Depth=2 +; RV64IA-NEXT: sc.w.aqrl a6, a5, (a0) +; RV64IA-NEXT: bnez a6, .LBB6_5 +; RV64IA-NEXT: .LBB6_7: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV64IA-NEXT: beq a2, a4, .LBB6_4 +; RV64IA-NEXT: .LBB6_2: # %atomicrmw.start +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB6_5 Depth 2 +; RV64IA-NEXT: sext.w a4, a2 +; RV64IA-NEXT: seqz a5, a4 +; RV64IA-NEXT: sltu a6, a3, a4 +; RV64IA-NEXT: or a6, a5, a6 +; RV64IA-NEXT: mv a5, a1 +; RV64IA-NEXT: bnez a6, .LBB6_1 +; RV64IA-NEXT: # %bb.3: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB6_2 Depth=1 +; RV64IA-NEXT: addiw a5, a2, -1 +; RV64IA-NEXT: j .LBB6_1 +; RV64IA-NEXT: .LBB6_4: # %atomicrmw.end +; RV64IA-NEXT: mv a0, a2 +; RV64IA-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; RV32I-LABEL: atomicrmw_udec_wrap_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: .cfi_offset s2, -16 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a5, 4(a0) +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: j .LBB7_2 +; RV32I-NEXT: .LBB7_1: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32I-NEXT: sw a4, 8(sp) +; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: addi a1, sp, 8 +; RV32I-NEXT: li a4, 5 +; RV32I-NEXT: li a5, 5 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __atomic_compare_exchange_8@plt +; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: bnez a0, .LBB7_7 +; RV32I-NEXT: .LBB7_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: beq a5, s1, .LBB7_4 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32I-NEXT: sltu a0, s1, a5 +; RV32I-NEXT: j .LBB7_5 +; RV32I-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1 +; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: .LBB7_5: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32I-NEXT: or a1, a4, a5 +; RV32I-NEXT: seqz a1, a1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: bnez a0, .LBB7_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32I-NEXT: addi a2, a4, -1 +; RV32I-NEXT: sltu a0, 
a2, a4 +; RV32I-NEXT: add a0, a5, a0 +; RV32I-NEXT: addi a3, a0, -1 +; RV32I-NEXT: j .LBB7_1 +; RV32I-NEXT: .LBB7_7: # %atomicrmw.end +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomicrmw_udec_wrap_i64: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -32 +; RV32IA-NEXT: .cfi_def_cfa_offset 32 +; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32IA-NEXT: .cfi_offset ra, -4 +; RV32IA-NEXT: .cfi_offset s0, -8 +; RV32IA-NEXT: .cfi_offset s1, -12 +; RV32IA-NEXT: .cfi_offset s2, -16 +; RV32IA-NEXT: mv s0, a0 +; RV32IA-NEXT: lw a5, 4(a0) +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: mv s1, a2 +; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: j .LBB7_2 +; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32IA-NEXT: sw a4, 8(sp) +; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: addi a1, sp, 8 +; RV32IA-NEXT: li a4, 5 +; RV32IA-NEXT: li a5, 5 +; RV32IA-NEXT: mv a0, s0 +; RV32IA-NEXT: call __atomic_compare_exchange_8@plt +; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: bnez a0, .LBB7_7 +; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start +; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32IA-NEXT: beq a5, s1, .LBB7_4 +; RV32IA-NEXT: # %bb.3: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32IA-NEXT: sltu a0, s1, a5 +; RV32IA-NEXT: j .LBB7_5 +; RV32IA-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1 +; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: .LBB7_5: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32IA-NEXT: or a1, a4, a5 +; RV32IA-NEXT: seqz a1, a1 +; RV32IA-NEXT: or a0, a1, a0 +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s1 +; RV32IA-NEXT: bnez a0, .LBB7_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32IA-NEXT: addi a2, a4, -1 +; RV32IA-NEXT: sltu a0, a2, a4 +; RV32IA-NEXT: add a0, a5, a0 +; RV32IA-NEXT: addi a3, a0, -1 +; RV32IA-NEXT: j .LBB7_1 +; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end +; RV32IA-NEXT: mv a0, a4 +; RV32IA-NEXT: mv a1, a5 +; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 32 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomicrmw_udec_wrap_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: j .LBB7_2 +; RV64I-NEXT: .LBB7_1: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: mv a1, sp +; RV64I-NEXT: li a3, 5 +; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __atomic_compare_exchange_8@plt +; RV64I-NEXT: ld a3, 0(sp) +; RV64I-NEXT: bnez a0, .LBB7_4 
+; RV64I-NEXT: .LBB7_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: seqz a0, a3 +; RV64I-NEXT: sltu a1, s1, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: bnez a0, .LBB7_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV64I-NEXT: addi a2, a3, -1 +; RV64I-NEXT: j .LBB7_1 +; RV64I-NEXT: .LBB7_4: # %atomicrmw.end +; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomicrmw_udec_wrap_i64: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a2, 0(a0) +; RV64IA-NEXT: j .LBB7_2 +; RV64IA-NEXT: .LBB7_1: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV64IA-NEXT: .LBB7_5: # %atomicrmw.start +; RV64IA-NEXT: # Parent Loop BB7_2 Depth=1 +; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 +; RV64IA-NEXT: lr.d.aqrl a2, (a0) +; RV64IA-NEXT: bne a2, a3, .LBB7_7 +; RV64IA-NEXT: # %bb.6: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB7_5 Depth=2 +; RV64IA-NEXT: sc.d.aqrl a5, a4, (a0) +; RV64IA-NEXT: bnez a5, .LBB7_5 +; RV64IA-NEXT: .LBB7_7: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV64IA-NEXT: beq a2, a3, .LBB7_4 +; RV64IA-NEXT: .LBB7_2: # %atomicrmw.start +; RV64IA-NEXT: # =>This Loop Header: Depth=1 +; RV64IA-NEXT: # Child Loop BB7_5 Depth 2 +; RV64IA-NEXT: mv a3, a2 +; RV64IA-NEXT: seqz a2, a2 +; RV64IA-NEXT: sltu a4, a1, a3 +; RV64IA-NEXT: or a2, a2, a4 +; RV64IA-NEXT: mv a4, a1 +; RV64IA-NEXT: bnez a2, .LBB7_1 +; RV64IA-NEXT: # %bb.3: # %atomicrmw.start +; RV64IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV64IA-NEXT: addi a4, a3, -1 +; RV64IA-NEXT: j .LBB7_1 +; RV64IA-NEXT: .LBB7_4: # %atomicrmw.end +; RV64IA-NEXT: mv a0, a2 +; RV64IA-NEXT: ret + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,327 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=sparc -mcpu=v9 < %s | FileCheck %s + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i8: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: and %o0, -4, %o2 +; CHECK-NEXT: mov 3, %o3 +; CHECK-NEXT: andn %o3, %o0, %o0 +; CHECK-NEXT: sll %o0, 3, %o0 +; CHECK-NEXT: mov 255, %o3 +; CHECK-NEXT: ld [%o2], %o4 +; CHECK-NEXT: sll %o3, %o0, %o3 +; CHECK-NEXT: xor %o3, -1, %o3 +; CHECK-NEXT: and %o1, 255, %o1 +; CHECK-NEXT: .LBB0_1: ! %atomicrmw.start +; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov %o4, %o5 +; CHECK-NEXT: srl %o4, %o0, %o4 +; CHECK-NEXT: and %o4, 255, %g2 +; CHECK-NEXT: add %o4, 1, %o4 +; CHECK-NEXT: cmp %g2, %o1 +; CHECK-NEXT: movcc %icc, 0, %o4 +; CHECK-NEXT: and %o4, 255, %o4 +; CHECK-NEXT: sll %o4, %o0, %o4 +; CHECK-NEXT: and %o5, %o3, %g2 +; CHECK-NEXT: or %g2, %o4, %o4 +; CHECK-NEXT: cas [%o2], %o5, %o4 +; CHECK-NEXT: mov %g0, %g2 +; CHECK-NEXT: cmp %o4, %o5 +; CHECK-NEXT: move %icc, 1, %g2 +; CHECK-NEXT: cmp %g2, 1 +; CHECK-NEXT: bne %icc, .LBB0_1 +; CHECK-NEXT: nop +; CHECK-NEXT: ! %bb.2: ! 
%atomicrmw.end +; CHECK-NEXT: srl %o4, %o0, %o0 +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: retl +; CHECK-NEXT: nop + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i16: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: and %o0, -4, %o2 +; CHECK-NEXT: and %o0, 3, %o0 +; CHECK-NEXT: xor %o0, 2, %o0 +; CHECK-NEXT: sll %o0, 3, %o0 +; CHECK-NEXT: sethi 63, %o3 +; CHECK-NEXT: or %o3, 1023, %o3 +; CHECK-NEXT: ld [%o2], %o5 +; CHECK-NEXT: sll %o3, %o0, %o4 +; CHECK-NEXT: xor %o4, -1, %o4 +; CHECK-NEXT: and %o1, %o3, %o1 +; CHECK-NEXT: .LBB1_1: ! %atomicrmw.start +; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov %o5, %g2 +; CHECK-NEXT: srl %o5, %o0, %o5 +; CHECK-NEXT: and %o5, %o3, %g3 +; CHECK-NEXT: add %o5, 1, %o5 +; CHECK-NEXT: cmp %g3, %o1 +; CHECK-NEXT: movcc %icc, 0, %o5 +; CHECK-NEXT: and %o5, %o3, %o5 +; CHECK-NEXT: sll %o5, %o0, %o5 +; CHECK-NEXT: and %g2, %o4, %g3 +; CHECK-NEXT: or %g3, %o5, %o5 +; CHECK-NEXT: cas [%o2], %g2, %o5 +; CHECK-NEXT: mov %g0, %g3 +; CHECK-NEXT: cmp %o5, %g2 +; CHECK-NEXT: move %icc, 1, %g3 +; CHECK-NEXT: cmp %g3, 1 +; CHECK-NEXT: bne %icc, .LBB1_1 +; CHECK-NEXT: nop +; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end +; CHECK-NEXT: srl %o5, %o0, %o0 +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: retl +; CHECK-NEXT: nop + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i32: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: ld [%o0], %o2 +; CHECK-NEXT: .LBB2_1: ! %atomicrmw.start +; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov %o2, %o3 +; CHECK-NEXT: add %o2, 1, %o2 +; CHECK-NEXT: cmp %o3, %o1 +; CHECK-NEXT: movcc %icc, 0, %o2 +; CHECK-NEXT: cas [%o0], %o3, %o2 +; CHECK-NEXT: mov %g0, %o4 +; CHECK-NEXT: cmp %o2, %o3 +; CHECK-NEXT: move %icc, 1, %o4 +; CHECK-NEXT: cmp %o4, 1 +; CHECK-NEXT: bne %icc, .LBB2_1 +; CHECK-NEXT: nop +; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: retl +; CHECK-NEXT: mov %o2, %o0 + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i64: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: save %sp, -96, %sp +; CHECK-NEXT: .cfi_def_cfa_register %fp +; CHECK-NEXT: .cfi_window_save +; CHECK-NEXT: .cfi_register %o7, %i7 +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: ldd [%i0], %i4 +; CHECK-NEXT: .LBB3_1: ! %atomicrmw.start +; CHECK-NEXT: ! 
=>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov %g0, %i3 +; CHECK-NEXT: mov %g0, %g2 +; CHECK-NEXT: addcc %i5, 1, %o4 +; CHECK-NEXT: addxcc %i4, 0, %o3 +; CHECK-NEXT: cmp %i4, %i1 +; CHECK-NEXT: movcc %icc, 1, %i3 +; CHECK-NEXT: cmp %i5, %i2 +; CHECK-NEXT: movcc %icc, 1, %g2 +; CHECK-NEXT: cmp %i4, %i1 +; CHECK-NEXT: move %icc, %g2, %i3 +; CHECK-NEXT: cmp %i3, 0 +; CHECK-NEXT: movne %icc, 0, %o3 +; CHECK-NEXT: movne %icc, 0, %o4 +; CHECK-NEXT: mov %i0, %o0 +; CHECK-NEXT: mov %i4, %o1 +; CHECK-NEXT: call __sync_val_compare_and_swap_8 +; CHECK-NEXT: mov %i5, %o2 +; CHECK-NEXT: xor %o0, %i4, %i3 +; CHECK-NEXT: xor %o1, %i5, %i4 +; CHECK-NEXT: or %i4, %i3, %i3 +; CHECK-NEXT: mov %o1, %i5 +; CHECK-NEXT: cmp %i3, 0 +; CHECK-NEXT: bne %icc, .LBB3_1 +; CHECK-NEXT: mov %o0, %i4 +; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: mov %i4, %i0 +; CHECK-NEXT: ret +; CHECK-NEXT: restore %g0, %i5, %o1 + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i8: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: and %o0, -4, %o2 +; CHECK-NEXT: mov 3, %o3 +; CHECK-NEXT: andn %o3, %o0, %o0 +; CHECK-NEXT: sll %o0, 3, %o0 +; CHECK-NEXT: mov 255, %o3 +; CHECK-NEXT: ld [%o2], %o5 +; CHECK-NEXT: sll %o3, %o0, %o3 +; CHECK-NEXT: xor %o3, -1, %o3 +; CHECK-NEXT: and %o1, 255, %o4 +; CHECK-NEXT: .LBB4_1: ! %atomicrmw.start +; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov %o5, %g2 +; CHECK-NEXT: srl %o5, %o0, %o5 +; CHECK-NEXT: and %o5, 255, %g3 +; CHECK-NEXT: add %o5, -1, %o5 +; CHECK-NEXT: cmp %g3, %o4 +; CHECK-NEXT: movgu %icc, %o1, %o5 +; CHECK-NEXT: cmp %g3, 0 +; CHECK-NEXT: move %icc, %o1, %o5 +; CHECK-NEXT: and %o5, 255, %o5 +; CHECK-NEXT: sll %o5, %o0, %o5 +; CHECK-NEXT: and %g2, %o3, %g3 +; CHECK-NEXT: or %g3, %o5, %o5 +; CHECK-NEXT: cas [%o2], %g2, %o5 +; CHECK-NEXT: mov %g0, %g3 +; CHECK-NEXT: cmp %o5, %g2 +; CHECK-NEXT: move %icc, 1, %g3 +; CHECK-NEXT: cmp %g3, 1 +; CHECK-NEXT: bne %icc, .LBB4_1 +; CHECK-NEXT: nop +; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end +; CHECK-NEXT: srl %o5, %o0, %o0 +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: retl +; CHECK-NEXT: nop + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i16: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: and %o0, -4, %o2 +; CHECK-NEXT: and %o0, 3, %o0 +; CHECK-NEXT: xor %o0, 2, %o0 +; CHECK-NEXT: sll %o0, 3, %o0 +; CHECK-NEXT: sethi 63, %o3 +; CHECK-NEXT: or %o3, 1023, %o3 +; CHECK-NEXT: ld [%o2], %g2 +; CHECK-NEXT: sll %o3, %o0, %o4 +; CHECK-NEXT: xor %o4, -1, %o4 +; CHECK-NEXT: and %o1, %o3, %o5 +; CHECK-NEXT: .LBB5_1: ! %atomicrmw.start +; CHECK-NEXT: ! 
=>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov %g2, %g3 +; CHECK-NEXT: srl %g2, %o0, %g2 +; CHECK-NEXT: and %g2, %o3, %g4 +; CHECK-NEXT: add %g2, -1, %g2 +; CHECK-NEXT: cmp %g4, %o5 +; CHECK-NEXT: movgu %icc, %o1, %g2 +; CHECK-NEXT: cmp %g4, 0 +; CHECK-NEXT: move %icc, %o1, %g2 +; CHECK-NEXT: and %g2, %o3, %g2 +; CHECK-NEXT: sll %g2, %o0, %g2 +; CHECK-NEXT: and %g3, %o4, %g4 +; CHECK-NEXT: or %g4, %g2, %g2 +; CHECK-NEXT: cas [%o2], %g3, %g2 +; CHECK-NEXT: mov %g0, %g4 +; CHECK-NEXT: cmp %g2, %g3 +; CHECK-NEXT: move %icc, 1, %g4 +; CHECK-NEXT: cmp %g4, 1 +; CHECK-NEXT: bne %icc, .LBB5_1 +; CHECK-NEXT: nop +; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end +; CHECK-NEXT: srl %g2, %o0, %o0 +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: retl +; CHECK-NEXT: nop + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i32: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: ld [%o0], %o2 +; CHECK-NEXT: .LBB6_1: ! %atomicrmw.start +; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov %o2, %o3 +; CHECK-NEXT: add %o2, -1, %o2 +; CHECK-NEXT: cmp %o3, %o1 +; CHECK-NEXT: movgu %icc, %o1, %o2 +; CHECK-NEXT: cmp %o3, 0 +; CHECK-NEXT: move %icc, %o1, %o2 +; CHECK-NEXT: cas [%o0], %o3, %o2 +; CHECK-NEXT: mov %g0, %o4 +; CHECK-NEXT: cmp %o2, %o3 +; CHECK-NEXT: move %icc, 1, %o4 +; CHECK-NEXT: cmp %o4, 1 +; CHECK-NEXT: bne %icc, .LBB6_1 +; CHECK-NEXT: nop +; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: retl +; CHECK-NEXT: mov %o2, %o0 + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i64: +; CHECK: .cfi_startproc +; CHECK-NEXT: ! %bb.0: +; CHECK-NEXT: save %sp, -96, %sp +; CHECK-NEXT: .cfi_def_cfa_register %fp +; CHECK-NEXT: .cfi_window_save +; CHECK-NEXT: .cfi_register %o7, %i7 +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: ldd [%i0], %i4 +; CHECK-NEXT: .LBB7_1: ! %atomicrmw.start +; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov %g0, %i3 +; CHECK-NEXT: mov %g0, %g2 +; CHECK-NEXT: mov %g0, %g3 +; CHECK-NEXT: addcc %i5, -1, %o4 +; CHECK-NEXT: addxcc %i4, -1, %o3 +; CHECK-NEXT: or %i5, %i4, %g4 +; CHECK-NEXT: cmp %g4, 0 +; CHECK-NEXT: move %icc, 1, %i3 +; CHECK-NEXT: cmp %i4, %i1 +; CHECK-NEXT: movgu %icc, 1, %g2 +; CHECK-NEXT: cmp %i5, %i2 +; CHECK-NEXT: movgu %icc, 1, %g3 +; CHECK-NEXT: cmp %i4, %i1 +; CHECK-NEXT: move %icc, %g3, %g2 +; CHECK-NEXT: or %i3, %g2, %i3 +; CHECK-NEXT: cmp %i3, 0 +; CHECK-NEXT: movne %icc, %i1, %o3 +; CHECK-NEXT: movne %icc, %i2, %o4 +; CHECK-NEXT: mov %i0, %o0 +; CHECK-NEXT: mov %i4, %o1 +; CHECK-NEXT: call __sync_val_compare_and_swap_8 +; CHECK-NEXT: mov %i5, %o2 +; CHECK-NEXT: xor %o0, %i4, %i3 +; CHECK-NEXT: xor %o1, %i5, %i4 +; CHECK-NEXT: or %i4, %i3, %i3 +; CHECK-NEXT: mov %o1, %i5 +; CHECK-NEXT: cmp %i3, 0 +; CHECK-NEXT: bne %icc, .LBB7_1 +; CHECK-NEXT: mov %o0, %i4 +; CHECK-NEXT: ! %bb.2: ! 
%atomicrmw.end +; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: mov %i4, %i0 +; CHECK-NEXT: ret +; CHECK-NEXT: restore %g0, %i5, %o1 + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/CodeGen/VE/Scalar/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/VE/Scalar/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,240 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=ve-unknown-unknown < %s | FileCheck %s + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s3, %s1, (32)0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: sla.w.sx %s2, (56)0, %s0 +; CHECK-NEXT: ldl.sx %s4, (, %s1) +; CHECK-NEXT: xor %s2, -1, %s2 +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: and %s3, %s3, (56)0 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: and %s4, %s5, (32)0 +; CHECK-NEXT: srl %s4, %s4, %s0 +; CHECK-NEXT: and %s6, %s4, (56)0 +; CHECK-NEXT: adds.w.sx %s4, 1, %s4 +; CHECK-NEXT: cmpu.w %s6, %s6, %s3 +; CHECK-NEXT: cmov.w.ge %s4, (0)1, %s6 +; CHECK-NEXT: and %s4, %s4, (56)0 +; CHECK-NEXT: sla.w.sx %s4, %s4, %s0 +; CHECK-NEXT: and %s6, %s5, %s2 +; CHECK-NEXT: or %s4, %s6, %s4 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB0_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: b.l.t (, %s10) + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s3, %s1, (32)0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: sla.w.sx %s2, (48)0, %s0 +; CHECK-NEXT: ldl.sx %s4, (, %s1) +; CHECK-NEXT: xor %s2, -1, %s2 +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: and %s3, %s3, (48)0 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: and %s4, %s5, (32)0 +; CHECK-NEXT: srl %s4, %s4, %s0 +; CHECK-NEXT: and %s6, %s4, (48)0 +; CHECK-NEXT: adds.w.sx %s4, 1, %s4 +; CHECK-NEXT: cmpu.w %s6, %s6, %s3 +; CHECK-NEXT: cmov.w.ge %s4, (0)1, %s6 +; CHECK-NEXT: and %s4, %s4, (48)0 +; CHECK-NEXT: sla.w.sx %s4, %s4, %s0 +; CHECK-NEXT: and %s6, %s5, %s2 +; CHECK-NEXT: or %s4, %s6, %s4 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB1_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: b.l.t (, %s10) + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: ldl.sx %s2, (, %s0) +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: .LBB2_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s2 +; CHECK-NEXT: adds.w.sx %s2, 1, %s2 +; CHECK-NEXT: cmpu.w %s4, %s3, %s1 +; CHECK-NEXT: cmov.w.ge 
%s2, (0)1, %s4 +; CHECK-NEXT: cas.w %s2, (%s0), %s3 +; CHECK-NEXT: brne.w %s2, %s3, .LBB2_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: ld %s2, (, %s0) +; CHECK-NEXT: .LBB3_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s2 +; CHECK-NEXT: lea %s2, 1(, %s2) +; CHECK-NEXT: cmpu.l %s4, %s3, %s1 +; CHECK-NEXT: cmov.l.ge %s2, (0)1, %s4 +; CHECK-NEXT: cas.l %s2, (%s0), %s3 +; CHECK-NEXT: brne.l %s2, %s3, .LBB3_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: and %s2, -4, %s0 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: sla.w.sx %s3, (56)0, %s0 +; CHECK-NEXT: ldl.sx %s5, (, %s2) +; CHECK-NEXT: xor %s3, -1, %s3 +; CHECK-NEXT: and %s3, %s3, (32)0 +; CHECK-NEXT: and %s4, %s1, (56)0 +; CHECK-NEXT: .LBB4_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s6, 0, %s5 +; CHECK-NEXT: and %s5, %s6, (32)0 +; CHECK-NEXT: srl %s5, %s5, %s0 +; CHECK-NEXT: and %s7, %s5, (56)0 +; CHECK-NEXT: adds.w.sx %s5, -1, %s5 +; CHECK-NEXT: cmpu.w %s34, %s7, %s4 +; CHECK-NEXT: cmov.w.gt %s5, %s1, %s34 +; CHECK-NEXT: cmov.w.eq %s5, %s1, %s7 +; CHECK-NEXT: and %s5, %s5, (56)0 +; CHECK-NEXT: sla.w.sx %s5, %s5, %s0 +; CHECK-NEXT: and %s7, %s6, %s3 +; CHECK-NEXT: or %s5, %s7, %s5 +; CHECK-NEXT: cas.w %s5, (%s2), %s6 +; CHECK-NEXT: brne.w %s5, %s6, .LBB4_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s5, (32)0 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: b.l.t (, %s10) + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: and %s2, -4, %s0 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: sla.w.sx %s3, (48)0, %s0 +; CHECK-NEXT: ldl.sx %s5, (, %s2) +; CHECK-NEXT: xor %s3, -1, %s3 +; CHECK-NEXT: and %s3, %s3, (32)0 +; CHECK-NEXT: and %s4, %s1, (48)0 +; CHECK-NEXT: .LBB5_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s6, 0, %s5 +; CHECK-NEXT: and %s5, %s6, (32)0 +; CHECK-NEXT: srl %s5, %s5, %s0 +; CHECK-NEXT: and %s7, %s5, (48)0 +; CHECK-NEXT: adds.w.sx %s5, -1, %s5 +; CHECK-NEXT: cmpu.w %s34, %s7, %s4 +; CHECK-NEXT: cmov.w.gt %s5, %s1, %s34 +; CHECK-NEXT: cmov.w.eq %s5, %s1, %s7 +; CHECK-NEXT: and %s5, %s5, (48)0 +; CHECK-NEXT: sla.w.sx %s5, %s5, %s0 +; CHECK-NEXT: and %s7, %s6, %s3 +; CHECK-NEXT: or %s5, %s7, %s5 +; CHECK-NEXT: cas.w %s5, (%s2), %s6 +; CHECK-NEXT: brne.w %s5, %s6, .LBB5_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s5, (32)0 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: b.l.t (, %s10) + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + 
ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: ldl.sx %s2, (, %s0) +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: .LBB6_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s2 +; CHECK-NEXT: adds.w.sx %s2, -1, %s2 +; CHECK-NEXT: cmpu.w %s4, %s3, %s1 +; CHECK-NEXT: cmov.w.gt %s2, %s1, %s4 +; CHECK-NEXT: cmov.w.eq %s2, %s1, %s3 +; CHECK-NEXT: cas.w %s2, (%s0), %s3 +; CHECK-NEXT: brne.w %s2, %s3, .LBB6_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: ld %s2, (, %s0) +; CHECK-NEXT: .LBB7_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s2 +; CHECK-NEXT: lea %s2, -1(, %s2) +; CHECK-NEXT: cmpu.l %s4, %s3, %s1 +; CHECK-NEXT: cmov.l.gt %s2, %s1, %s4 +; CHECK-NEXT: cmov.l.eq %s2, %s1, %s3 +; CHECK-NEXT: cas.l %s2, (%s0), %s3 +; CHECK-NEXT: brne.l %s2, %s3, .LBB7_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/CodeGen/WebAssembly/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/WebAssembly/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,381 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=wasm32-unknown-unknown < %s | FileCheck -check-prefix=WASM32 %s +; RUN: llc -mtriple=wasm64-unknown-unknown < %s | FileCheck -check-prefix=WASM64 %s + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; WASM32-LABEL: atomicrmw_uinc_wrap_i8: +; WASM32: .functype atomicrmw_uinc_wrap_i8 (i32, i32) -> (i32) +; WASM32-NEXT: .local i32 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.const 0 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.load8_u 0 +; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: i32.const 1 +; WASM32-NEXT: i32.add +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 255 +; WASM32-NEXT: i32.and +; WASM32-NEXT: i32.ge_u +; WASM32-NEXT: i32.select +; WASM32-NEXT: i32.store8 0 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: atomicrmw_uinc_wrap_i8: +; WASM64: .functype atomicrmw_uinc_wrap_i8 (i64, i32) -> (i32) +; WASM64-NEXT: .local i32 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.const 0 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.load8_u 0 +; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: i32.const 1 +; WASM64-NEXT: i32.add +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i32.const 255 +; WASM64-NEXT: i32.and +; WASM64-NEXT: i32.ge_u +; WASM64-NEXT: i32.select +; WASM64-NEXT: i32.store8 0 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: # fallthrough-return + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; WASM32-LABEL: atomicrmw_uinc_wrap_i16: +; WASM32: .functype atomicrmw_uinc_wrap_i16 (i32, i32) -> (i32) +; WASM32-NEXT: .local i32 +; 
WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.const 0 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.load16_u 0 +; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: i32.const 1 +; WASM32-NEXT: i32.add +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 65535 +; WASM32-NEXT: i32.and +; WASM32-NEXT: i32.ge_u +; WASM32-NEXT: i32.select +; WASM32-NEXT: i32.store16 0 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: atomicrmw_uinc_wrap_i16: +; WASM64: .functype atomicrmw_uinc_wrap_i16 (i64, i32) -> (i32) +; WASM64-NEXT: .local i32 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.const 0 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.load16_u 0 +; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: i32.const 1 +; WASM64-NEXT: i32.add +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i32.const 65535 +; WASM64-NEXT: i32.and +; WASM64-NEXT: i32.ge_u +; WASM64-NEXT: i32.select +; WASM64-NEXT: i32.store16 0 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: # fallthrough-return + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; WASM32-LABEL: atomicrmw_uinc_wrap_i32: +; WASM32: .functype atomicrmw_uinc_wrap_i32 (i32, i32) -> (i32) +; WASM32-NEXT: .local i32 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.const 0 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.load 0 +; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: i32.const 1 +; WASM32-NEXT: i32.add +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.ge_u +; WASM32-NEXT: i32.select +; WASM32-NEXT: i32.store 0 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: atomicrmw_uinc_wrap_i32: +; WASM64: .functype atomicrmw_uinc_wrap_i32 (i64, i32) -> (i32) +; WASM64-NEXT: .local i32 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.const 0 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.load 0 +; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: i32.const 1 +; WASM64-NEXT: i32.add +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i32.ge_u +; WASM64-NEXT: i32.select +; WASM64-NEXT: i32.store 0 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: # fallthrough-return + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; WASM32-LABEL: atomicrmw_uinc_wrap_i64: +; WASM32: .functype atomicrmw_uinc_wrap_i64 (i32, i64) -> (i64) +; WASM32-NEXT: .local i64 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i64.const 0 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i64.load 0 +; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: i64.const 1 +; WASM32-NEXT: i64.add +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i64.ge_u +; WASM32-NEXT: i64.select +; WASM32-NEXT: i64.store 0 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: atomicrmw_uinc_wrap_i64: +; WASM64: .functype atomicrmw_uinc_wrap_i64 (i64, i64) -> (i64) +; WASM64-NEXT: .local i64 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i64.const 0 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i64.load 0 +; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: i64.const 1 +; WASM64-NEXT: i64.add +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i64.ge_u +; WASM64-NEXT: i64.select +; WASM64-NEXT: i64.store 0 +; WASM64-NEXT: local.get 2 +; 
WASM64-NEXT: # fallthrough-return + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; WASM32-LABEL: atomicrmw_udec_wrap_i8: +; WASM32: .functype atomicrmw_udec_wrap_i8 (i32, i32) -> (i32) +; WASM32-NEXT: .local i32 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.load8_u 0 +; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: i32.const -1 +; WASM32-NEXT: i32.add +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 255 +; WASM32-NEXT: i32.and +; WASM32-NEXT: i32.gt_u +; WASM32-NEXT: i32.select +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: i32.select +; WASM32-NEXT: i32.store8 0 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: atomicrmw_udec_wrap_i8: +; WASM64: .functype atomicrmw_udec_wrap_i8 (i64, i32) -> (i32) +; WASM64-NEXT: .local i32 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.load8_u 0 +; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: i32.const -1 +; WASM64-NEXT: i32.add +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i32.const 255 +; WASM64-NEXT: i32.and +; WASM64-NEXT: i32.gt_u +; WASM64-NEXT: i32.select +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: i32.select +; WASM64-NEXT: i32.store8 0 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: # fallthrough-return + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; WASM32-LABEL: atomicrmw_udec_wrap_i16: +; WASM32: .functype atomicrmw_udec_wrap_i16 (i32, i32) -> (i32) +; WASM32-NEXT: .local i32 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.load16_u 0 +; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: i32.const -1 +; WASM32-NEXT: i32.add +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 65535 +; WASM32-NEXT: i32.and +; WASM32-NEXT: i32.gt_u +; WASM32-NEXT: i32.select +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: i32.select +; WASM32-NEXT: i32.store16 0 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: atomicrmw_udec_wrap_i16: +; WASM64: .functype atomicrmw_udec_wrap_i16 (i64, i32) -> (i32) +; WASM64-NEXT: .local i32 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.load16_u 0 +; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: i32.const -1 +; WASM64-NEXT: i32.add +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i32.const 65535 +; WASM64-NEXT: i32.and +; WASM64-NEXT: i32.gt_u +; WASM64-NEXT: i32.select +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: i32.select +; WASM64-NEXT: i32.store16 0 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: # fallthrough-return + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; WASM32-LABEL: atomicrmw_udec_wrap_i32: +; WASM32: .functype atomicrmw_udec_wrap_i32 (i32, i32) -> (i32) +; WASM32-NEXT: .local i32 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.load 0 +; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: 
i32.const -1 +; WASM32-NEXT: i32.add +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.gt_u +; WASM32-NEXT: i32.select +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: i32.select +; WASM32-NEXT: i32.store 0 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: atomicrmw_udec_wrap_i32: +; WASM64: .functype atomicrmw_udec_wrap_i32 (i64, i32) -> (i32) +; WASM64-NEXT: .local i32 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i32.load 0 +; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: i32.const -1 +; WASM64-NEXT: i32.add +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i32.gt_u +; WASM64-NEXT: i32.select +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: i32.select +; WASM64-NEXT: i32.store 0 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: # fallthrough-return + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; WASM32-LABEL: atomicrmw_udec_wrap_i64: +; WASM32: .functype atomicrmw_udec_wrap_i64 (i32, i64) -> (i64) +; WASM32-NEXT: .local i64 +; WASM32-NEXT: # %bb.0: +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i64.load 0 +; WASM32-NEXT: local.tee 2 +; WASM32-NEXT: i64.const -1 +; WASM32-NEXT: i64.add +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i64.gt_u +; WASM32-NEXT: i64.select +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: i64.eqz +; WASM32-NEXT: i64.select +; WASM32-NEXT: i64.store 0 +; WASM32-NEXT: local.get 2 +; WASM32-NEXT: # fallthrough-return +; +; WASM64-LABEL: atomicrmw_udec_wrap_i64: +; WASM64: .functype atomicrmw_udec_wrap_i64 (i64, i64) -> (i64) +; WASM64-NEXT: .local i64 +; WASM64-NEXT: # %bb.0: +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: local.get 0 +; WASM64-NEXT: i64.load 0 +; WASM64-NEXT: local.tee 2 +; WASM64-NEXT: i64.const -1 +; WASM64-NEXT: i64.add +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: local.get 1 +; WASM64-NEXT: i64.gt_u +; WASM64-NEXT: i64.select +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: i64.eqz +; WASM64-NEXT: i64.select +; WASM64-NEXT: i64.store 0 +; WASM64-NEXT: local.get 2 +; WASM64-NEXT: # fallthrough-return + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/CodeGen/X86/atomicrmw-uinc-udec-wrap.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/atomicrmw-uinc-udec-wrap.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck %s + +define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: leal 1(%rax), %edx +; CHECK-NEXT: cmpb %sil, %al +; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: cmovael %ecx, %edx +; CHECK-NEXT: # kill: def $al killed $al killed $rax +; CHECK-NEXT: lock cmpxchgb %dl, (%rdi) +; CHECK-NEXT: # kill: def $al killed $al def $rax +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: # kill: def $al killed $al killed $rax 
+; CHECK-NEXT: retq + %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: leal 1(%rax), %edx +; CHECK-NEXT: cmpw %si, %ax +; CHECK-NEXT: cmovael %ecx, %edx +; CHECK-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-NEXT: lock cmpxchgw %dx, (%rdi) +; CHECK-NEXT: # kill: def $ax killed $ax def $rax +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-NEXT: retq + %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB2_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: leal 1(%rax), %edx +; CHECK-NEXT: cmpl %esi, %eax +; CHECK-NEXT: cmovael %ecx, %edx +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: lock cmpxchgl %edx, (%rdi) +; CHECK-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-NEXT: jne .LBB2_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_uinc_wrap_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB3_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: leaq 1(%rax), %rdx +; CHECK-NEXT: cmpq %rsi, %rax +; CHECK-NEXT: cmovaeq %rcx, %rdx +; CHECK-NEXT: lock cmpxchgq %rdx, (%rdi) +; CHECK-NEXT: jne .LBB3_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: retq + %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} + +define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: movzbl %sil, %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB4_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: subb $1, %dl +; CHECK-NEXT: cmpb %cl, %al +; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: cmoval %ecx, %edx +; CHECK-NEXT: cmpb $1, %al +; CHECK-NEXT: cmovbl %ecx, %edx +; CHECK-NEXT: lock cmpxchgb %dl, (%rdi) +; CHECK-NEXT: jne .LBB4_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: retq + %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst + ret i8 %result +} + +define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB5_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: subw $1, %cx +; CHECK-NEXT: cmpw %si, %ax +; CHECK-NEXT: cmoval %esi, %ecx +; CHECK-NEXT: cmpw $1, %ax +; CHECK-NEXT: cmovbl %esi, %ecx +; CHECK-NEXT: lock cmpxchgw %cx, (%rdi) +; CHECK-NEXT: jne .LBB5_1 +; 
CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: retq + %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst + ret i16 %result +} + +define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB6_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: leal -1(%rax), %ecx +; CHECK-NEXT: cmpl %esi, %eax +; CHECK-NEXT: cmoval %esi, %ecx +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: cmovbl %esi, %ecx +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: lock cmpxchgl %ecx, (%rdi) +; CHECK-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-NEXT: jne .LBB6_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: retq + %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst + ret i32 %result +} + +define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { +; CHECK-LABEL: atomicrmw_udec_wrap_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB7_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: leaq -1(%rax), %rcx +; CHECK-NEXT: cmpq %rsi, %rax +; CHECK-NEXT: cmovaq %rsi, %rcx +; CHECK-NEXT: cmpq $1, %rax +; CHECK-NEXT: cmovbq %rsi, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: jne .LBB7_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: retq + %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst + ret i64 %result +} Index: llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll =================================================================== --- llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll +++ llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll @@ -254,17 +254,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE:%.*]] ; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 -; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; %res = atomicrmw max ptr addrspace(1) %ptr, i16 %value seq_cst ret i16 %res @@ -288,17 +288,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE:%.*]] ; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], 
[[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 -; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; %res = atomicrmw min ptr addrspace(1) %ptr, i16 %value seq_cst ret i16 %res @@ -322,17 +322,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] ; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 -; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; %res = atomicrmw umax ptr addrspace(1) %ptr, i16 %value seq_cst ret i16 %res @@ -356,17 +356,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE:%.*]] ; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 
[[SHIFTED3]] to i16 -; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; %res = atomicrmw umin ptr addrspace(1) %ptr, i16 %value seq_cst ret i16 %res @@ -529,3 +529,373 @@ %res = atomicrmw xor ptr addrspace(3) %ptr, i16 %value seq_cst, align 4 ret i16 %res } + +define i16 @test_atomicrmw_inc_i16_global(i16 addrspace(1)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_global( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i16 addrspace(1)* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_global_align4(i16 addrspace(1)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_global_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { 
i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i16 addrspace(1)* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_local(i16 addrspace(3)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_local( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i16 addrspace(3)* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_local_align4(i16 addrspace(3)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_local_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 
+; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i16 addrspace(3)* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_flat(i16* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_flat( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_flat_align4(i16* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] 
= trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i16* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_global(i16 addrspace(1)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_global( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i16 addrspace(1)* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_global_align4(i16 addrspace(1)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_global_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } 
[[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i16 addrspace(1)* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_local(i16 addrspace(3)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_local( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i16 addrspace(3)* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_local_align4(i16 addrspace(3)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_local_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: 
[[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i16 addrspace(3)* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_flat(i16* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_flat( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_flat_align4(i16* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = 
and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i16* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} Index: llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll =================================================================== --- llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll +++ llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll @@ -263,17 +263,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i8 [[EXTRACTED]], [[VALUE:%.*]] ; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 -; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] ; %res = atomicrmw max ptr addrspace(1) %ptr, i8 %value seq_cst ret i8 %res @@ -297,17 +297,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp sle i8 [[EXTRACTED]], [[VALUE:%.*]] ; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 -; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; 
CHECK-NEXT: ret i8 [[EXTRACTED3]] ; %res = atomicrmw min ptr addrspace(1) %ptr, i8 %value seq_cst ret i8 %res @@ -331,17 +331,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] ; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 -; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] ; %res = atomicrmw umax ptr addrspace(1) %ptr, i8 %value seq_cst ret i8 %res @@ -365,17 +365,17 @@ ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i8 [[EXTRACTED]], [[VALUE:%.*]] ; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] ; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 ; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 ; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 -; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] ; %res = atomicrmw umin ptr addrspace(1) %ptr, i8 %value seq_cst ret i8 %res @@ -466,3 +466,591 @@ %extract = extractvalue {i8, i1} %res, 0 ret i8 %extract } + +define i8 @test_atomicrmw_inc_i8_global(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_global( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: 
[[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_global_align2(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_global_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i8 addrspace(1)* %ptr, 
i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_global_align4(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_global_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_local(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_local( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst + ret i8 %res +} + 
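+; Note: an align 2 i8 access is still not naturally aligned to the i32 word,
+; so the align 2 variants below keep the ptrmask/shift/mask bookkeeping; the
+; align 4 variants further down operate on the low byte of the word directly,
+; with a constant mask and no dynamic shift.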
+define i8 @test_atomicrmw_inc_i8_local_align2(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_local_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_local_align4(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_local_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + +define i8 
@test_atomicrmw_inc_i8_flat(i8* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_flat( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_flat_align2(i8* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_flat_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } 
[[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw uinc_wrap i8* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_flat_align4(i8* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_flat_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i8* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_global(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_global( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = 
extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_global_align2(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_global_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_global_align4(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_global_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; 
CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_local(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_local( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_local_align2(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_local_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr 
addrspace(3) [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_local_align4(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_local_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_flat(i8* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_flat( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; 
CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_flat_align2(i8* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_flat_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: 
atomicrmw.end: +; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; + %res = atomicrmw udec_wrap i8* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_flat_align4(i8* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i8_flat_align4( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i8* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +}
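
For orientation, the value computation that the CHECK lines above exercise (add/icmp uge/select for uinc_wrap, sub/icmp eq/icmp ugt/or/select for udec_wrap) can be written as a small IRBuilder sequence. This is a sketch only, with a hypothetical helper name, not the in-tree implementation:

  // Sketch: build the new value for the wrap operations, mirroring the
  // select/compare sequence visible in the expanded tests above.
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  static Value *buildUWrapNewValue(bool IsInc, IRBuilderBase &B,
                                   Value *Loaded, Value *Val) {
    Constant *One = ConstantInt::get(Loaded->getType(), 1);
    Constant *Zero = ConstantInt::get(Loaded->getType(), 0);
    if (IsInc) {
      // uinc_wrap: (Loaded u>= Val) ? 0 : Loaded + 1
      Value *Inc = B.CreateAdd(Loaded, One);
      Value *Cmp = B.CreateICmpUGE(Loaded, Val);
      return B.CreateSelect(Cmp, Zero, Inc);
    }
    // udec_wrap: ((Loaded == 0) || (Loaded u> Val)) ? Val : Loaded - 1
    Value *Dec = B.CreateSub(Loaded, One);
    Value *IsZero = B.CreateICmpEQ(Loaded, Zero);
    Value *IsAbove = B.CreateICmpUGT(Loaded, Val);
    return B.CreateSelect(B.CreateOr(IsZero, IsAbove), Val, Dec);
  }

For the sub-word tests above, this per-element computation is then wrapped in the usual masked re-insert (shl/and/or) and cmpxchg retry loop shared with the existing min/max expansions.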