Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -10420,6 +10420,8 @@ - fsub - fmax - fmin +- uinc_wrap +- udec_wrap For most of these operations, the type of '<value>' must be an integer type whose bit width is a power of two greater than or equal to eight @@ -10464,6 +10466,8 @@ - fsub: ``*ptr = *ptr - val`` (using floating point arithmetic) - fmax: ``*ptr = maxnum(*ptr, val)`` (match the `llvm.maxnum.*`` intrinsic) - fmin: ``*ptr = minnum(*ptr, val)`` (match the `llvm.minnum.*`` intrinsic) +- uinc_wrap: ``*ptr = (*ptr u>= val) ? 0 : (*ptr + 1)`` +- udec_wrap: ``*ptr = ((*ptr == 0) || (*ptr u> val)) ? val : (*ptr - 1)`` Example: """""""" Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -93,6 +93,8 @@ * ``fneg`` + * Added ``uinc_wrap`` and ``udec_wrap`` operations to ``atomicrmw`` + Changes to building LLVM ------------------------ Index: llvm/include/llvm/AsmParser/LLToken.h =================================================================== --- llvm/include/llvm/AsmParser/LLToken.h +++ llvm/include/llvm/AsmParser/LLToken.h @@ -237,6 +237,8 @@ kw_umin, kw_fmax, kw_fmin, + kw_uinc_wrap, + kw_udec_wrap, // Instruction Opcodes (Opcode in UIntVal).
kw_fneg, Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -460,7 +460,9 @@ RMW_FADD = 11, RMW_FSUB = 12, RMW_FMAX = 13, - RMW_FMIN = 14 + RMW_FMIN = 14, + RMW_UINC_WRAP = 15, + RMW_UDEC_WRAP = 16 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing Index: llvm/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1197,6 +1197,8 @@ ATOMIC_LOAD_FSUB, ATOMIC_LOAD_FMAX, ATOMIC_LOAD_FMIN, + ATOMIC_LOAD_UINC_WRAP, + ATOMIC_LOAD_UDEC_WRAP, // Masked load and store - consecutive vector load and store operations // with additional mask operand that prevents memory accesses to the Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1419,6 +1419,8 @@ case ISD::ATOMIC_LOAD_FSUB: case ISD::ATOMIC_LOAD_FMAX: case ISD::ATOMIC_LOAD_FMIN: + case ISD::ATOMIC_LOAD_UINC_WRAP: + case ISD::ATOMIC_LOAD_UDEC_WRAP: case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: case ISD::MLOAD: @@ -1486,6 +1488,8 @@ N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || N->getOpcode() == ISD::ATOMIC_LOAD_FMAX || N->getOpcode() == ISD::ATOMIC_LOAD_FMIN || + N->getOpcode() == ISD::ATOMIC_LOAD_UINC_WRAP || + N->getOpcode() == ISD::ATOMIC_LOAD_UDEC_WRAP || N->getOpcode() == ISD::ATOMIC_LOAD || N->getOpcode() == ISD::ATOMIC_STORE; } Index: llvm/include/llvm/IR/Instructions.h =================================================================== --- llvm/include/llvm/IR/Instructions.h +++ llvm/include/llvm/IR/Instructions.h @@ -762,8 +762,16 @@ /// \p minnum matches the behavior of \p llvm.minnum.*. FMin, + /// Increment one up to a maximum value. 
+ /// *p = (old u>= v) ? 0 : (old + 1) + UIncWrap, + + /// Decrement one until a minimum value or zero. + /// *p = ((old == 0) || (old u> v)) ? v : (old - 1) + UDecWrap, + FIRST_BINOP = Xchg, - LAST_BINOP = FMin, + LAST_BINOP = UDecWrap, BAD_BINOP }; @@ -775,7 +783,7 @@ template <unsigned Offset> using BinOpBitfieldElement = - typename Bitfield::Element<BinOp, Offset, 4, BinOp::LAST_BINOP>; + typename Bitfield::Element<BinOp, Offset, 5, BinOp::LAST_BINOP>; public: AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, Align Alignment, Index: llvm/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/include/llvm/Support/TargetOpcodes.def +++ llvm/include/llvm/Support/TargetOpcodes.def @@ -386,12 +386,14 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UINC_WRAP) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UDEC_WRAP) // Marker for start of Generic AtomicRMW opcodes HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_START, G_ATOMICRMW_XCHG) // Marker for end of Generic AtomicRMW opcodes -HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_FMIN) +HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_UDEC_WRAP) // Generic atomic fence HANDLE_TARGET_OPCODE(G_FENCE) Index: llvm/include/llvm/Target/GenericOpcodes.td =================================================================== --- llvm/include/llvm/Target/GenericOpcodes.td +++ llvm/include/llvm/Target/GenericOpcodes.td @@ -1128,6 +1128,8 @@ def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP; def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP; +def G_ATOMICRMW_UINC_WRAP : G_ATOMICRMW_OP; +def G_ATOMICRMW_UDEC_WRAP : G_ATOMICRMW_OP; def G_FENCE : GenericInstruction { let OutOperandList = (outs); Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++
llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -213,6 +213,8 @@ def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>; def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>; def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>; +def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap>; +def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap>; def : GINodeEquiv<G_FENCE, atomic_fence>; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. Index: llvm/include/llvm/Target/TargetSelectionDAG.td =================================================================== --- llvm/include/llvm/Target/TargetSelectionDAG.td +++ llvm/include/llvm/Target/TargetSelectionDAG.td @@ -663,6 +663,10 @@ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load_fmin : SDNode<"ISD::ATOMIC_LOAD_FMIN", SDTFPAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_uinc_wrap : SDNode<"ISD::ATOMIC_LOAD_UINC_WRAP", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_udec_wrap : SDNode<"ISD::ATOMIC_LOAD_UDEC_WRAP", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; Index: llvm/include/llvm/Transforms/Utils/LowerAtomic.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LowerAtomic.h +++ llvm/include/llvm/Transforms/Utils/LowerAtomic.h @@ -31,7 +31,7 @@ /// Emit IR to implement the given atomicrmw operation on values in registers, /// returning the new value.
Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, - Value *Loaded, Value *Inc); + Value *Loaded, Value *Val); } #endif // LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H Index: llvm/lib/AsmParser/LLLexer.cpp =================================================================== --- llvm/lib/AsmParser/LLLexer.cpp +++ llvm/lib/AsmParser/LLLexer.cpp @@ -672,6 +672,8 @@ KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax); KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin); + KEYWORD(uinc_wrap); + KEYWORD(udec_wrap); KEYWORD(vscale); KEYWORD(x); Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -7622,6 +7622,8 @@ case lltok::kw_min: Operation = AtomicRMWInst::Min; break; case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break; case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break; + case lltok::kw_uinc_wrap: Operation = AtomicRMWInst::UIncWrap; break; + case lltok::kw_udec_wrap: Operation = AtomicRMWInst::UDecWrap; break; case lltok::kw_fadd: Operation = AtomicRMWInst::FAdd; IsFP = true; Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1243,6 +1243,8 @@ case bitc::RMW_FSUB: return AtomicRMWInst::FSub; case bitc::RMW_FMAX: return AtomicRMWInst::FMax; case bitc::RMW_FMIN: return AtomicRMWInst::FMin; + case bitc::RMW_UINC_WRAP: return AtomicRMWInst::UIncWrap; + case bitc::RMW_UDEC_WRAP: return AtomicRMWInst::UDecWrap; } } Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -579,6 +579,8 @@ case AtomicRMWInst::FSub: return bitc::RMW_FSUB; case AtomicRMWInst::FMax: return bitc::RMW_FMAX; case 
AtomicRMWInst::FMin: return bitc::RMW_FMIN; + case AtomicRMWInst::UIncWrap: return bitc::RMW_UINC_WRAP; + case AtomicRMWInst::UDecWrap: return bitc::RMW_UDEC_WRAP; } } Index: llvm/lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- llvm/lib/CodeGen/AtomicExpandPass.cpp +++ llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -804,7 +804,9 @@ case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: - case AtomicRMWInst::UMin: { + case AtomicRMWInst::UMin: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: { // Finally, comparison ops will operate on the full value, so // truncate down to the original size, and expand out again after // doing the operation. @@ -1674,6 +1676,8 @@ case AtomicRMWInst::FMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: // No atomic libcalls are available for max/min/umax/umin. return {}; } Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2939,6 +2939,12 @@ case AtomicRMWInst::FMin: Opcode = TargetOpcode::G_ATOMICRMW_FMIN; break; + case AtomicRMWInst::UIncWrap: + Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP; + break; + case AtomicRMWInst::UDecWrap: + Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP; + break; } MIRBuilder.buildAtomicRMW( Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7669,6 +7669,8 @@ Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX || Opcode == ISD::ATOMIC_LOAD_FMIN || + Opcode == ISD::ATOMIC_LOAD_UINC_WRAP || + Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_STORE) && 
"Invalid Atomic Op"); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4662,6 +4662,8 @@ case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; + case AtomicRMWInst::UIncWrap: NT = ISD::ATOMIC_LOAD_UINC_WRAP; break; + case AtomicRMWInst::UDecWrap: NT = ISD::ATOMIC_LOAD_UDEC_WRAP; break; } AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -95,6 +95,8 @@ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd"; + case ISD::ATOMIC_LOAD_UINC_WRAP: return "AtomicLoadUIncWrap"; + case ISD::ATOMIC_LOAD_UDEC_WRAP: return "AtomicLoadUDecWrap"; case ISD::ATOMIC_LOAD: return "AtomicLoad"; case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4280,6 +4280,7 @@ return Builder.saveIP(); } +// FIXME: Duplicating AtomicExpand Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, AtomicRMWInst::BinOp RMWOp) { switch (RMWOp) { @@ -4305,6 +4306,8 @@ case AtomicRMWInst::UMin: case AtomicRMWInst::FMax: case AtomicRMWInst::FMin: + case AtomicRMWInst::UIncWrap: + case AtomicRMWInst::UDecWrap: llvm_unreachable("Unsupported atomic update 
operation"); } llvm_unreachable("Unsupported atomic update operation"); Index: llvm/lib/IR/Instructions.cpp =================================================================== --- llvm/lib/IR/Instructions.cpp +++ llvm/lib/IR/Instructions.cpp @@ -1792,6 +1792,10 @@ return "fmax"; case AtomicRMWInst::FMin: return "fmin"; + case AtomicRMWInst::UIncWrap: + return "uinc_wrap"; + case AtomicRMWInst::UDecWrap: + return "udec_wrap"; case AtomicRMWInst::BAD_BINOP: return ""; } Index: llvm/lib/Transforms/Utils/LowerAtomic.cpp =================================================================== --- llvm/lib/Transforms/Utils/LowerAtomic.cpp +++ llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -41,43 +41,60 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, - Value *Inc) { + Value *Val) { Value *NewVal; switch (Op) { case AtomicRMWInst::Xchg: - return Inc; + return Val; case AtomicRMWInst::Add: - return Builder.CreateAdd(Loaded, Inc, "new"); + return Builder.CreateAdd(Loaded, Val, "new"); case AtomicRMWInst::Sub: - return Builder.CreateSub(Loaded, Inc, "new"); + return Builder.CreateSub(Loaded, Val, "new"); case AtomicRMWInst::And: - return Builder.CreateAnd(Loaded, Inc, "new"); + return Builder.CreateAnd(Loaded, Val, "new"); case AtomicRMWInst::Nand: - return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); + return Builder.CreateNot(Builder.CreateAnd(Loaded, Val), "new"); case AtomicRMWInst::Or: - return Builder.CreateOr(Loaded, Inc, "new"); + return Builder.CreateOr(Loaded, Val, "new"); case AtomicRMWInst::Xor: - return Builder.CreateXor(Loaded, Inc, "new"); + return Builder.CreateXor(Loaded, Val, "new"); case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + NewVal = Builder.CreateICmpSGT(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, Inc); - return 
Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + NewVal = Builder.CreateICmpSLE(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + NewVal = Builder.CreateICmpUGT(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + NewVal = Builder.CreateICmpULE(Loaded, Val); + return Builder.CreateSelect(NewVal, Loaded, Val, "new"); case AtomicRMWInst::FAdd: - return Builder.CreateFAdd(Loaded, Inc, "new"); + return Builder.CreateFAdd(Loaded, Val, "new"); case AtomicRMWInst::FSub: - return Builder.CreateFSub(Loaded, Inc, "new"); + return Builder.CreateFSub(Loaded, Val, "new"); case AtomicRMWInst::FMax: - return Builder.CreateMaxNum(Loaded, Inc); + return Builder.CreateMaxNum(Loaded, Val); case AtomicRMWInst::FMin: - return Builder.CreateMinNum(Loaded, Inc); + return Builder.CreateMinNum(Loaded, Val); + case AtomicRMWInst::UIncWrap: { + Constant *One = ConstantInt::get(Loaded->getType(), 1); + Value *Inc = Builder.CreateAdd(Loaded, One); + Value *Cmp = Builder.CreateICmpUGE(Loaded, Val); + Constant *Zero = ConstantInt::get(Loaded->getType(), 0); + return Builder.CreateSelect(Cmp, Zero, Inc, "new"); + } + case AtomicRMWInst::UDecWrap: { + Constant *Zero = ConstantInt::get(Loaded->getType(), 0); + Constant *One = ConstantInt::get(Loaded->getType(), 1); + + Value *Dec = Builder.CreateSub(Loaded, One); + Value *CmpEq0 = Builder.CreateICmpEQ(Loaded, Zero); + Value *CmpOldGtVal = Builder.CreateICmpUGT(Loaded, Val); + Value *Or = Builder.CreateOr(CmpEq0, CmpOldGtVal); + return Builder.CreateSelect(Or, Val, Dec, "new"); + } default: llvm_unreachable("Unknown atomic op"); } Index: llvm/test/Assembler/atomic.ll =================================================================== --- 
llvm/test/Assembler/atomic.ll +++ llvm/test/Assembler/atomic.ll @@ -31,6 +31,17 @@ atomicrmw volatile xchg i32* %x, i32 10 monotonic ; CHECK: atomicrmw volatile xchg i32* %x, i32 10 syncscope("agent") monotonic atomicrmw volatile xchg i32* %x, i32 10 syncscope("agent") monotonic + + ; CHECK: atomicrmw volatile uinc_wrap i32* %x, i32 10 monotonic + atomicrmw volatile uinc_wrap i32* %x, i32 10 monotonic + ; CHECK: atomicrmw volatile uinc_wrap i32* %x, i32 10 syncscope("agent") monotonic + atomicrmw volatile uinc_wrap i32* %x, i32 10 syncscope("agent") monotonic + + ; CHECK: atomicrmw volatile udec_wrap i32* %x, i32 10 monotonic + atomicrmw volatile udec_wrap i32* %x, i32 10 monotonic + ; CHECK: atomicrmw volatile udec_wrap i32* %x, i32 10 syncscope("agent") monotonic + atomicrmw volatile udec_wrap i32* %x, i32 10 syncscope("agent") monotonic + ; CHECK: fence syncscope("singlethread") release fence syncscope("singlethread") release ; CHECK: fence seq_cst Index: llvm/test/Bitcode/compatibility.ll =================================================================== --- llvm/test/Bitcode/compatibility.ll +++ llvm/test/Bitcode/compatibility.ll @@ -874,6 +874,34 @@ ret void } +define void @uinc_udec_wrap_atomics(i32* %word) { +; CHECK: %atomicrmw.inc0 = atomicrmw uinc_wrap i32* %word, i32 64 monotonic + %atomicrmw.inc0 = atomicrmw uinc_wrap i32* %word, i32 64 monotonic + +; CHECK: %atomicrmw.inc1 = atomicrmw uinc_wrap i32* %word, i32 128 seq_cst + %atomicrmw.inc1 = atomicrmw uinc_wrap i32* %word, i32 128 seq_cst + +; CHECK: %atomicrmw.inc2 = atomicrmw volatile uinc_wrap i32* %word, i32 128 seq_cst + %atomicrmw.inc2 = atomicrmw volatile uinc_wrap i32* %word, i32 128 seq_cst + +; CHECK: %atomicrmw.inc0.syncscope = atomicrmw uinc_wrap i32* %word, i32 27 syncscope("agent") monotonic + %atomicrmw.inc0.syncscope = atomicrmw uinc_wrap i32* %word, i32 27 syncscope("agent") monotonic + +; CHECK: %atomicrmw.dec0 = atomicrmw udec_wrap i32* %word, i32 99 monotonic + %atomicrmw.dec0 = 
atomicrmw udec_wrap i32* %word, i32 99 monotonic + +; CHECK: %atomicrmw.dec1 = atomicrmw udec_wrap i32* %word, i32 12 seq_cst + %atomicrmw.dec1 = atomicrmw udec_wrap i32* %word, i32 12 seq_cst + +; CHECK: %atomicrmw.dec2 = atomicrmw volatile udec_wrap i32* %word, i32 12 seq_cst + %atomicrmw.dec2 = atomicrmw volatile udec_wrap i32* %word, i32 12 seq_cst + +; CHECK: %atomicrmw.dec0.syncscope = atomicrmw udec_wrap i32* %word, i32 5 syncscope("system") monotonic + %atomicrmw.dec0.syncscope = atomicrmw udec_wrap i32* %word, i32 5 syncscope("system") monotonic + + ret void +} + define void @pointer_atomics(i8** %word) { ; CHECK: %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -239,6 +239,12 @@ # DEBUG-NEXT: G_ATOMICRMW_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ATOMICRMW_UINC_WRAP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_ATOMICRMW_UDEC_WRAP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FENCE (opcode {{[0-9]+}}): 0 type indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: no rules defined Index: llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll +++ llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll @@ -533,3 +533,57 @@ store i32 %val, i32 addrspace(1)* %out ret void } + +define i32 @atomicrmw_inc_private_i32(i32 addrspace(5)* %ptr) { +; IR-LABEL: @atomicrmw_inc_private_i32( +; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; IR-NEXT: [[TMP3:%.*]] = icmp uge i32 [[TMP1]], 4 +; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]] +; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: ret i32 [[TMP1]] +; +; GCN-LABEL: atomicrmw_inc_private_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_i32_e32 v2, vcc, 1, v1 +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 4, v1 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw uinc_wrap i32 addrspace(5)* %ptr, i32 4 seq_cst + ret i32 %result +} + +define i32 @atomicrmw_dec_private_i32(i32 addrspace(5)* %ptr) { +; IR-LABEL: @atomicrmw_dec_private_i32( +; IR-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4 +; IR-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1 +; IR-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0 +; IR-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP1]], 4 +; IR-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; IR-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i32 4, i32 [[TMP2]] +; IR-NEXT: store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4 +; IR-NEXT: ret i32 [[TMP1]] +; +; GCN-LABEL: atomicrmw_dec_private_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
lgkmcnt(0) +; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_i32_e32 v2, vcc, -1, v1 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GCN-NEXT: v_cmp_lt_u32_e64 s[4:5], 4, v1 +; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 4, s[4:5] +; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + %result = atomicrmw udec_wrap i32 addrspace(5)* %ptr, i32 4 seq_cst + ret i32 %result +} Index: llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll =================================================================== --- llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll +++ llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll @@ -547,3 +547,385 @@ %res = atomicrmw xor i16 addrspace(3)* %ptr, i16 %value seq_cst, align 4 ret i16 %res } + +define i16 @test_atomicrmw_inc_i16_global(i16 addrspace(1)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_global( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +;
CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i16 addrspace(1)* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_global_align4(i16 addrspace(1)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_global_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i16 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to 
i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i16 addrspace(1)* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_local(i16 addrspace(3)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_local( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(3)* @llvm.ptrmask.p3i16.i32(i16 addrspace(3)* [[PTR:%.*]], i32 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(3)* [[PTR]] to i32 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(3)* [[ALIGNEDADDR]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 
[[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i16 addrspace(3)* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_local_align4(i16 addrspace(3)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_local_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i16 addrspace(3)* [[PTR:%.*]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { 
i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i16 addrspace(3)* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_flat(i16* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_flat( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16* @llvm.ptrmask.p0i16.i64(i16* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 
4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_inc_i16_flat_align4(i16* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i16* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i16* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 
@test_atomicrmw_dec_i16_global(i16 addrspace(1)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_global( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label 
[[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i16 addrspace(1)* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_global_align4(i16 addrspace(1)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_global_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i16 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i16 addrspace(1)* %ptr, i16 %value seq_cst, align 4 + ret 
i16 %res +} + +define i16 @test_atomicrmw_dec_i16_local(i16 addrspace(3)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_local( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(3)* @llvm.ptrmask.p3i16.i32(i16 addrspace(3)* [[PTR:%.*]], i32 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(3)* [[PTR]] to i32 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(3)* [[ALIGNEDADDR]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: 
atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i16 addrspace(3)* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_local_align4(i16 addrspace(3)* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_local_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i16 addrspace(3)* [[PTR:%.*]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i16 addrspace(3)* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_flat(i16* %ptr, 
i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_flat( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16* @llvm.ptrmask.p0i16.i64(i16* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: 
[[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +define i16 @test_atomicrmw_dec_i16_flat_align4(i16* %ptr, i16 %value) { +; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i16* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i16* %ptr, i16 %value seq_cst, align 4 + ret i16 %res +} Index: llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll =================================================================== --- llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll +++ 
llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll @@ -481,3 +481,609 @@ %extract = extractvalue {i8, i1} %res, 0 ret i8 %extract } + +define i8 @test_atomicrmw_inc_i8_global(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_global( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], 
label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_global_align2(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_global_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 
[[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_global_align4(i8 addrspace(1)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_global_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i8 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res 
= atomicrmw uinc_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_local(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_local( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(3)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(3)* [[ALIGNEDADDR]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; 
CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_local_align2(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_local_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(3)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(3)* [[ALIGNEDADDR]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: 
[[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_local_align4(i8 addrspace(3)* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_local_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i8 addrspace(3)* [[PTR:%.*]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i8 addrspace(3)* %ptr, i8 %value 
seq_cst, align 4 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_flat(i8* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_flat( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; 
CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_flat_align2(i8* %ptr, i8 %value) { +; CHECK-LABEL: @test_atomicrmw_inc_i8_flat_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 
[[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw uinc_wrap i8* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_inc_i8_flat_align4(i8* %ptr, i8 %value) { +; uinc_wrap on an i8 through an addrspace(0) pointer with align 4. Expected expansion: the pointer is bitcast straight to i32*, the i8 is taken from the low 8 bits (no ptrmask or shift), the new value wraps to 0 when old u>= value and otherwise increments by one, and it is merged back with the constant mask -256 inside a cmpxchg loop. +; CHECK-LABEL: @test_atomicrmw_inc_i8_flat_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i8* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw uinc_wrap i8* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_global(i8 addrspace(1)* %ptr, i8 %value) { +; udec_wrap on an i8 in addrspace(1) with no explicit alignment. Expected expansion: the pointer is rounded down with llvm.ptrmask(-4), the bit shift is derived from the low two address bits times 8, and the containing i32 is rewritten in a cmpxchg loop with new = (old == 0 || old u> value) ? value : old - 1. +; CHECK-LABEL: @test_atomicrmw_dec_i8_global( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; CHECK-NEXT: 
[[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 
@test_atomicrmw_dec_i8_global_align2(i8 addrspace(1)* %ptr, i8 %value) { +; udec_wrap on an i8 in addrspace(1) with align 2. The expected expansion is the same ptrmask(-4) plus variable-shift cmpxchg loop as when no alignment is given; align 2 does not remove the shift. +; CHECK-LABEL: @test_atomicrmw_dec_i8_global_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label 
[[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_global_align4(i8 addrspace(1)* %ptr, i8 %value) { +; udec_wrap on an i8 in addrspace(1) with align 4. Expected expansion: the pointer is bitcast directly to i32 addrspace(1)*, the i8 is the low 8 bits of the loaded i32, and the result is merged back with the constant mask -256; no ptrmask or shift is expected. +; CHECK-LABEL: @test_atomicrmw_dec_i8_global_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i8 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i8 addrspace(1)* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + 
+define i8 @test_atomicrmw_dec_i8_local(i8 addrspace(3)* %ptr, i8 %value) { +; udec_wrap on an i8 in addrspace(3) with no explicit alignment. Expected expansion: the pointer is rounded down with llvm.ptrmask(-4), the bit shift is derived from the low two address bits times 8, and the containing i32 is rewritten in a cmpxchg loop with new = (old == 0 || old u> value) ? value : old - 1. +; CHECK-LABEL: @test_atomicrmw_dec_i8_local( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(3)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(3)* [[ALIGNEDADDR]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], 
label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_local_align2(i8 addrspace(3)* %ptr, i8 %value) { +; udec_wrap on an i8 in addrspace(3) with align 2. The expected expansion is the same ptrmask(-4) plus variable-shift cmpxchg loop as when no alignment is given; align 2 does not remove the shift. +; CHECK-LABEL: @test_atomicrmw_dec_i8_local_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(3)* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(3)* [[ALIGNEDADDR]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; 
CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_local_align4(i8 addrspace(3)* %ptr, i8 %value) { +; udec_wrap on an i8 in addrspace(3) with align 4. Expected expansion: the pointer is bitcast directly to i32 addrspace(3)*, the i8 is the low 8 bits of the loaded i32, and the result is merged back with the constant mask -256; no ptrmask or shift is expected. +; CHECK-LABEL: @test_atomicrmw_dec_i8_local_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i8 addrspace(3)* [[PTR:%.*]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: 
br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i8 addrspace(3)* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_flat(i8* %ptr, i8 %value) { +; udec_wrap on an i8 through an addrspace(0) pointer with no explicit alignment. Expected expansion: the pointer is rounded down with llvm.ptrmask(-4), the bit shift is derived from the low two address bits times 8, and the containing i32 is rewritten in a cmpxchg loop with new = (old == 0 || old u> value) ? value : old - 1. +; CHECK-LABEL: @test_atomicrmw_dec_i8_flat( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; 
CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_flat_align2(i8* %ptr, i8 %value) { +; udec_wrap on an i8 through an addrspace(0) pointer with align 2. The expected expansion is the same ptrmask(-4) plus variable-shift cmpxchg loop as when no alignment is given; align 2 does not remove the shift. +; CHECK-LABEL: @test_atomicrmw_dec_i8_flat_align2( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8* [[PTR]] to i64 +; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] +; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 
[[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] +; + %res = atomicrmw udec_wrap i8* %ptr, i8 %value seq_cst, align 2 + ret i8 %res +} + +define i8 @test_atomicrmw_dec_i8_flat_align4(i8* %ptr, i8 %value) { +; udec_wrap on an i8 through an addrspace(0) pointer with align 4. Expected expansion: the pointer is bitcast directly to i32*, the i8 is the low 8 bits of the loaded i32, and the result is merged back with the constant mask -256; no ptrmask or shift is expected. +; CHECK-LABEL: @test_atomicrmw_dec_i8_flat_align4( +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = bitcast i8* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]] +; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 +; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], -256 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; 
CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED1]] +; + %res = atomicrmw udec_wrap i8* %ptr, i8 %value seq_cst, align 4 + ret i8 %res +}