Index: llvm/include/llvm/CodeGen/AtomicExpandUtils.h
===================================================================
--- llvm/include/llvm/CodeGen/AtomicExpandUtils.h
+++ llvm/include/llvm/CodeGen/AtomicExpandUtils.h
@@ -22,7 +22,7 @@
 /// (the builder, %addr, %loaded, %new_val, ordering,
 ///  /* OUT */ %success, /* OUT */ %new_loaded)
 using CreateCmpXchgInstFun =
-    function_ref<void(IRBuilder<> &, Value *, Value *, Value *, Align,
+    function_ref<void(IRBuilderBase &, Value *, Value *, Value *, Align,
                       AtomicOrdering, SyncScope::ID, Value *&, Value *&)>;
 
 /// Expand an atomic RMW instruction into a loop utilizing
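Since the callback type is now phrased in terms of `IRBuilderBase`, a `CreateCmpXchgInstFun` works with any folder the caller chooses, including the `InstSimplifyFolder` introduced below. A hedged sketch of a conforming callback, modeled on the pass's own `createCmpXchgInstFun`; the function name is illustrative, not part of the patch:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch of a callback matching the updated CreateCmpXchgInstFun signature.
static void exampleCreateCmpXchg(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  // Emit the strong cmpxchg and unpack the { value, success } pair.
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}
```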
Index: llvm/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/AtomicExpandUtils.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -60,6 +61,7 @@
 
 class AtomicExpand : public FunctionPass {
   const TargetLowering *TLI = nullptr;
+  const DataLayout *DL = nullptr;
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -83,13 +85,13 @@
   bool tryExpandAtomicRMW(AtomicRMWInst *AI);
   AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
   Value *
-  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
                     Align AddrAlign, AtomicOrdering MemOpOrder,
-                    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
-  void
-  expandAtomicOpToLLSC(Instruction *I, Type *ResultTy, Value *Addr,
-                       Align AddrAlign, AtomicOrdering MemOpOrder,
-                       function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+                    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
+  void expandAtomicOpToLLSC(
+      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
+      AtomicOrdering MemOpOrder,
+      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
   void expandPartwordAtomicRMW(
       AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
   AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
@@ -98,12 +100,11 @@
   void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
   AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
 
-  static Value *
-  insertRMWCmpXchgLoop(IRBuilder<> &Builder, Type *ResultType, Value *Addr,
-                       Align AddrAlign, AtomicOrdering MemOpOrder,
-                       SyncScope::ID SSID,
-                       function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
-                       CreateCmpXchgInstFun CreateCmpXchg);
+  static Value *insertRMWCmpXchgLoop(
+      IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
+      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
+      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
+      CreateCmpXchgInstFun CreateCmpXchg);
 
   bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
   bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -126,9 +127,11 @@
 };
 
 // IRBuilder to be used for replacement atomic instructions.
-struct ReplacementIRBuilder : IRBuilder<> {
+struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
   // Preserves the DebugLoc from I, and preserves still valid metadata.
-  explicit ReplacementIRBuilder(Instruction *I) : IRBuilder<>(I) {
+  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
+      : IRBuilder(I->getContext(), DL) {
+    SetInsertPoint(I);
     this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
   }
 };
@@ -182,9 +185,11 @@
     return false;
 
   auto &TM = TPC->getTM<TargetMachine>();
-  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
+  const auto *Subtarget = TM.getSubtargetImpl(F);
+  if (!Subtarget->enableAtomicExpand())
     return false;
-  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
+  TLI = Subtarget->getTargetLowering();
+  DL = &F.getParent()->getDataLayout();
 
   SmallVector<Instruction *, 1> AtomicInsts;
@@ -318,7 +323,7 @@
 }
 
 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
-  ReplacementIRBuilder Builder(I);
+  ReplacementIRBuilder Builder(I, *DL);
 
   auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
@@ -347,7 +352,7 @@
   auto *M = LI->getModule();
   Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
 
-  ReplacementIRBuilder Builder(LI);
+  ReplacementIRBuilder Builder(LI, *DL);
 
   Value *Addr = LI->getPointerOperand();
   Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
@@ -371,7 +376,7 @@
   Type *NewTy =
       getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
 
-  ReplacementIRBuilder Builder(RMWI);
+  ReplacementIRBuilder Builder(RMWI, *DL);
 
   Value *Addr = RMWI->getPointerOperand();
   Value *Val = RMWI->getValOperand();
@@ -403,7 +408,7 @@
     expandAtomicOpToLLSC(
         LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
         LI->getOrdering(),
-        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
     return true;
   case TargetLoweringBase::AtomicExpansionKind::LLOnly:
     return expandAtomicLoadToLL(LI);
@@ -433,7 +438,7 @@
 }
 
 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
-  ReplacementIRBuilder Builder(LI);
+  ReplacementIRBuilder Builder(LI, *DL);
 
   // On some architectures, load-linked instructions are atomic for larger
   // sizes than normal loads. For example, the only 64-bit load guaranteed
@@ -449,7 +454,7 @@
 }
 
 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
-  ReplacementIRBuilder Builder(LI);
+  ReplacementIRBuilder Builder(LI, *DL);
   AtomicOrdering Order = LI->getOrdering();
   if (Order == AtomicOrdering::Unordered)
     Order = AtomicOrdering::Monotonic;
@@ -478,7 +483,7 @@
 /// mechanism, we convert back to the old format which the backends understand.
 /// Each backend will need individual work to recognize the new format.
 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
-  ReplacementIRBuilder Builder(SI);
+  ReplacementIRBuilder Builder(SI, *DL);
   auto *M = SI->getModule();
   Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                             M->getDataLayout());
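The `DataLayout` has to be threaded through because `InstSimplifyFolder`, unlike the default folder, cannot be default-constructed: it needs the layout to drive its simplification queries. A standalone sketch of the equivalent builder setup (illustrative, not patch code; `ReplacementIRBuilder` additionally copies metadata from `I`):

```cpp
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Build an IRBuilder that runs InstSimplify on every instruction it would
// create; anything that folds (e.g. shl %x, 0) is returned as an existing
// value instead of being inserted into the block.
void buildSimplified(Instruction *InsertBefore, const DataLayout &DL) {
  IRBuilder<InstSimplifyFolder> Builder(InsertBefore->getContext(),
                                        InstSimplifyFolder(DL));
  Builder.SetInsertPoint(InsertBefore);
  // ... CreateShl / CreateLShr / CreateOr as in the expansions below ...
}
```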
@@ -504,7 +509,7 @@
   // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
   // It is the responsibility of the target to only signal expansion via
   // shouldExpandAtomicRMW in cases where this is required and possible.
-  ReplacementIRBuilder Builder(SI);
+  ReplacementIRBuilder Builder(SI, *DL);
   AtomicOrdering Ordering = SI->getOrdering();
   assert(Ordering != AtomicOrdering::NotAtomic);
   AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
@@ -519,7 +524,7 @@
   tryExpandAtomicRMW(AI);
 }
 
-static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
+static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                  Value *Loaded, Value *NewVal, Align AddrAlign,
                                  AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                  Value *&Success, Value *&NewLoaded) {
@@ -559,7 +564,7 @@
     expandPartwordAtomicRMW(AI,
                             TargetLoweringBase::AtomicExpansionKind::LLSC);
   } else {
-    auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+    auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
      return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                 AI->getValOperand());
    };
@@ -669,9 +674,9 @@
 /// include only the part that would've been loaded from Addr.
 ///
 /// Inv_Mask: The inverse of Mask.
-static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
-                                           Type *ValueType, Value *Addr,
-                                           Align AddrAlign,
+static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
+                                           Instruction *I, Type *ValueType,
+                                           Value *Addr, Align AddrAlign,
                                            unsigned MinWordSize) {
   PartwordMaskValues PMV;
@@ -736,7 +741,7 @@
   return PMV;
 }
 
-static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                  const PartwordMaskValues &PMV) {
   assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
   if (PMV.WordType == PMV.ValueType)
@@ -747,7 +752,7 @@
   return Trunc;
 }
 
-static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 Value *Updated, const PartwordMaskValues &PMV) {
   assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
   assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
@@ -766,7 +771,7 @@
 /// operation. (That is, only the bits under the Mask should be
 /// affected by the operation)
 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
-                                    IRBuilder<> &Builder, Value *Loaded,
+                                    IRBuilderBase &Builder, Value *Loaded,
                                     Value *Shifted_Inc, Value *Inc,
                                     const PartwordMaskValues &PMV) {
   // TODO: update to use
@@ -821,7 +826,7 @@
   AtomicOrdering MemOpOrder = AI->getOrdering();
   SyncScope::ID SSID = AI->getSyncScopeID();
 
-  ReplacementIRBuilder Builder(AI);
+  ReplacementIRBuilder Builder(AI, *DL);
 
   PartwordMaskValues PMV =
       createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
@@ -831,7 +836,7 @@
       Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                         PMV.ShiftAmt, "ValOperand_Shifted");
 
-  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
     return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                  ValOperand_Shifted, AI->getValOperand(), PMV);
   };
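For orientation, the quantities `createMaskInstrs` materializes boil down to simple arithmetic once the pointer's offset within the containing word is known. A hypothetical little-endian helper, mirroring the doc comment above:

```cpp
#include <cstdint>

// Little-endian partword mask arithmetic, as described for createMaskInstrs.
// Standalone helper for illustration only; the pass emits these as IR.
struct PartwordMask {
  uint64_t ShiftAmt; // bit position of the value inside the wide word
  uint64_t Mask;     // selects the value's bits; Inv_Mask is ~Mask
};

PartwordMask computeMask(uint64_t OffsetInWord, unsigned ValueBytes) {
  PartwordMask PM;
  PM.ShiftAmt = OffsetInWord * 8; // 0 when the access is word-aligned
  PM.Mask = ((uint64_t(1) << (ValueBytes * 8)) - 1) << PM.ShiftAmt;
  return PM;
}
```

A word-aligned i16 therefore gets ShiftAmt 0 and Mask 0xFFFF, which is exactly the zero-shift case the new folder eliminates in the tests below.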
@@ -855,7 +860,7 @@
 }
 
 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
-  ReplacementIRBuilder Builder(AI);
+  ReplacementIRBuilder Builder(AI, *DL);
 
   AtomicRMWInst::BinOp Op = AI->getOperation();
 
   assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
@@ -930,7 +935,7 @@
   BasicBlock *BB = CI->getParent();
   Function *F = BB->getParent();
 
-  ReplacementIRBuilder Builder(CI);
+  ReplacementIRBuilder Builder(CI, *DL);
   LLVMContext &Ctx = Builder.getContext();
 
   BasicBlock *EndBB =
@@ -1016,8 +1021,8 @@
 void AtomicExpand::expandAtomicOpToLLSC(
     Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
     AtomicOrdering MemOpOrder,
-    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
-  ReplacementIRBuilder Builder(I);
+    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
+  ReplacementIRBuilder Builder(I, *DL);
   Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                     MemOpOrder, PerformOp);
 
@@ -1026,7 +1031,7 @@
 }
 
 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
-  ReplacementIRBuilder Builder(AI);
+  ReplacementIRBuilder Builder(AI, *DL);
 
   PartwordMaskValues PMV =
       createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
@@ -1052,7 +1057,7 @@
 }
 
 void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
-  ReplacementIRBuilder Builder(CI);
+  ReplacementIRBuilder Builder(CI, *DL);
 
   PartwordMaskValues PMV = createMaskInstrs(
       Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
@@ -1079,9 +1084,9 @@
 }
 
 Value *AtomicExpand::insertRMWLLSCLoop(
-    IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
+    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
     AtomicOrdering MemOpOrder,
-    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
   LLVMContext &Ctx = Builder.getContext();
   BasicBlock *BB = Builder.GetInsertBlock();
   Function *F = BB->getParent();
@@ -1139,7 +1144,7 @@
   Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                             M->getDataLayout());
 
-  ReplacementIRBuilder Builder(CI);
+  ReplacementIRBuilder Builder(CI, *DL);
 
   Value *Addr = CI->getPointerOperand();
   Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
@@ -1263,7 +1268,7 @@
       BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
   auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
 
-  ReplacementIRBuilder Builder(CI);
+  ReplacementIRBuilder Builder(CI, *DL);
 
   // The split call above "helpfully" added a branch at the end of BB (to the
   // wrong place), but we might want a fence too. It's easiest to just remove
@@ -1443,9 +1448,9 @@
 }
 
 Value *AtomicExpand::insertRMWCmpXchgLoop(
-    IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
+    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
     AtomicOrdering MemOpOrder, SyncScope::ID SSID,
-    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
     CreateCmpXchgInstFun CreateCmpXchg) {
   LLVMContext &Ctx = Builder.getContext();
   BasicBlock *BB = Builder.GetInsertBlock();
@@ -1528,11 +1533,11 @@
 // Note: This function is exposed externally by AtomicExpandUtils.h
 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                     CreateCmpXchgInstFun CreateCmpXchg) {
-  ReplacementIRBuilder Builder(AI);
+  ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
   Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
       Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
       AI->getOrdering(), AI->getSyncScopeID(),
-      [&](IRBuilder<> &Builder, Value *Loaded) {
+      [&](IRBuilderBase &Builder, Value *Loaded) {
         return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                    AI->getValOperand());
       },
@@ -1682,7 +1687,7 @@
   // CAS libcall, via a CAS loop, instead.
   if (!Success) {
     expandAtomicRMWToCmpXchg(
-        I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
+        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                   Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                   SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
           // Create the CAS instruction normally...
Index: llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll
===================================================================
--- llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll
+++ llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll
@@ -39,20 +39,18 @@
 ; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_align4(
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = bitcast i16 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
-; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
 ; CHECK:       atomicrmw.start:
 ; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[LOADED]], -65536
-; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[VALOPERAND_SHIFTED]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP4]] seq_cst seq_cst, align 4
 ; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
 ; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
 ; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
 ; CHECK:       atomicrmw.end:
-; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], 0
-; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
 ; CHECK-NEXT:    ret i16 [[EXTRACTED]]
 ;
   %res = atomicrmw xchg i16 addrspace(1)* %ptr, i16 %value seq_cst, align 4
@@ -96,12 +94,11 @@
 ; CHECK-LABEL: @test_atomicrmw_add_i16_global_align4(
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = bitcast i16 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
-; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
 ; CHECK:       atomicrmw.start:
 ; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
+; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[NEW]], 65535
 ; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
@@ -110,8 +107,7 @@
 ; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
 ; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
 ; CHECK:       atomicrmw.end:
-; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], 0
-; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
 ; CHECK-NEXT:    ret i16 [[EXTRACTED]]
 ;
   %res = atomicrmw add i16 addrspace(1)* %ptr, i16 %value seq_cst, align 4
@@ -446,30 +442,27 @@
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, i16 addrspace(1)* [[OUT:%.*]], i64 4
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = bitcast i16 addrspace(1)* [[GEP]] to i32 addrspace(1)*
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[IN:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[OLD:%.*]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], -65536
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[OLD:%.*]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], -65536
 ; CHECK-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
 ; CHECK:       partword.cmpxchg.loop:
-; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[TMP0:%.*]] ], [ [[TMP13:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = or i32 [[TMP7]], [[TMP4]]
-; CHECK-NEXT:    [[TMP10:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[TMP9]], i32 [[TMP8]] seq_cst seq_cst, align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { i32, i1 } [[TMP10]], 0
-; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { i32, i1 } [[TMP10]], 1
-; CHECK-NEXT:    br i1 [[TMP12]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[TMP11:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[TMP7]], i32 [[TMP6]] seq_cst seq_cst, align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
+; CHECK-NEXT:    br i1 [[TMP10]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
 ; CHECK:       partword.cmpxchg.failure:
-; CHECK-NEXT:    [[TMP13]] = and i32 [[TMP11]], -65536
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP7]], [[TMP13]]
-; CHECK-NEXT:    br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
+; CHECK-NEXT:    [[TMP11]] = and i32 [[TMP9]], -65536
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP5]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
 ; CHECK:       partword.cmpxchg.end:
-; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP11]], 0
-; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT:    [[TMP15:%.*]] = insertvalue { i16, i1 } undef, i16 [[EXTRACTED]], 0
-; CHECK-NEXT:    [[TMP16:%.*]] = insertvalue { i16, i1 } [[TMP15]], i1 [[TMP12]], 1
-; CHECK-NEXT:    [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP16]], 0
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP9]] to i16
+; CHECK-NEXT:    [[TMP13:%.*]] = insertvalue { i16, i1 } undef, i16 [[EXTRACTED]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = insertvalue { i16, i1 } [[TMP13]], i1 [[TMP10]], 1
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP14]], 0
 ; CHECK-NEXT:    ret i16 [[EXTRACT]]
 ;
   %gep = getelementptr i16, i16 addrspace(1)* %out, i64 4
@@ -555,10 +548,8 @@
 ; CHECK-LABEL: @test_atomicrmw_xor_i16_local_align4(
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = bitcast i16 addrspace(3)* [[PTR:%.*]] to i32 addrspace(3)*
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
-; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw xor i32 addrspace(3)* [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
-; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP2]], 0
-; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw xor i32 addrspace(3)* [[ALIGNEDADDR]], i32 [[TMP1]] seq_cst, align 4
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
 ; CHECK-NEXT:    ret i16 [[EXTRACTED]]
 ;
   %res = atomicrmw xor i16 addrspace(3)* %ptr, i16 %value seq_cst, align 4
Index: llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll
===================================================================
--- llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll
+++ llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll
@@ -103,12 +103,11 @@
 ; CHECK-LABEL: @test_atomicrmw_add_i8_global_align4(
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = bitcast i8 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[VALUE:%.*]] to i32
-; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP1]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
 ; CHECK:       atomicrmw.start:
 ; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
+; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[NEW]], 255
 ; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -256
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
@@ -117,8 +116,7 @@
 ; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
 ; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
 ; CHECK:       atomicrmw.end:
-; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], 0
-; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i8
 ; CHECK-NEXT:    ret i8 [[EXTRACTED]]
 ;
   %res = atomicrmw add i8 addrspace(1)* %ptr, i8 %value seq_cst, align 4
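The CHECK-line churn above follows from the folder swap rather than from any new combine: with `InstSimplifyFolder` installed, the `shl`/`lshr` by 0 emitted for word-aligned partword accesses are never materialized, and the remaining temporaries renumber accordingly when the checks are regenerated. A standalone demonstration of the folding (assumes an LLVM build to link against; illustrative only):

```cpp
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *FTy =
      FunctionType::get(Type::getInt32Ty(Ctx), {Type::getInt16Ty(Ctx)}, false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);

  IRBuilder<InstSimplifyFolder> B(Ctx, InstSimplifyFolder(M.getDataLayout()));
  B.SetInsertPoint(BB);
  Value *Wide = B.CreateZExt(F->getArg(0), B.getInt32Ty());
  // With the folder, the shift-by-zero used for a word-aligned partword
  // access simplifies to its operand; no "shl i32 %0, 0" is inserted.
  Value *Shifted = B.CreateShl(Wide, uint64_t(0));
  assert(Shifted == Wide && "shl by 0 folded away");
  B.CreateRet(Shifted);
  return verifyModule(M, &errs());
}
```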