Index: llvm/lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- llvm/lib/CodeGen/AtomicExpandPass.cpp +++ llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -694,16 +694,23 @@ assert(ValueSize < MinWordSize); PointerType *PtrTy = cast(Addr->getType()); - Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace()); + IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace()); // TODO: we could skip some of this if AddrAlign >= MinWordSize. - Value *AddrInt = Builder.CreatePtrToInt( - Addr, DL.getIntPtrType(Ctx, PtrTy->getAddressSpace())); - PMV.AlignedAddr = Builder.CreateIntToPtr( - Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, + + PMV.AlignedAddr = Builder.CreateIntrinsic( + Intrinsic::ptrmask, {PtrTy, IntTy}, + {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr, "AlignedAddr"); + + Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace()); + if (!PtrTy->isOpaquePointerTy()) + PMV.AlignedAddr = + Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr"); + PMV.AlignedAddrAlignment = Align(MinWordSize); + Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy); Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { // turn bytes into bits Index: llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll =================================================================== --- llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll +++ llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll @@ -6,25 +6,25 @@ define i16 @test_atomicrmw_xchg_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_xchg_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -37,25 +37,25 @@ define i16 @test_atomicrmw_xchg_i16_global_align4(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_align4( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -68,27 +68,27 @@ define i16 @test_atomicrmw_add_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_add_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -101,27 +101,27 @@ define i16 @test_atomicrmw_add_i16_global_align4(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_add_i16_global_align4( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -134,27 +134,27 @@ define i16 @test_atomicrmw_sub_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_sub_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -167,19 +167,19 @@ define i16 @test_atomicrmw_and_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_and_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] ; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[INV_MASK]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP5:%.*]] = atomicrmw and i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[ANDOPERAND]] seq_cst, align 4 -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[ANDOPERAND]] seq_cst, align 4 +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; @@ -189,28 +189,28 @@ define i16 @test_atomicrmw_nand_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_nand_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP6]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[NEW]], [[MASK]] -; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP8]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP9]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP10]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP10]], 0 +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP5]], -1 +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -223,18 +223,18 @@ define i16 @test_atomicrmw_or_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_or_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = atomicrmw or i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4 -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw or i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4 +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; @@ -244,18 +244,18 @@ define i16 @test_atomicrmw_xor_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_xor_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = atomicrmw xor i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4 -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xor i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4 +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; @@ -265,36 +265,36 @@ define i16 @test_atomicrmw_max_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_max_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VALUE]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[EXTRACTED]], i16 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 -; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] ; %res = atomicrmw max i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -302,36 +302,36 @@ define i16 @test_atomicrmw_min_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_min_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VALUE]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[EXTRACTED]], i16 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 -; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] ; %res = atomicrmw min i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -339,36 +339,36 @@ define i16 @test_atomicrmw_umax_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_umax_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VALUE]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[EXTRACTED]], i16 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 -; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] ; %res = atomicrmw umax i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -376,36 +376,36 @@ define i16 @test_atomicrmw_umin_i16_global(i16 addrspace(1)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_umin_i16_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VALUE]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[EXTRACTED]], i16 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 -; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 +; CHECK-NEXT: ret i16 [[EXTRACTED4]] ; %res = atomicrmw umin i16 addrspace(1)* %ptr, i16 %value seq_cst ret i16 %res @@ -414,39 +414,39 @@ define i16 @test_cmpxchg_i16_global(i16 addrspace(1)* %out, i16 %in, i16 %old) { ; CHECK-LABEL: @test_cmpxchg_i16_global( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, i16 addrspace(1)* [[OUT:%.*]], i64 4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(1)* @llvm.ptrmask.p1i16.i64(i16 addrspace(1)* [[GEP]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(1)* [[GEP]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[IN:%.*]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[OLD:%.*]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], [[INV_MASK]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] ; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]] ; CHECK: partword.cmpxchg.loop: -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP9]], [[TMP0:%.*]] ], [ [[TMP16:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] -; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP7]] -; CHECK-NEXT: [[TMP13:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[TMP12]], i32 [[TMP11]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { i32, i1 } [[TMP13]], 1 -; CHECK-NEXT: br i1 [[TMP15]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] ; CHECK: partword.cmpxchg.failure: -; CHECK-NEXT: [[TMP16]] = and i32 [[TMP14]], [[INV_MASK]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP10]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] +; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] ; CHECK: partword.cmpxchg.end: -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP14]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } undef, i16 [[EXTRACTED]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { i16, i1 } [[TMP18]], i1 [[TMP15]], 1 -; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP19]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } undef, i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 +; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i16 [[EXTRACT]] ; %gep = getelementptr i16, i16 addrspace(1)* %out, i64 4 @@ -457,27 +457,27 @@ define i16 @test_atomicrmw_xchg_i16_local(i16 addrspace(3)* %ptr, i16 %value) { ; CHECK-LABEL: @test_atomicrmw_xchg_i16_local( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(3)* [[PTR:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i32 [[TMP2]] to i32 addrspace(3)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(3)* @llvm.ptrmask.p3i16.i32(i16 addrspace(3)* [[PTR:%.*]], i32 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(3)* [[ALIGNEDADDR]] to i32 addrspace(3)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(3)* [[PTR]] to i32 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[PTRLSB]], 3 -; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP3]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; @@ -488,38 +488,38 @@ define i16 @test_cmpxchg_i16_local(i16 addrspace(3)* %out, i16 %in, i16 %old) { ; CHECK-LABEL: @test_cmpxchg_i16_local( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, i16 addrspace(3)* [[OUT:%.*]], i64 4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16 addrspace(3)* @llvm.ptrmask.p3i16.i32(i16 addrspace(3)* [[GEP]], i32 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16 addrspace(3)* [[ALIGNEDADDR]] to i32 addrspace(3)* ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i16 addrspace(3)* [[GEP]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i32 [[TMP2]] to i32 addrspace(3)* ; CHECK-NEXT: [[PTRLSB:%.*]] = and i32 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[PTRLSB]], 3 -; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3 +; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[TMP2]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[IN:%.*]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[OLD:%.*]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], [[INV_MASK]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32 addrspace(3)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] ; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]] ; CHECK: partword.cmpxchg.loop: -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP9]], [[TMP0:%.*]] ], [ [[TMP16:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] -; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP7]] -; CHECK-NEXT: [[TMP13:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR]], i32 [[TMP12]], i32 [[TMP11]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { i32, i1 } [[TMP13]], 1 -; CHECK-NEXT: br i1 [[TMP15]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg i32 addrspace(3)* [[ALIGNEDADDR1]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] ; CHECK: partword.cmpxchg.failure: -; CHECK-NEXT: [[TMP16]] = and i32 [[TMP14]], [[INV_MASK]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP10]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] +; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] ; CHECK: partword.cmpxchg.end: -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[TMP2]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } undef, i16 [[EXTRACTED]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { i16, i1 } [[TMP18]], i1 [[TMP15]], 1 -; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP19]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } undef, i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 +; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i16 [[EXTRACT]] ; %gep = getelementptr i16, i16 addrspace(3)* %out, i64 4 Index: llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll =================================================================== --- llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll +++ llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll @@ -4,25 +4,25 @@ define i8 @test_atomicrmw_xchg_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_xchg_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -35,27 +35,27 @@ define i8 @test_atomicrmw_add_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_add_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -68,27 +68,27 @@ define i8 @test_atomicrmw_sub_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_sub_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -101,19 +101,19 @@ define i8 @test_atomicrmw_and_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_and_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] ; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[INV_MASK]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP5:%.*]] = atomicrmw and i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[ANDOPERAND]] seq_cst, align 4 -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[ANDOPERAND]] seq_cst, align 4 +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 ; CHECK-NEXT: ret i8 [[EXTRACTED]] ; @@ -123,28 +123,28 @@ define i8 @test_atomicrmw_nand_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_nand_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP6]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[NEW]], [[MASK]] -; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP8]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP9]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP10]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP10]], 0 +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]] +; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[TMP5]], -1 +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -157,18 +157,18 @@ define i8 @test_atomicrmw_or_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_or_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = atomicrmw or i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4 -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw or i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4 +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 ; CHECK-NEXT: ret i8 [[EXTRACTED]] ; @@ -178,18 +178,18 @@ define i8 @test_atomicrmw_xor_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_xor_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = atomicrmw xor i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4 -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xor i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4 +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 ; CHECK-NEXT: ret i8 [[EXTRACTED]] ; @@ -199,36 +199,36 @@ define i8 @test_atomicrmw_max_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_max_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i8 [[EXTRACTED]], [[VALUE]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i8 [[EXTRACTED]], i8 [[VALUE]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i8 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[EXTRACTED]], i8 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 -; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] ; %res = atomicrmw max i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -236,36 +236,36 @@ define i8 @test_atomicrmw_min_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_min_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sle i8 [[EXTRACTED]], [[VALUE]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i8 [[EXTRACTED]], i8 [[VALUE]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp sle i8 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[EXTRACTED]], i8 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 -; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] ; %res = atomicrmw min i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -273,36 +273,36 @@ define i8 @test_atomicrmw_umax_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_umax_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i8 [[EXTRACTED]], i8 [[VALUE]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[EXTRACTED]], i8 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 -; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] ; %res = atomicrmw umax i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -310,36 +310,36 @@ define i8 @test_atomicrmw_umin_i8_global(i8 addrspace(1)* %ptr, i8 %value) { ; CHECK-LABEL: @test_atomicrmw_umin_i8_global( -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[PTR:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[VALUE:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i8 [[EXTRACTED]], [[VALUE]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i8 [[EXTRACTED]], i8 [[VALUE]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i8 [[EXTRACTED]], [[VALUE]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i8 [[EXTRACTED]], i8 [[VALUE]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8 -; CHECK-NEXT: ret i8 [[EXTRACTED3]] +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i8 +; CHECK-NEXT: ret i8 [[EXTRACTED4]] ; %res = atomicrmw umin i8 addrspace(1)* %ptr, i8 %value seq_cst ret i8 %res @@ -348,39 +348,39 @@ define i8 @test_cmpxchg_i8_global(i8 addrspace(1)* %out, i8 %in, i8 %old) { ; CHECK-LABEL: @test_cmpxchg_i8_global( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8 addrspace(1)* [[OUT:%.*]], i64 4 +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8 addrspace(1)* @llvm.ptrmask.p1i8.i64(i8 addrspace(1)* [[GEP]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8 addrspace(1)* [[ALIGNEDADDR]] to i32 addrspace(1)* ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[GEP]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP2]] to i32 addrspace(1)* ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[IN:%.*]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[OLD:%.*]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], [[INV_MASK]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[IN:%.*]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[OLD:%.*]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32 addrspace(1)* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] ; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]] ; CHECK: partword.cmpxchg.loop: -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP9]], [[TMP0:%.*]] ], [ [[TMP16:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] -; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP7]] -; CHECK-NEXT: [[TMP13:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR]], i32 [[TMP12]], i32 [[TMP11]] seq_cst seq_cst, align 4 -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { i32, i1 } [[TMP13]], 1 -; CHECK-NEXT: br i1 [[TMP15]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg i32 addrspace(1)* [[ALIGNEDADDR1]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] ; CHECK: partword.cmpxchg.failure: -; CHECK-NEXT: [[TMP16]] = and i32 [[TMP14]], [[INV_MASK]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP10]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] +; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] ; CHECK: partword.cmpxchg.end: -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP14]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i8, i1 } undef, i8 [[EXTRACTED]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { i8, i1 } [[TMP18]], i1 [[TMP15]], 1 -; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i8, i1 } [[TMP19]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i8, i1 } undef, i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1 +; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i8 [[EXTRACT]] ; %gep = getelementptr i8, i8 addrspace(1)* %out, i64 4 Index: llvm/test/Transforms/AtomicExpand/SPARC/partword.ll =================================================================== --- llvm/test/Transforms/AtomicExpand/SPARC/partword.ll +++ llvm/test/Transforms/AtomicExpand/SPARC/partword.ll @@ -13,41 +13,41 @@ ; CHECK-LABEL: @test_cmpxchg_i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[ARG:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP1]] to i32* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i8* @llvm.ptrmask.p0i8.i64(i8* [[ARG:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i8* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[PTRLSB]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[PTRLSB]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[NEW:%.*]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[OLD:%.*]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], [[INV_MASK]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[NEW:%.*]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[OLD:%.*]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] ; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]] ; CHECK: partword.cmpxchg.loop: -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP16:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] -; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP7]] -; CHECK-NEXT: [[TMP13:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[TMP12]], i32 [[TMP11]] monotonic monotonic, align 4 -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { i32, i1 } [[TMP13]], 1 -; CHECK-NEXT: br i1 [[TMP15]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[TMP11]], i32 [[TMP10]] monotonic monotonic, align 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] ; CHECK: partword.cmpxchg.failure: -; CHECK-NEXT: [[TMP16]] = and i32 [[TMP14]], [[INV_MASK]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP10]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] +; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] ; CHECK: partword.cmpxchg.end: -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP14]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i8, i1 } undef, i8 [[EXTRACTED]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { i8, i1 } [[TMP18]], i1 [[TMP15]], 1 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i8, i1 } undef, i8 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1 ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[RET:%.*]] = extractvalue { i8, i1 } [[TMP19]], 0 +; CHECK-NEXT: [[RET:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i8 [[RET]] ; entry: @@ -60,41 +60,41 @@ ; CHECK-LABEL: @test_cmpxchg_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP1]] to i32* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16* @llvm.ptrmask.p0i16.i64(i16* [[ARG:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[PTRLSB]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[PTRLSB]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[NEW:%.*]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP6:%.*]] = zext i16 [[OLD:%.*]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], [[INV_MASK]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[NEW:%.*]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]] ; CHECK-NEXT: br label [[PARTWORD_CMPXCHG_LOOP:%.*]] ; CHECK: partword.cmpxchg.loop: -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP16:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] -; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP7]] -; CHECK-NEXT: [[TMP13:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[TMP12]], i32 [[TMP11]] monotonic monotonic, align 4 -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { i32, i1 } [[TMP13]], 1 -; CHECK-NEXT: br i1 [[TMP15]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ] +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[TMP11]], i32 [[TMP10]] monotonic monotonic, align 4 +; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]] ; CHECK: partword.cmpxchg.failure: -; CHECK-NEXT: [[TMP16]] = and i32 [[TMP14]], [[INV_MASK]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP10]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] +; CHECK-NEXT: [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]] ; CHECK: partword.cmpxchg.end: -; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP14]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } undef, i16 [[EXTRACTED]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { i16, i1 } [[TMP18]], i1 [[TMP15]], 1 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } undef, i16 [[EXTRACTED]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[RET:%.*]] = extractvalue { i16, i1 } [[TMP19]], 0 +; CHECK-NEXT: [[RET:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i16 [[RET]] ; entry: @@ -107,28 +107,28 @@ ; CHECK-LABEL: @test_add_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP1]] to i32* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16* @llvm.ptrmask.p0i16.i64(i16* [[ARG:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[PTRLSB]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[PTRLSB]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VAL:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VAL:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]] -; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] monotonic monotonic, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]] +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]] +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[TMP7]] monotonic monotonic, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -145,25 +145,25 @@ ; CHECK-LABEL: @test_xor_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP1]] to i32* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16* @llvm.ptrmask.p0i16.i64(i16* [[ARG:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[PTRLSB]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[PTRLSB]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VAL:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VAL:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = xor i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] monotonic monotonic, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[NEW]] monotonic monotonic, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -180,25 +180,25 @@ ; CHECK-LABEL: @test_or_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP1]] to i32* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16* @llvm.ptrmask.p0i16.i64(i16* [[ARG:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[PTRLSB]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[PTRLSB]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VAL:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VAL:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = or i32 [[LOADED]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] monotonic monotonic, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[NEW]] monotonic monotonic, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -215,26 +215,26 @@ ; CHECK-LABEL: @test_and_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP1]] to i32* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16* @llvm.ptrmask.p0i16.i64(i16* [[ARG:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[PTRLSB]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[PTRLSB]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VAL:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VAL:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] ; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[INV_MASK]], [[VALOPERAND_SHIFTED]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[NEW:%.*]] = and i32 [[LOADED]], [[ANDOPERAND]] -; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[NEW]] monotonic monotonic, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[NEW]] monotonic monotonic, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] @@ -251,38 +251,38 @@ ; CHECK-LABEL: @test_min_i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4 -; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = inttoptr i64 [[TMP1]] to i32* +; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call i16* @llvm.ptrmask.p0i16.i64(i16* [[ARG:%.*]], i64 -4) +; CHECK-NEXT: [[ALIGNEDADDR1:%.*]] = bitcast i16* [[ALIGNEDADDR]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i16* [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[PTRLSB]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 3 -; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[PTRLSB]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3 +; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]] ; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[VAL:%.*]] to i32 -; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ALIGNEDADDR]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VAL:%.*]] to i32 +; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ALIGNEDADDR1]], align 4 ; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] ; CHECK: atomicrmw.start: -; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[ENTRY:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sle i16 [[EXTRACTED]], [[VAL]] -; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP6]], i16 [[EXTRACTED]], i16 [[VAL]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp sle i16 [[EXTRACTED]], [[VAL]] +; CHECK-NEXT: [[NEW:%.*]] = select i1 [[TMP5]], i16 [[EXTRACTED]], i16 [[VAL]] ; CHECK-NEXT: [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32 -; CHECK-NEXT: [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] +; CHECK-NEXT: [[SHIFTED2:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]] ; CHECK-NEXT: [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]] -; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]] -; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg i32* [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] monotonic monotonic, align 4 -; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1 -; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0 +; CHECK-NEXT: [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED2]] +; CHECK-NEXT: [[TMP6:%.*]] = cmpxchg i32* [[ALIGNEDADDR1]], i32 [[LOADED]], i32 [[INSERTED]] monotonic monotonic, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0 ; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] ; CHECK: atomicrmw.end: -; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] -; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 +; CHECK-NEXT: [[SHIFTED3:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] +; CHECK-NEXT: [[EXTRACTED4:%.*]] = trunc i32 [[SHIFTED3]] to i16 ; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: ret i16 [[EXTRACTED3]] +; CHECK-NEXT: ret i16 [[EXTRACTED4]] ; entry: %ret = atomicrmw min i16* %arg, i16 %val seq_cst