diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1894,7 +1894,7 @@ auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0)); SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); - SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, SDLoc(SAddr), + SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr), MVT::i32, TFI, SAddr.getOperand(1)), 0); } @@ -1936,8 +1936,9 @@ SAddr.getOpcode() == ISD::TargetFrameIndex ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL) : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32); - SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32, - SAddr, AddOffset), 0); + SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32, + SAddr, AddOffset), + 0); } Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3694,9 +3694,9 @@ const DebugLoc &DL = I.getDebugLoc(); SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), SAddr) - .addFrameIndex(FI) - .addReg(RHSDef->Reg); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr) + .addFrameIndex(FI) + .addReg(RHSDef->Reg); } } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -307,9 +307,9 @@ // Add wave offset in bytes to private base offset. // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. 
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) - .addReg(FlatScrInitLo) - .addReg(ScratchWaveOffsetReg); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo) + .addReg(FlatScrInitLo) + .addReg(ScratchWaveOffsetReg); // Convert offset to 256-byte units. BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI) @@ -909,9 +909,9 @@ LiveRegs.addLiveIns(MBB); } - // s_add_u32 s33, s32, NumBytes + // s_add_i32 s33, s32, NumBytes // s_and_b32 s33, s33, 0b111...0000 - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), FramePtrReg) + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg) .addReg(StackPtrReg) .addImm((Alignment - 1) * getScratchScaleFactor(ST)) .setMIFlag(MachineInstr::FrameSetup); @@ -937,7 +937,7 @@ } if (HasFP && RoundedSize != 0) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) .addReg(StackPtrReg) .addImm(RoundedSize * getScratchScaleFactor(ST)) .setMIFlag(MachineInstr::FrameSetup); @@ -988,10 +988,10 @@ Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex; if (RoundedSize != 0 && hasFP(MF)) { - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg) - .addReg(StackPtrReg) - .addImm(RoundedSize * getScratchScaleFactor(ST)) - .setMIFlag(MachineInstr::FrameDestroy); + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) + .addReg(StackPtrReg) + .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST))) + .setMIFlag(MachineInstr::FrameDestroy); } if (FuncInfo->SGPRForFPSaveRestoreCopy) { @@ -1294,10 +1294,12 @@ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); Register SPReg = MFI->getStackPtrOffsetReg(); - unsigned Op = IsDestroy ? 
AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; - BuildMI(MBB, I, DL, TII->get(Op), SPReg) - .addReg(SPReg) - .addImm(Amount * getScratchScaleFactor(ST)); + Amount *= getScratchScaleFactor(ST); + if (IsDestroy) + Amount = -Amount; + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg) + .addReg(SPReg) + .addImm(Amount); } else if (CalleePopAmount != 0) { llvm_unreachable("is this used?"); } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -703,7 +703,7 @@ .addFrameIndex(FrameIdx); if (ST.enableFlatScratch() ) { - BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_U32), BaseReg) + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg) .addReg(OffsetReg, RegState::Kill) .addReg(FIReg); return BaseReg; @@ -1113,7 +1113,7 @@ if (ScratchOffsetReg == AMDGPU::NoRegister) { BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset); } else { - BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset) + BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset) .addReg(ScratchOffsetReg) .addImm(Offset); } @@ -1262,9 +1262,9 @@ if (ScratchOffsetRegDelta != 0) { // Subtract the offset we added to the ScratchOffset register. - BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset) + BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset) .addReg(SOffset) - .addImm(ScratchOffsetRegDelta); + .addImm(-ScratchOffsetRegDelta); } } @@ -1707,9 +1707,9 @@ FIOp.setIsKill(false); } - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), TmpSReg) - .addReg(FrameReg) - .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg) + .addReg(FrameReg) + .addImm(Offset); if (!UseSGPR) BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) @@ -1717,10 +1717,10 @@ if (TmpSReg == FrameReg) { // Undo frame register modification. 
- BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_SUB_U32), + BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32), FrameReg) - .addReg(FrameReg) - .addImm(Offset); + .addReg(FrameReg) + .addImm(-Offset); } return; @@ -1794,17 +1794,17 @@ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg) .addReg(FrameReg) .addImm(ST.getWavefrontSizeLog2()); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg) - .addReg(ScaledReg, RegState::Kill) - .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg) + .addReg(ScaledReg, RegState::Kill) + .addImm(Offset); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg) .addReg(ScaledReg, RegState::Kill); // If there were truly no free SGPRs, we need to undo everything. if (!TmpScaledReg.isValid()) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg) - .addReg(ScaledReg, RegState::Kill) - .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg) + .addReg(ScaledReg, RegState::Kill) + .addImm(-Offset); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) .addReg(FrameReg) .addImm(ST.getWavefrontSizeLog2()); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll @@ -55,7 +55,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s6, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 @@ -71,7 +71,7 @@ ; GFX9-NEXT: s_add_u32 s4, s32, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; 
GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -81,7 +81,7 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 @@ -95,7 +95,7 @@ ; GFX10-NEXT: s_and_b32 s4, s4, -16 ; GFX10-NEXT: s_lshl_b32 s4, s4, 5 ; GFX10-NEXT: s_add_u32 s4, s32, s4 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -157,7 +157,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s6, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 @@ -173,7 +173,7 @@ ; GFX9-NEXT: s_add_u32 s4, s32, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -183,7 +183,7 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 @@ -197,7 +197,7 @@ ; GFX10-NEXT: s_and_b32 s4, s4, -16 ; GFX10-NEXT: s_lshl_b32 s4, s4, 5 ; GFX10-NEXT: s_add_u32 s4, s32, s4 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, 
s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -260,9 +260,9 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s6, s33 -; GFX9-NEXT: s_add_u32 s33, s32, 0x7c0 +; GFX9-NEXT: s_add_i32 s33, s32, 0x7c0 ; GFX9-NEXT: s_and_b32 s33, s33, 0xfffff800 -; GFX9-NEXT: s_add_u32 s32, s32, 0x1000 +; GFX9-NEXT: s_addk_i32 s32, 0x1000 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 @@ -279,7 +279,7 @@ ; GFX9-NEXT: s_and_b32 s4, s4, 0xfffff800 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen -; GFX9-NEXT: s_sub_u32 s32, s32, 0x1000 +; GFX9-NEXT: s_addk_i32 s32, 0xf000 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -288,10 +288,10 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s6, s33 -; GFX10-NEXT: s_add_u32 s33, s32, 0x3e0 +; GFX10-NEXT: s_add_i32 s33, s32, 0x3e0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_and_b32 s33, s33, 0xfffffc00 -; GFX10-NEXT: s_add_u32 s32, s32, 0x800 +; GFX10-NEXT: s_addk_i32 s32, 0x800 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, gv@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 @@ -305,7 +305,7 @@ ; GFX10-NEXT: s_lshl_b32 s4, s4, 5 ; GFX10-NEXT: s_add_u32 s4, s32, s4 ; GFX10-NEXT: s_and_b32 s4, s4, 0xfffffc00 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x800 +; GFX10-NEXT: s_addk_i32 s32, 0xf800 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -9,7 +9,7 @@ ; GCN: ; 
%bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s6, s33 -; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0 +; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000 ; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill @@ -56,8 +56,8 @@ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_add_u32_e32 v1, 0x100, v1 ; GCN-NEXT: v_add_u32_e32 v0, v1, v0 -; GCN-NEXT: s_add_u32 s32, s32, 0x10000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_add_i32 s32, s32, 0x10000 +; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -270,7 +270,7 @@ ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s6, s33 -; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0 +; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000 ; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill @@ -317,8 +317,8 @@ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_and_b32_e32 v1, 1, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GCN-NEXT: s_add_u32 s32, s32, 0x10000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_add_i32 s32, s32, 0x10000 +; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -536,7 +536,7 @@ ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s6, s33 -; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0 +; GCN-NEXT: s_add_i32 s33, s32, 0x3fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000 ; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill @@ -583,8 
+583,8 @@ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GCN-NEXT: v_add_u32_e32 v2, 0x100, v2 ; GCN-NEXT: v_add_u32_e32 v1, v2, v0 -; GCN-NEXT: s_add_u32 s32, s32, 0x10000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_add_i32 s32, s32, 0x10000 +; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill ; GCN-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -13,10 +13,10 @@ ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-NEXT: s_add_u32 s1, 4, s1 +; GFX9-NEXT: s_add_i32 s1, s1, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 4, s0 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -33,8 +33,8 @@ ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-NEXT: s_add_u32 s0, 4, s0 -; GFX10-NEXT: s_add_u32 s1, 4, s1 +; GFX10-NEXT: s_add_i32 s0, s0, 4 +; GFX10-NEXT: s_add_i32 s1, s1, 4 ; GFX10-NEXT: scratch_store_dword off, v0, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -185,10 +185,10 @@ ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_u32 s1, 0x104, s1 +; GFX9-NEXT: s_addk_i32 s1, 0x104 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 0x104, s0 +; GFX9-NEXT: s_addk_i32 s0, 0x104 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -207,8 +207,8 @@ ; GFX10-NEXT: 
s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-NEXT: s_add_u32 s0, 0x104, s0 -; GFX10-NEXT: s_add_u32 s1, 0x104, s1 +; GFX10-NEXT: s_addk_i32 s0, 0x104 +; GFX10-NEXT: s_addk_i32 s1, 0x104 ; GFX10-NEXT: scratch_store_dword off, v0, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -295,7 +295,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi @@ -314,7 +314,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x100 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 15 @@ -357,10 +357,10 @@ ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX9-NEXT: s_addk_i32 s1, 0x4004 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX9-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -379,8 +379,8 @@ ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-NEXT: s_add_u32 s0, 0x4004, s0 -; GFX10-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX10-NEXT: s_addk_i32 s0, 0x4004 +; GFX10-NEXT: s_addk_i32 s1, 0x4004 ; GFX10-NEXT: scratch_store_dword off, v0, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -467,7 +467,7 @@ ; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi @@ -486,7 +486,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 15 @@ -527,7 +527,7 @@ ; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_u32 s0, 4, s0 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc @@ -543,7 +543,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3e80 -; GFX10-NEXT: s_add_u32 s0, 4, s0 +; GFX10-NEXT: s_add_i32 s0, s0, 4 ; GFX10-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 @@ -571,7 +571,7 @@ ; GFX9-NEXT: scratch_store_dword off, v0, s32 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_u32 s0, s32, s0 +; GFX9-NEXT: s_add_i32 s0, s0, s32 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc @@ -585,7 +585,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3e80 -; GFX10-NEXT: s_add_u32 s0, s32, s0 +; GFX10-NEXT: s_add_i32 s0, s0, s32 ; GFX10-NEXT: scratch_store_dword off, v0, s32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: 
scratch_store_dword off, v1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -155,7 +155,7 @@ ; GCN-NEXT: s_mov_b32 s7, s33 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_cbranch_execz BB2_3 ; GCN-NEXT: ; %bb.1: ; %bb.0 @@ -181,7 +181,7 @@ ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: s_mov_b32 s33, s7 ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -217,10 +217,10 @@ ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s7, s33 -; GCN-NEXT: s_add_u32 s33, s32, 0xfc0 +; GCN-NEXT: s_add_i32 s33, s32, 0xfc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GCN-NEXT: s_add_u32 s32, s32, 0x2000 +; GCN-NEXT: s_addk_i32 s32, 0x2000 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_cbranch_execz BB3_2 ; GCN-NEXT: ; %bb.1: ; %bb.0 @@ -243,7 +243,7 @@ ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_sub_u32 s32, s32, 0x2000 +; GCN-NEXT: s_addk_i32 s32, 0xe000 ; GCN-NEXT: s_mov_b32 s33, s7 ; GCN-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -347,7 +347,7 @@ ; Check for prologue initializing special SGPRs pointing to scratch. 
; HSA-LABEL: {{^}}store_flat_scratch: ; CI-DAG: s_mov_b32 flat_scratch_lo, s9 -; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11 +; CI-DAG: s_add_i32 [[ADD:s[0-9]+]], s8, s11 ; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8 ; GFX9: s_add_u32 flat_scratch_lo, s6, s9 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -42,7 +42,7 @@ ; HSA-ALLOCA: .end_amd_kernel_code_t ; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7 -; HSA-ALLOCA: s_add_u32 s6, s6, s9 +; HSA-ALLOCA: s_add_i32 s6, s6, s9 ; HSA-ALLOCA: s_lshr_b32 flat_scratch_hi, s6, 8 ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen ; encoding: [0x00,0x10,0x70,0xe0 diff --git a/llvm/test/CodeGen/AMDGPU/call-constant.ll b/llvm/test/CodeGen/AMDGPU/call-constant.ll --- a/llvm/test/CodeGen/AMDGPU/call-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/call-constant.ll @@ -5,7 +5,7 @@ ; GCN-LABEL: {{^}}test_call_undef: ; SDAG: s_mov_b32 flat_scratch_lo, s13 -; SDAG: s_add_u32 s12, s12, s17 +; SDAG: s_add_i32 s12, s12, s17 ; SDAG: s_lshr_b32 ; GCN: s_endpgm define amdgpu_kernel void @test_call_undef() #0 { @@ -27,7 +27,7 @@ ; GCN-LABEL: {{^}}test_call_null: ; SDAG: s_mov_b32 flat_scratch_lo, s13 -; SDAG: s_add_u32 s12, s12, s17 +; SDAG: s_add_i32 s12, s12, s17 ; SDAG: s_lshr_b32 ; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -59,8 +59,8 @@ ; GCN: v_writelane_b32 v40, s33, 4 ; GCN: s_mov_b32 s33, s32 -; MUBUF: s_add_u32 s32, s32, 0x400 -; FLATSCR: s_add_u32 s32, s32, 16 +; MUBUF: s_addk_i32 s32, 0x400 +; FLATSCR: s_add_i32 s32, s32, 16 ; GCN: s_swappc_b64 ; GCN-NEXT: s_swappc_b64 diff 
--git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -52,14 +52,14 @@ ; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33 ; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33 ; GCN-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: s_add_u32 s32, s32, 0x200 -; FLATSCR-NEXT: s_add_u32 s32, s32, 8 +; MUBUF-NEXT: s_addk_i32 s32, 0x200 +; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}} ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4{{$}} ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4{{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x200 -; FLATSCR-NEXT: s_sub_u32 s32, s32, 8 +; MUBUF-NEXT: s_addk_i32 s32, 0xfe00 +; FLATSCR-NEXT: s_add_i32 s32, s32, -8 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] ; GCN-NEXT: s_setpc_b64 define void @callee_with_stack_no_fp_elim_all() #1 { @@ -91,8 +91,8 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-DAG: s_mov_b32 s33, s32 -; MUBUF-DAG: s_add_u32 s32, s32, 0x400{{$}} -; FLATSCR-DAG: s_add_u32 s32, s32, 16{{$}} +; MUBUF-DAG: s_addk_i32 s32, 0x400{{$}} +; FLATSCR-DAG: s_add_i32 s32, s32, 16{{$}} ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, @@ -107,8 +107,8 @@ ; FLATSCR-DAG: v_readlane_b32 s0, [[CSR_VGPR]] ; FLATSCR-DAG: v_readlane_b32 s1, [[CSR_VGPR]] -; MUBUF: s_sub_u32 s32, s32, 0x400{{$}} -; FLATSCR: s_sub_u32 s32, s32, 16{{$}} +; MUBUF: s_addk_i32 s32, 0xfc00{{$}} +; FLATSCR: s_add_i32 s32, s32, -16{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -136,8 +136,8 @@ ; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], 
off, s[0:3], s32 ; 4-byte Folded Spill ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; MUBUF-DAG: s_add_u32 s32, s32, 0x400 -; FLATSCR-DAG: s_add_u32 s32, s32, 16 +; MUBUF-DAG: s_addk_i32 s32, 0x400 +; FLATSCR-DAG: s_add_i32 s32, s32, 16 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s33, [[FP_SPILL_LANE:[0-9]+]] ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 @@ -149,8 +149,8 @@ ; FLATSCR-DAG: v_readlane_b32 s0, v40, 0 ; FLATSCR-DAG: v_readlane_b32 s1, v40, 1 -; MUBUF: s_sub_u32 s32, s32, 0x400 -; FLATSCR: s_sub_u32 s32, s32, 16 +; MUBUF: s_addk_i32 s32, 0xfc00 +; FLATSCR: s_add_i32 s32, s32, -16 ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]] ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload @@ -251,11 +251,11 @@ ; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload -; MUBUF: s_add_u32 s32, s32, 0x300 -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300 +; MUBUF: s_addk_i32 s32, 0x300 +; MUBUF-NEXT: s_addk_i32 s32, 0xfd00 ; MUBUF-NEXT: s_mov_b32 s33, s4 -; FLATSCR: s_add_u32 s32, s32, 12 -; FLATSCR-NEXT: s_sub_u32 s32, s32, 12 +; FLATSCR: s_add_i32 s32, s32, 12 +; FLATSCR-NEXT: s_add_i32 s32, s32, -12 ; FLATSCR-NEXT: s_mov_b32 s33, s0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 @@ -284,10 +284,10 @@ ; GCN: ;;#ASMSTART ; GCN: v_writelane_b32 v1 -; MUBUF: s_add_u32 s32, s32, 0x400 -; MUBUF: s_sub_u32 s32, s32, 0x400 -; FLATSCR: s_add_u32 s32, s32, 16 -; FLATSCR: s_sub_u32 s32, s32, 16 +; MUBUF: s_addk_i32 s32, 0x400 +; MUBUF: s_addk_i32 s32, 0xfc00 +; FLATSCR: s_add_i32 s32, s32, 16 +; FLATSCR: s_add_i32 s32, s32, -16 ; GCN-NEXT: v_readlane_b32 s33, v1, 63 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, 
s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -330,11 +330,11 @@ ; GCN: v_writelane_b32 v1, ; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload -; MUBUF: s_add_u32 s32, s32, 0x400 -; FLATSCR: s_add_u32 s32, s32, 16 +; MUBUF: s_addk_i32 s32, 0x400 +; FLATSCR: s_add_i32 s32, s32, 16 ; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1 -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x400 -; FLATSCR-NEXT: s_sub_u32 s32, s32, 16 +; MUBUF-NEXT: s_addk_i32 s32, 0xfc00 +; FLATSCR-NEXT: s_add_i32 s32, s32, -16 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -362,18 +362,18 @@ ; GCN: s_waitcnt ; MUBUF-NEXT: s_mov_b32 [[FP_COPY:s4]], s33 ; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33 -; MUBUF-NEXT: s_add_u32 s33, s32, 0x7ffc0 -; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff +; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0 +; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff ; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000 ; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000 -; MUBUF-NEXT: s_add_u32 s32, s32, 0x100000 -; FLATSCR-NEXT: s_add_u32 s32, s32, 0x4000 +; MUBUF-NEXT: s_add_i32 s32, s32, 0x100000 +; FLATSCR-NEXT: s_addk_i32 s32, 0x4000 ; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; MUBUF-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s33 ; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], s33 ; GCN-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x100000 -; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x4000 +; MUBUF-NEXT: s_add_i32 s32, s32, 0xfff00000 +; FLATSCR-NEXT: s_addk_i32 s32, 0xc000 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] ; GCN-NEXT: s_setpc_b64 define void @realign_stack_no_fp_elim() #1 { @@ -397,14 +397,14 @@ ; FLATSCR: scratch_store_dword off, [[ZERO]], s33 offset:4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN: ;;#ASMSTART -; MUBUF: s_add_u32 s32, s32, 0x300 +; 
MUBUF: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: v_readlane_b32 s4, v1, 0 ; MUBUF-NEXT: v_readlane_b32 s5, v1, 1 -; FLATSCR: s_add_u32 s32, s32, 12 +; FLATSCR: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: v_readlane_b32 s0, v1, 0 ; FLATSCR-NEXT: v_readlane_b32 s1, v1, 1 -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300 -; FLATSCR-NEXT: s_sub_u32 s32, s32, 12 +; MUBUF-NEXT: s_addk_i32 s32, 0xfd00 +; FLATSCR-NEXT: s_add_i32 s32, s32, -12 ; GCN-NEXT: v_readlane_b32 s33, v1, 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -441,16 +441,16 @@ ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword -; MUBUF: s_add_u32 s32, s32, 0x300{{$}} -; FLATSCR: s_add_u32 s32, s32, 12{{$}} +; MUBUF: s_addk_i32 s32, 0x300{{$}} +; FLATSCR: s_add_i32 s32, s32, 12{{$}} ; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 ; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART ; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 ; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x300{{$}} -; FLATSCR-NEXT: s_sub_u32 s32, s32, 12{{$}} +; MUBUF-NEXT: s_addk_i32 s32, 0xfd00{{$}} +; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -483,17 +483,17 @@ ; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset: ; GCN: s_waitcnt ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200 +; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200 ; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill -; FLATSCR-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008 +; FLATSCR-NEXT: 
s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008 ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: s_mov_b32 s33, s32 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 -; MUBUF-DAG: s_add_u32 s32, s32, 0x40300{{$}} -; FLATSCR-DAG: s_add_u32 s32, s32, 0x100c{{$}} +; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}} +; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}} ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword @@ -502,13 +502,13 @@ ; GCN: ;;#ASMSTART ; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 ; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x40300{{$}} -; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x100c{{$}} +; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfd00{{$}} +; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} -; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200 +; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200 ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Reload -; FLATSCR-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008 +; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008 ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -546,13 +546,13 @@ ; GCN-LABEL: {{^}}ipra_call_with_stack: ; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN: s_mov_b32 s33, s32 -; MUBUF: s_add_u32 s32, s32, 0x400 -; FLATSCR: s_add_u32 s32, s32, 16 +; MUBUF: s_addk_i32 s32, 0x400 +; FLATSCR: s_add_i32 s32, s32, 16 ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}} ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33{{$}} ; GCN: s_swappc_b64 
-; MUBUF: s_sub_u32 s32, s32, 0x400 -; FLATSCR: s_sub_u32 s32, s32, 16 +; MUBUF: s_addk_i32 s32, 0xfc00 +; FLATSCR: s_add_i32 s32, s32, -16 ; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]] define void @ipra_call_with_stack() #0 { %alloca = alloca i32, addrspace(5) @@ -666,13 +666,13 @@ ; scratch VGPR to hold the offset. ; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset ; MUBUF: s_or_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200 +; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200 ; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill ; MUBUF: v_mov_b32_e32 v0, s33 ; GCN-NOT: v_mov_b32_e32 v0, 0x100c -; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40300 +; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40300 ; MUBUF: buffer_store_dword v0, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill -; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s33, 0x1004 +; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1004 ; FLATSCR: v_mov_b32_e32 v0, 0 ; FLATSCR: scratch_store_dword off, v0, [[SOFF]] define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #3 { diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -522,7 +522,7 @@ ; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill: ; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: s_add_u32 s32, s32, 0x400 +; GCN-DAG: s_addk_i32 s32, 0x400 ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5] ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll --- 
a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll @@ -403,7 +403,7 @@ ; Requires loading and storing to stack slot. ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x: -; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} +; GCN-DAG: s_addk_i32 s32, 0x400{{$}} ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}} @@ -411,7 +411,7 @@ ; GCN: s_swappc_b64 -; GCN: s_sub_u32 s32, s32, 0x400{{$}} +; GCN: s_addk_i32 s32, 0xfc00{{$}} ; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN: s_setpc_b64 define void @too_many_args_call_too_many_args_use_workitem_id_x( diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -509,7 +509,7 @@ ; Requires loading and storing to stack slot. 
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x: -; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} +; GCN-DAG: s_addk_i32 s32, 0x400{{$}} ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}} @@ -517,7 +517,7 @@ ; GCN: s_swappc_b64 -; GCN: s_sub_u32 s32, s32, 0x400{{$}} +; GCN: s_addk_i32 s32, 0xfc00{{$}} ; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN: s_setpc_b64 define void @too_many_args_call_too_many_args_use_workitem_id_x( diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -22,7 +22,7 @@ define amdgpu_kernel void @test_kern_stack() local_unnamed_addr #0 { ; GFX803-LABEL: test_kern_stack: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_u32 s4, s4, s7 +; GFX803-NEXT: s_add_i32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 @@ -64,7 +64,7 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 { ; GFX803-LABEL: test_kern_call: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_u32 s4, s4, s7 +; GFX803-NEXT: s_add_i32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 @@ -111,7 +111,7 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 { ; GFX803-LABEL: test_kern_stack_and_call: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_u32 s4, s4, s7 +; GFX803-NEXT: s_add_i32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 @@ -188,7 +188,7 @@ define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 { ; GFX803-LABEL: test_force_fp_kern_stack: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_u32 s4, s4, s7 
+; GFX803-NEXT: s_add_i32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 ; GFX803-NEXT: s_mov_b32 s33, 0 @@ -233,7 +233,7 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 { ; GFX803-LABEL: test_force_fp_kern_call: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_u32 s4, s4, s7 +; GFX803-NEXT: s_add_i32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 @@ -283,7 +283,7 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 { ; GFX803-LABEL: test_force_fp_kern_stack_and_call: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_u32 s4, s4, s7 +; GFX803-NEXT: s_add_i32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 ; GFX803-NEXT: s_mov_b32 s33, 0 @@ -344,7 +344,7 @@ define amdgpu_kernel void @test_sgpr_offset_kernel() #1 { ; GFX803-LABEL: test_sgpr_offset_kernel: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_add_u32 s4, s4, s7 +; GFX803-NEXT: s_add_i32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -33,7 +33,7 @@ ; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_v2f32@rel32@hi+12 @@ -41,7 +41,7 @@ ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: v_readlane_b32 s4, v40, 0 ; GCN-NEXT: v_readlane_b32 s5, v40, 1 -; GCN-NEXT: s_sub_u32 
s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -67,7 +67,7 @@ ; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_v3f32@rel32@hi+12 @@ -75,7 +75,7 @@ ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: v_readlane_b32 s4, v40, 0 ; GCN-NEXT: v_readlane_b32 s5, v40, 1 -; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -101,7 +101,7 @@ ; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_v4f16@rel32@hi+12 @@ -109,7 +109,7 @@ ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GCN-NEXT: v_readlane_b32 s4, v40, 0 ; GCN-NEXT: v_readlane_b32 s5, v40, 1 -; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -135,7 +135,7 @@ ; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, func_struct@rel32@hi+12 @@ -144,7 +144,7 @@ ; GCN-NEXT: 
v_readlane_b32 s4, v40, 0 ; GCN-NEXT: v_mov_b32_e32 v1, v4 ; GCN-NEXT: v_readlane_b32 s5, v40, 1 -; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -228,10 +228,10 @@ ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-NEXT: s_add_u32 s1, 4, s1 +; GFX9-NEXT: s_add_i32 s1, s1, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 4, s0 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -248,8 +248,8 @@ ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-NEXT: s_add_u32 s0, 4, s0 -; GFX10-NEXT: s_add_u32 s1, 4, s1 +; GFX10-NEXT: s_add_i32 s0, s0, 4 +; GFX10-NEXT: s_add_i32 s1, s1, 4 ; GFX10-NEXT: scratch_store_dword off, v0, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -270,10 +270,10 @@ ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 ; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-PAL-NEXT: s_add_u32 s1, 4, s1 +; GFX9-PAL-NEXT: s_add_i32 s1, s1, 4 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm @@ -295,8 +295,8 @@ ; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; 
GFX10-PAL-NEXT: s_add_u32 s0, 4, s0 -; GFX10-PAL-NEXT: s_add_u32 s1, 4, s1 +; GFX10-PAL-NEXT: s_add_i32 s0, s0, 4 +; GFX10-PAL-NEXT: s_add_i32 s1, s1, 4 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -321,13 +321,13 @@ ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; GFX9-NEXT: s_lshl_b32 s0, s2, 2 -; GFX9-NEXT: s_add_u32 s0, 4, s0 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_and_b32 s0, s2, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-NEXT: s_add_u32 s0, 4, s0 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -342,8 +342,8 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 15 ; GFX10-NEXT: s_lshl_b32 s1, s2, 2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 -; GFX10-NEXT: s_add_u32 s1, 4, s1 -; GFX10-NEXT: s_add_u32 s0, 4, s0 +; GFX10-NEXT: s_add_i32 s1, s1, 4 +; GFX10-NEXT: s_add_i32 s0, s0, 4 ; GFX10-NEXT: scratch_store_dword off, v0, s1 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc @@ -363,10 +363,10 @@ ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 ; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-PAL-NEXT: s_add_u32 s1, 4, s1 +; GFX9-PAL-NEXT: s_add_i32 s1, s1, 4 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm @@ -386,8 +386,8 @@ ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-PAL-NEXT: s_add_u32 s0, 4, s0 -; GFX10-PAL-NEXT: s_add_u32 s1, 4, s1 +; GFX10-PAL-NEXT: 
s_add_i32 s0, s0, 4 +; GFX10-PAL-NEXT: s_add_i32 s1, s1, 4 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -857,10 +857,10 @@ ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_u32 s1, 0x104, s1 +; GFX9-NEXT: s_addk_i32 s1, 0x104 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 0x104, s0 +; GFX9-NEXT: s_addk_i32 s0, 0x104 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -879,8 +879,8 @@ ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-NEXT: s_add_u32 s0, 0x104, s0 -; GFX10-NEXT: s_add_u32 s1, 0x104, s1 +; GFX10-NEXT: s_addk_i32 s0, 0x104 +; GFX10-NEXT: s_addk_i32 s1, 0x104 ; GFX10-NEXT: scratch_store_dword off, v0, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -904,10 +904,10 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-PAL-NEXT: s_add_u32 s1, 0x104, s1 +; GFX9-PAL-NEXT: s_addk_i32 s1, 0x104 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 s0, 0x104, s0 +; GFX9-PAL-NEXT: s_addk_i32 s0, 0x104 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm @@ -932,8 +932,8 @@ ; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX1010-PAL-NEXT: s_add_u32 s0, 0x104, s0 -; GFX1010-PAL-NEXT: s_add_u32 s1, 0x104, s1 +; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x104 +; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x104 ; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; 
GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -959,8 +959,8 @@ ; GFX1030-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX1030-PAL-NEXT: s_add_u32 s0, 0x104, s0 -; GFX1030-PAL-NEXT: s_add_u32 s1, 0x104, s1 +; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x104 +; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x104 ; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -991,13 +991,13 @@ ; GFX9-NEXT: s_lshl_b32 s0, s2, 2 ; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 0x104, s0 +; GFX9-NEXT: s_addk_i32 s0, 0x104 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_and_b32 s0, s2, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-NEXT: s_add_u32 s0, 0x104, s0 +; GFX9-NEXT: s_addk_i32 s0, 0x104 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -1014,8 +1014,8 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 15 ; GFX10-NEXT: s_lshl_b32 s1, s2, 2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 -; GFX10-NEXT: s_add_u32 s1, 0x104, s1 -; GFX10-NEXT: s_add_u32 s0, 0x104, s0 +; GFX10-NEXT: s_addk_i32 s1, 0x104 +; GFX10-NEXT: s_addk_i32 s0, 0x104 ; GFX10-NEXT: scratch_store_dword off, v0, s1 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc @@ -1037,11 +1037,11 @@ ; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-PAL-NEXT: s_add_u32 s1, 0x104, s1 +; GFX9-PAL-NEXT: s_addk_i32 s1, 0x104 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 s0, 0x104, s0 +; GFX9-PAL-NEXT: s_addk_i32 s0, 0x104 ; GFX9-PAL-NEXT: 
scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm @@ -1064,8 +1064,8 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX1010-PAL-NEXT: s_add_u32 s0, 0x104, s0 -; GFX1010-PAL-NEXT: s_add_u32 s1, 0x104, s1 +; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x104 +; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x104 ; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -1089,8 +1089,8 @@ ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX1030-PAL-NEXT: s_add_u32 s0, 0x104, s0 -; GFX1030-PAL-NEXT: s_add_u32 s1, 0x104, s1 +; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x104 +; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x104 ; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -1245,7 +1245,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100 ; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 @@ -1262,7 +1262,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x100 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v3, v0, v1 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2 @@ -1280,7 +1280,7 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x100 +; GFX9-PAL-NEXT: 
s_add_i32 vcc_hi, s32, 0x100 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 @@ -1297,7 +1297,7 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x100 +; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo ; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1 ; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2 @@ -1495,13 +1495,13 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi -; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 -; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 -; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -1513,7 +1513,7 @@ ; GFX10-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -1522,11 +1522,11 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo -; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 -; GFX10-NEXT: s_add_u32 
vcc_lo, s32, 0x4000 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 -; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1544,13 +1544,13 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi -; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 -; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 -; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] @@ -1562,7 +1562,7 @@ ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s3, s0 @@ -1572,13 +1572,13 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo ; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 -; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 ; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 -; 
GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 ; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 -; GFX1010-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31] @@ -1590,7 +1590,7 @@ ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1030-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s2, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s3, s0 @@ -1599,11 +1599,11 @@ ; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo -; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 -; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 -; GFX1030-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31] @@ -1629,10 +1629,10 @@ ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX9-NEXT: s_addk_i32 s1, 0x4004 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX9-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-NEXT: 
scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -1651,8 +1651,8 @@ ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-NEXT: s_add_u32 s0, 0x4004, s0 -; GFX10-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX10-NEXT: s_addk_i32 s0, 0x4004 +; GFX10-NEXT: s_addk_i32 s1, 0x4004 ; GFX10-NEXT: scratch_store_dword off, v0, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -1676,10 +1676,10 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-PAL-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX9-PAL-NEXT: s_addk_i32 s1, 0x4004 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX9-PAL-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm @@ -1704,8 +1704,8 @@ ; GFX1010-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX1010-PAL-NEXT: s_add_u32 s0, 0x4004, s0 -; GFX1010-PAL-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x4004 +; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x4004 ; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -1731,8 +1731,8 @@ ; GFX1030-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX1030-PAL-NEXT: s_add_u32 s0, 0x4004, s0 -; GFX1030-PAL-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x4004 +; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x4004 ; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -1763,13 +1763,13 @@ ; 
GFX9-NEXT: s_lshl_b32 s0, s2, 2 ; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX9-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_and_b32 s0, s2, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX9-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -1786,8 +1786,8 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 15 ; GFX10-NEXT: s_lshl_b32 s1, s2, 2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 -; GFX10-NEXT: s_add_u32 s1, 0x4004, s1 -; GFX10-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX10-NEXT: s_addk_i32 s1, 0x4004 +; GFX10-NEXT: s_addk_i32 s0, 0x4004 ; GFX10-NEXT: scratch_store_dword off, v0, s1 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc @@ -1809,11 +1809,11 @@ ; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-PAL-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX9-PAL-NEXT: s_addk_i32 s1, 0x4004 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX9-PAL-NEXT: s_addk_i32 s0, 0x4004 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm @@ -1836,8 +1836,8 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1010-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1010-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX1010-PAL-NEXT: s_add_u32 s0, 0x4004, s0 -; GFX1010-PAL-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX1010-PAL-NEXT: s_addk_i32 s0, 0x4004 +; GFX1010-PAL-NEXT: s_addk_i32 s1, 0x4004 ; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: 
scratch_load_dword v0, off, s1 glc dlc @@ -1861,8 +1861,8 @@ ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX1030-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX1030-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX1030-PAL-NEXT: s_add_u32 s0, 0x4004, s0 -; GFX1030-PAL-NEXT: s_add_u32 s1, 0x4004, s1 +; GFX1030-PAL-NEXT: s_addk_i32 s0, 0x4004 +; GFX1030-PAL-NEXT: s_addk_i32 s1, 0x4004 ; GFX1030-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -2017,7 +2017,7 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 @@ -2034,7 +2034,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v3, v0, v1 ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2 @@ -2052,7 +2052,7 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 vcc_hi, s32, 0x4000 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 @@ -2069,7 +2069,7 @@ ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-PAL-NEXT: s_add_u32 vcc_lo, s32, 0x4000 +; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo ; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1 ; GFX10-PAL-NEXT: 
v_lshl_add_u32 v0, v0, 2, v2 @@ -2107,7 +2107,7 @@ ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 4, s0 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -2124,7 +2124,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-NEXT: s_add_u32 s0, 4, s0 +; GFX10-NEXT: s_add_i32 s0, s0, 4 ; GFX10-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -2147,7 +2147,7 @@ ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, vcc_hi offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 s0, 4, s0 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -2170,7 +2170,7 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: s_add_u32 s0, 4, s0 +; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX1010-PAL-NEXT: scratch_store_dword off, v0, vcc_lo offset:4 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -2193,7 +2193,7 @@ ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3800 -; GFX1030-PAL-NEXT: s_add_u32 s0, 4, s0 +; GFX1030-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX1030-PAL-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -2220,7 +2220,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: scratch_store_dword off, v0, 
s32 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, s32, s0 +; GFX9-NEXT: s_add_i32 s0, s0, s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -2235,7 +2235,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-NEXT: s_add_u32 s0, s32, s0 +; GFX10-NEXT: s_add_i32 s0, s0, s32 ; GFX10-NEXT: scratch_store_dword off, v0, s32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -2251,7 +2251,7 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_u32 s0, s32, s0 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, s32 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -2266,7 +2266,7 @@ ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-PAL-NEXT: s_add_u32 s0, s32, s0 +; GFX10-PAL-NEXT: s_add_i32 s0, s0, s32 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -37,7 +37,7 @@ ; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX9-FLATSCR: v_mov_b32_e32 v0, s32 -; GFX9-FLATSCR: s_add_u32 [[ADD:[^,]+]], s32, 4 +; GFX9-FLATSCR: s_add_i32 [[ADD:[^,]+]], s32, 4 ; GFX9-NEXT: ds_write_b32 v0, v0 ; GFX9-MUBUF-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 ; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] @@ -196,7 +196,7 @@ ; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 ; GFX9-MUBUF: 
v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]] -; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200 +; GFX9-FLATSCR-DAG: s_add_i32 [[SZ:[^,]+]], s32, 0x200 ; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]] ; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9 @@ -222,7 +222,7 @@ ; GFX9-MUBUF-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32 ; GFX9-MUBUF: v_add_u32_e32 [[VZ:v[0-9]+]], 0x200, [[SCALED]] -; GFX9-FLATSCR-DAG: s_add_u32 [[SZ:[^,]+]], s32, 0x200 +; GFX9-FLATSCR-DAG: s_add_i32 [[SZ:[^,]+]], s32, 0x200 ; GFX9-FLATSCR: v_mov_b32_e32 [[VZ:v[0-9]+]], [[SZ]] ; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9 diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -16,7 +16,7 @@ ; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s33, 2 ; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0 ; SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 -; SPILL-TO-VGPR-NEXT: s_add_u32 s32, s32, 0x400 +; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400 ; SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5] ; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 @@ -27,7 +27,7 @@ ; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 0 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v40, 1 -; SPILL-TO-VGPR-NEXT: s_sub_u32 s32, s32, 0x400 +; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 2 ; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1 ; SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -41,7 +41,7 @@ ; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, s33 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 -; NO-SPILL-TO-VGPR-NEXT: 
s_add_u32 s32, s32, 0x800 +; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 @@ -68,7 +68,7 @@ ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] -; NO-SPILL-TO-VGPR-NEXT: s_sub_u32 s32, s32, 0x800 +; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -102,7 +102,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 @@ -112,7 +112,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -131,7 +131,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; 
GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 @@ -141,7 +141,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -161,7 +161,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1@rel32@hi+12 @@ -171,7 +171,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -195,7 +195,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 @@ -205,7 +205,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], 
-1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -225,7 +225,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 @@ -236,7 +236,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -257,7 +257,7 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_signext@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_signext@rel32@hi+12 @@ -268,7 +268,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -293,7 +293,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, 
s4, external_void_func_i1_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 @@ -303,7 +303,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -323,7 +323,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 @@ -334,7 +334,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -355,7 +355,7 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_zeroext@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_zeroext@rel32@hi+12 @@ -366,7 +366,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -389,7 +389,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 @@ -398,7 +398,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -417,7 +417,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 @@ -426,7 +426,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -446,7 +446,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8@rel32@lo+4 ; 
GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8@rel32@hi+12 @@ -455,7 +455,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -479,7 +479,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 @@ -487,7 +487,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -507,7 +507,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 @@ -516,7 +516,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, 
off, s[0:3], s32 ; 4-byte Folded Reload @@ -537,7 +537,7 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_signext@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_signext@rel32@hi+12 @@ -546,7 +546,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -571,7 +571,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 @@ -579,7 +579,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -599,7 +599,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 @@ -608,7 +608,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -629,7 +629,7 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_zeroext@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_zeroext@rel32@hi+12 @@ -638,7 +638,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -661,7 +661,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 @@ -670,7 +670,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -689,7 +689,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 @@ -698,7 +698,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -718,7 +718,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16@rel32@hi+12 @@ -727,7 +727,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -751,7 +751,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, 
external_void_func_i16_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 @@ -759,7 +759,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -779,7 +779,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 @@ -788,7 +788,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -809,7 +809,7 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_signext@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_signext@rel32@hi+12 @@ -818,7 +818,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -843,7 +843,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 @@ -851,7 +851,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -871,7 +871,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 @@ -880,7 +880,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -901,7 +901,7 @@ ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: 
s_add_u32 s0, s0, external_void_func_i16_zeroext@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_zeroext@rel32@hi+12 @@ -910,7 +910,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -933,7 +933,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 @@ -942,7 +942,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -961,7 +961,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 @@ -970,7 +970,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -990,7 +990,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32@rel32@hi+12 @@ -999,7 +999,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1021,7 +1021,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -1031,7 +1031,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1051,7 +1051,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 @@ -1060,7 +1060,7 @@ ; 
GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1081,7 +1081,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64@rel32@lo+4 @@ -1090,7 +1090,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1115,7 +1115,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 @@ -1123,7 +1123,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1143,7 +1143,7 @@ ; GFX10-NEXT: 
v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 @@ -1153,7 +1153,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1174,7 +1174,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 @@ -1184,7 +1184,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1207,7 +1207,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -1219,7 +1219,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: 
v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1242,7 +1242,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 @@ -1250,7 +1250,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1274,7 +1274,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 @@ -1282,7 +1282,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1307,7 +1307,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, 
s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -1317,7 +1317,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1341,7 +1341,7 @@ ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 @@ -1349,7 +1349,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1374,7 +1374,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12 @@ -1382,7 +1382,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1410,7 +1410,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_mov_b32_e32 v6, 3 @@ -1422,7 +1422,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1448,7 +1448,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v7, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 @@ -1456,7 +1456,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1483,7 +1483,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 
s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12 @@ -1491,7 +1491,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1515,7 +1515,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 @@ -1524,7 +1524,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1543,7 +1543,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 @@ -1552,7 +1552,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: 
v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1572,7 +1572,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16@rel32@hi+12 @@ -1581,7 +1581,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1603,7 +1603,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 @@ -1612,7 +1612,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1631,7 +1631,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; 
GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 @@ -1640,7 +1640,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1660,7 +1660,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32@rel32@hi+12 @@ -1669,7 +1669,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1691,7 +1691,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -1701,7 +1701,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 
s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1721,7 +1721,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 @@ -1730,7 +1730,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1751,7 +1751,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32@rel32@lo+4 @@ -1760,7 +1760,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1782,7 +1782,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1793,7 +1793,7 
@@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1815,7 +1815,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 @@ -1823,7 +1823,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1846,7 +1846,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12 @@ -1854,7 +1854,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded 
Reload @@ -1876,7 +1876,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1889,7 +1889,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1913,7 +1913,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 @@ -1921,7 +1921,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -1946,7 +1946,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12 @@ -1954,7 +1954,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -1976,7 +1976,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -1986,7 +1986,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2006,7 +2006,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 @@ -2015,7 +2015,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2036,7 +2036,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64@rel32@lo+4 @@ -2045,7 +2045,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2067,7 +2067,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2079,7 +2079,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2102,7 +2102,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 @@ -2110,7 +2110,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; 
GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2134,7 +2134,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 @@ -2142,7 +2142,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2164,7 +2164,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2178,7 +2178,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2203,7 +2203,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, 
external_void_func_v3f64@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 @@ -2211,7 +2211,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2237,7 +2237,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 @@ -2245,7 +2245,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2268,7 +2268,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 @@ -2276,7 +2276,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 
s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2295,7 +2295,7 @@ ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 @@ -2304,7 +2304,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2324,7 +2324,7 @@ ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16@rel32@hi+12 @@ -2333,7 +2333,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2357,7 +2357,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 
s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 @@ -2365,7 +2365,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2384,7 +2384,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 @@ -2393,7 +2393,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2413,7 +2413,7 @@ ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 @@ -2422,7 +2422,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: 
s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2446,7 +2446,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 @@ -2454,7 +2454,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2473,7 +2473,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 @@ -2482,7 +2482,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2502,7 +2502,7 @@ ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, 
s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 @@ -2511,7 +2511,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2534,7 +2534,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -2544,7 +2544,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2564,7 +2564,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 @@ -2573,7 +2573,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; 
GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2594,7 +2594,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16@rel32@lo+4 @@ -2603,7 +2603,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2625,7 +2625,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -2635,7 +2635,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2655,7 +2655,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, 
external_void_func_v3f16@rel32@lo+4 @@ -2664,7 +2664,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2685,7 +2685,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16@rel32@lo+4 @@ -2694,7 +2694,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2717,7 +2717,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 @@ -2725,7 +2725,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, 
s[0:3], s32 ; 4-byte Folded Reload @@ -2744,7 +2744,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 @@ -2753,7 +2753,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2773,7 +2773,7 @@ ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 @@ -2782,7 +2782,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2805,7 +2805,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 
0x40003 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -2815,7 +2815,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2835,7 +2835,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 @@ -2844,7 +2844,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2865,7 +2865,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16@rel32@lo+4 @@ -2874,7 +2874,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte 
Folded Reload @@ -2897,7 +2897,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 @@ -2905,7 +2905,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2924,7 +2924,7 @@ ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 @@ -2933,7 +2933,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -2953,7 +2953,7 @@ ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v2f16@rel32@hi+12 @@ -2962,7 +2962,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -2986,7 +2986,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 @@ -2994,7 +2994,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3013,7 +3013,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 @@ -3022,7 +3022,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte 
Folded Reload @@ -3042,7 +3042,7 @@ ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 @@ -3051,7 +3051,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3074,7 +3074,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -3084,7 +3084,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3104,7 +3104,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 @@ -3113,7 +3113,7 @@ ; GFX10-NEXT: s_swappc_b64 
s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3134,7 +3134,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32@rel32@lo+4 @@ -3143,7 +3143,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3165,7 +3165,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 @@ -3176,7 +3176,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3198,7 +3198,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; 
GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 @@ -3206,7 +3206,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3229,7 +3229,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12 @@ -3237,7 +3237,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3259,7 +3259,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 @@ -3271,7 +3271,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 
0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3294,7 +3294,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v3, 6 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 @@ -3302,7 +3302,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3326,7 +3326,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12 @@ -3334,7 +3334,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3357,7 +3357,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; 
GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 @@ -3365,7 +3365,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3384,7 +3384,7 @@ ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 @@ -3393,7 +3393,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3413,7 +3413,7 @@ ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 @@ -3422,7 +3422,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3445,7 +3445,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -3457,7 +3457,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3480,7 +3480,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 @@ -3488,7 +3488,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3512,7 +3512,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 
s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 @@ -3520,7 +3520,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3542,7 +3542,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -3555,7 +3555,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3579,7 +3579,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 @@ -3587,7 +3587,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: 
s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3612,7 +3612,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12 @@ -3620,7 +3620,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3647,7 +3647,7 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[4:5] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[4:5] offset:16 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 @@ -3655,7 +3655,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3675,7 +3675,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v8, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -3689,7 +3689,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3710,7 +3710,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) @@ -3724,7 +3724,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3748,7 +3748,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -3764,7 +3764,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded 
Reload @@ -3791,7 +3791,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v6, 7 ; GFX10-NEXT: v_mov_b32_e32 v7, 8 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 @@ -3799,7 +3799,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3827,7 +3827,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 7 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 8 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 @@ -3835,7 +3835,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -3864,7 +3864,7 @@ ; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[4:5] offset:16 ; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[4:5] offset:32 ; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[4:5] offset:48 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, 
external_void_func_v16i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 @@ -3872,7 +3872,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3892,7 +3892,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v16, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -3908,7 +3908,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -3929,7 +3929,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) @@ -3945,7 +3945,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, 
s32 ; 4-byte Folded Reload @@ -3981,7 +3981,7 @@ ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[4:5] offset:96 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[4:5] offset:112 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12 @@ -3989,7 +3989,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4009,7 +4009,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -4029,7 +4029,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4050,7 +4050,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) @@ -4070,7 +4070,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], 
s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -4106,7 +4106,7 @@ ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[4:5] offset:96 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[4:5] offset:112 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 @@ -4118,7 +4118,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4138,7 +4138,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_dword v33, v[0:1], off @@ -4161,7 +4161,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4182,7 +4182,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_dword v33, v[0:1], off @@ -4205,7 +4205,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -4232,7 +4232,7 @@ ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v40, v0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -4247,7 +4247,7 @@ ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s4, v42, 0 ; GFX9-NEXT: v_readlane_b32 s5, v42, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -4265,7 +4265,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v42, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_mov_b32_e32 v40, v0 @@ 
-4284,7 +4284,7 @@ ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_readlane_b32 s4, v42, 0 ; GFX10-NEXT: v_readlane_b32 s5, v42, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v42, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -4303,7 +4303,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, v0 @@ -4322,7 +4322,7 @@ ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v42, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v42, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s32 offset:8 ; 4-byte Folded Reload @@ -4350,7 +4350,7 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v2, s[4:5] offset:4 ; GFX9-NEXT: global_load_ubyte v0, v2, s[4:5] -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 @@ -4358,7 +4358,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4378,7 +4378,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -4392,7 +4392,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4413,7 +4413,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) @@ -4427,7 +4427,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -4454,7 +4454,7 @@ ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: 
s_getpc_b64 s[4:5] @@ -4464,7 +4464,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -4484,7 +4484,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 @@ -4496,7 +4496,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -4517,7 +4517,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 @@ -4529,7 +4529,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte 
Folded Reload @@ -4561,7 +4561,7 @@ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_add_u32 s32, s32, 0x800 +; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX9-NEXT: s_getpc_b64 s[4:5] @@ -4573,7 +4573,7 @@ ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x800 +; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_byte v[0:1], v0, off @@ -4598,7 +4598,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x400 +; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 @@ -4615,7 +4615,7 @@ ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX10-NEXT: s_addk_i32 s32, 0xfc00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -4640,12 +4640,12 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 32 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX10-SCRATCH-NEXT: s_add_u32 vcc_lo, s33, 8 +; GFX10-SCRATCH-NEXT: s_add_i32 vcc_lo, s33, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 @@ -4658,7 +4658,7 @@ ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 32 +; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: global_store_byte v[0:1], v0, off @@ -4702,7 +4702,7 @@ ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 @@ -4729,7 +4729,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4749,7 +4749,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -4780,7 +4780,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 
0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4801,7 +4801,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) @@ -4832,7 +4832,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -4905,7 +4905,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_inreg@rel32@lo+4 @@ -4915,7 +4915,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4934,7 +4934,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; 
GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_inreg@rel32@hi+12 @@ -4944,7 +4944,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -4964,7 +4964,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i1_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i1_inreg@rel32@hi+12 @@ -4974,7 +4974,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -4996,7 +4996,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i8_inreg@rel32@lo+4 @@ -5005,7 +5005,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: 
v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5024,7 +5024,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i8_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i8_inreg@rel32@hi+12 @@ -5033,7 +5033,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5053,7 +5053,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12 @@ -5062,7 +5062,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5084,7 +5084,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 
s32, 0x400 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i16_inreg@rel32@lo+4 @@ -5093,7 +5093,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5112,7 +5112,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i16_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i16_inreg@rel32@hi+12 @@ -5121,7 +5121,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5141,7 +5141,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12 @@ -5150,7 +5150,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, 
s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5172,7 +5172,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 42 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i32_inreg@rel32@lo+4 @@ -5181,7 +5181,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5200,7 +5200,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s4, 42 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i32_inreg@rel32@hi+12 @@ -5209,7 +5209,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5229,7 +5229,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, 
external_void_func_i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12 @@ -5238,7 +5238,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5260,7 +5260,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_mov_b32 s5, 0 ; GFX9-NEXT: s_getpc_b64 s[6:7] @@ -5270,7 +5270,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5290,7 +5290,7 @@ ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i64_inreg@rel32@lo+4 @@ -5299,7 +5299,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5320,7 +5320,7 @@ ; 
GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4 @@ -5329,7 +5329,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5353,7 +5353,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[8:9] ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12 @@ -5361,7 +5361,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5381,7 +5381,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[8:9] ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12 @@ 
-5390,7 +5390,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5411,7 +5411,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 @@ -5420,7 +5420,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5443,7 +5443,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 @@ -5455,7 +5455,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5478,7 +5478,7 @@ ; GFX10-NEXT: 
s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[8:9] ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12 @@ -5486,7 +5486,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5510,7 +5510,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12 @@ -5518,7 +5518,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5542,7 +5542,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 ; GFX9-NEXT: s_getpc_b64 s[10:11] @@ -5552,7 +5552,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11] ; 
GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5575,7 +5575,7 @@ ; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[10:11] ; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v3i64_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v3i64_inreg@rel32@hi+12 @@ -5583,7 +5583,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5607,7 +5607,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12 @@ -5615,7 +5615,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5642,7 +5642,7 @@ ; 
GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 ; GFX9-NEXT: s_mov_b32 s10, 3 @@ -5654,7 +5654,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5679,7 +5679,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s11, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[12:13] ; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v4i64_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v4i64_inreg@rel32@hi+12 @@ -5687,7 +5687,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5713,7 +5713,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12 @@ -5721,7 +5721,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 
s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5745,7 +5745,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_movk_i32 s4, 0x4400 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f16_inreg@rel32@lo+4 @@ -5754,7 +5754,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5773,7 +5773,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f16_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f16_inreg@rel32@hi+12 @@ -5782,7 +5782,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5802,7 +5802,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; 
GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12 @@ -5811,7 +5811,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5833,7 +5833,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 4.0 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f32_inreg@rel32@lo+4 @@ -5842,7 +5842,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5861,7 +5861,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s4, 4.0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f32_inreg@rel32@hi+12 @@ -5870,7 +5870,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: 
s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5890,7 +5890,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12 @@ -5899,7 +5899,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -5921,7 +5921,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_getpc_b64 s[6:7] @@ -5931,7 +5931,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5951,7 +5951,7 @@ ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; 
GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2f32_inreg@rel32@lo+4 @@ -5960,7 +5960,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -5981,7 +5981,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4 @@ -5990,7 +5990,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6012,7 +6012,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 4.0 @@ -6023,7 +6023,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6045,7 +6045,7 @@ ; GFX10-NEXT: s_mov_b32 s6, 4.0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[8:9] ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3f32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32_inreg@rel32@hi+12 @@ -6053,7 +6053,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6076,7 +6076,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12 @@ -6084,7 +6084,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6106,7 +6106,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; 
GFX9-NEXT: s_mov_b32 s6, 4.0 @@ -6119,7 +6119,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6143,7 +6143,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s8, 0.5 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[10:11] ; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v5f32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v5f32_inreg@rel32@hi+12 @@ -6151,7 +6151,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6176,7 +6176,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12 @@ -6184,7 +6184,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6206,7 +6206,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000 ; GFX9-NEXT: s_getpc_b64 s[6:7] @@ -6216,7 +6216,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6236,7 +6236,7 @@ ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f64_inreg@rel32@lo+4 @@ -6245,7 +6245,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6266,7 +6266,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4 @@ -6275,7 +6275,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6297,7 +6297,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 0 @@ -6309,7 +6309,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6332,7 +6332,7 @@ ; GFX10-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[8:9] ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2f64_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64_inreg@rel32@hi+12 @@ -6340,7 +6340,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6364,7 +6364,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; 
GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12 @@ -6372,7 +6372,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6394,7 +6394,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 0 @@ -6408,7 +6408,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6433,7 +6433,7 @@ ; GFX10-NEXT: s_mov_b32 s8, 0 ; GFX10-NEXT: s_mov_b32 s9, 0x40200000 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[10:11] ; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v3f64_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v3f64_inreg@rel32@hi+12 @@ -6441,7 +6441,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; 
GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6467,7 +6467,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12 @@ -6475,7 +6475,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6498,7 +6498,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i16_inreg@rel32@hi+12 @@ -6506,7 +6506,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6525,7 +6525,7 @@ ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; 
GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i16_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i16_inreg@rel32@hi+12 @@ -6534,7 +6534,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6554,7 +6554,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12 @@ -6563,7 +6563,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6587,7 +6587,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12 @@ -6595,7 +6595,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; 
GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6614,7 +6614,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12 @@ -6623,7 +6623,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6643,7 +6643,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 @@ -6652,7 +6652,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded 
Reload @@ -6676,7 +6676,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12 @@ -6684,7 +6684,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6703,7 +6703,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12 @@ -6712,7 +6712,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6732,7 +6732,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v3f16_inreg@rel32@hi+12 @@ -6741,7 +6741,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6764,7 +6764,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 3 ; GFX9-NEXT: s_getpc_b64 s[6:7] @@ -6774,7 +6774,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6794,7 +6794,7 @@ ; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: s_mov_b32 s5, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4 @@ -6803,7 +6803,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6824,7 +6824,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3 
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 @@ -6833,7 +6833,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6855,7 +6855,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX9-NEXT: s_movk_i32 s5, 0x4400 ; GFX9-NEXT: s_getpc_b64 s[6:7] @@ -6865,7 +6865,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6885,7 +6885,7 @@ ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX10-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4 @@ -6894,7 +6894,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: 
s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6915,7 +6915,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 @@ -6924,7 +6924,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -6947,7 +6947,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12 @@ -6955,7 +6955,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -6974,7 +6974,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; 
GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12 @@ -6983,7 +6983,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7003,7 +7003,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 @@ -7012,7 +7012,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7035,7 +7035,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 0x40003 ; GFX9-NEXT: s_getpc_b64 s[6:7] @@ -7045,7 +7045,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 
0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7065,7 +7065,7 @@ ; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4 @@ -7074,7 +7074,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7095,7 +7095,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 @@ -7104,7 +7104,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7127,7 +7127,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 
0x400 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2f16_inreg@rel32@hi+12 @@ -7135,7 +7135,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7154,7 +7154,7 @@ ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2f16_inreg@rel32@hi+12 @@ -7163,7 +7163,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7183,7 +7183,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12 @@ -7192,7 +7192,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; 
GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7216,7 +7216,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12 @@ -7224,7 +7224,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7243,7 +7243,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12 @@ -7252,7 +7252,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7272,7 +7272,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; 
GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 @@ -7281,7 +7281,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7304,7 +7304,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_getpc_b64 s[6:7] @@ -7314,7 +7314,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7334,7 +7334,7 @@ ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4 @@ -7343,7 +7343,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: 
v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7364,7 +7364,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 @@ -7373,7 +7373,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7395,7 +7395,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 @@ -7406,7 +7406,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7428,7 +7428,7 @@ ; GFX10-NEXT: s_mov_b32 s6, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[8:9] ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 
s9, s9, external_void_func_v3i32_inreg@rel32@hi+12 @@ -7436,7 +7436,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7459,7 +7459,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12 @@ -7467,7 +7467,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7489,7 +7489,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 @@ -7501,7 +7501,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ 
-7524,7 +7524,7 @@ ; GFX10-NEXT: s_mov_b32 s7, 6 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[8:9] ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32_inreg@rel32@hi+12 @@ -7532,7 +7532,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7556,7 +7556,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12 @@ -7564,7 +7564,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7587,7 +7587,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[8:9] ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX9-NEXT: 
s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12 @@ -7595,7 +7595,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7614,7 +7614,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[8:9] ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12 @@ -7623,7 +7623,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7643,7 +7643,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 @@ -7652,7 +7652,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 
s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7675,7 +7675,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 @@ -7687,7 +7687,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7710,7 +7710,7 @@ ; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[8:9] ; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12 @@ -7718,7 +7718,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7742,7 +7742,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, 
external_void_func_v4i32_inreg@rel32@hi+12 @@ -7750,7 +7750,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7772,7 +7772,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 @@ -7785,7 +7785,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7809,7 +7809,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s8, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[10:11] ; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v5i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v5i32_inreg@rel32@hi+12 @@ -7817,7 +7817,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7842,7 +7842,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 
v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12 @@ -7850,7 +7850,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7873,7 +7873,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 ; GFX9-NEXT: s_getpc_b64 s[12:13] @@ -7883,7 +7883,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7902,7 +7902,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[12:13] ; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12 @@ -7913,7 +7913,7 @@ ; GFX10-NEXT: s_swappc_b64 
s[30:31], s[12:13] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -7933,7 +7933,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) @@ -7944,7 +7944,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -7968,7 +7968,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 @@ -7984,7 +7984,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8011,7 +8011,7 @@ ; GFX10-NEXT: s_mov_b32 s10, 7 ; GFX10-NEXT: s_mov_b32 s11, 8 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 
s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[12:13] ; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12 @@ -8019,7 +8019,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8047,7 +8047,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 @@ -8055,7 +8055,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -8078,7 +8078,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0 ; GFX9-NEXT: s_getpc_b64 s[20:21] @@ -8088,7 +8088,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: 
s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8107,7 +8107,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[20:21] ; GFX10-NEXT: s_add_u32 s20, s20, external_void_func_v16i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s21, s21, external_void_func_v16i32_inreg@rel32@hi+12 @@ -8118,7 +8118,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[20:21] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8138,7 +8138,7 @@ ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) @@ -8149,7 +8149,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -8192,7 +8192,7 @@ ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40 ; 
GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 16 ; GFX9-NEXT: v_writelane_b32 v40, s31, 17 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -8240,7 +8240,7 @@ ; GFX9-NEXT: v_readlane_b32 s38, v40, 2 ; GFX9-NEXT: v_readlane_b32 s37, v40, 1 ; GFX9-NEXT: v_readlane_b32 s36, v40, 0 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 18 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8259,7 +8259,7 @@ ; GFX10-NEXT: v_writelane_b32 v40, s33, 18 ; GFX10-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s36, 0 ; GFX10-NEXT: v_writelane_b32 v40, s37, 1 ; GFX10-NEXT: v_writelane_b32 v40, s38, 2 @@ -8327,7 +8327,7 @@ ; GFX10-NEXT: v_readlane_b32 s38, v40, 2 ; GFX10-NEXT: v_readlane_b32 s37, v40, 1 ; GFX10-NEXT: v_readlane_b32 s36, v40, 0 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 18 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8347,7 +8347,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 2 @@ -8411,7 +8411,7 @@ ; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 0 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 
s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -8456,7 +8456,7 @@ ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: v_mov_b32_e32 v0, s22 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s46 @@ -8505,7 +8505,7 @@ ; GFX9-NEXT: v_readlane_b32 s38, v40, 2 ; GFX9-NEXT: v_readlane_b32 s37, v40, 1 ; GFX9-NEXT: v_readlane_b32 s36, v40, 0 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 18 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8526,7 +8526,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0 ; GFX10-NEXT: s_load_dword s22, s[4:5], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s36, 0 ; GFX10-NEXT: v_writelane_b32 v40, s37, 1 ; GFX10-NEXT: v_writelane_b32 v40, s38, 2 @@ -8596,7 +8596,7 @@ ; GFX10-NEXT: v_readlane_b32 s38, v40, 2 ; GFX10-NEXT: v_readlane_b32 s37, v40, 1 ; GFX10-NEXT: v_readlane_b32 s36, v40, 0 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 18 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8616,7 +8616,7 @@ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 0 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s37, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 2 @@ -8685,7 +8685,7 @@ ; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 0 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -8712,7 +8712,7 @@ ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 @@ -8724,7 +8724,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -8742,7 +8742,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 @@ -8758,7 +8758,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: 
buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -8777,7 +8777,7 @@ ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4 @@ -8789,7 +8789,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload @@ -8811,7 +8811,7 @@ ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 12 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 @@ -8860,7 +8860,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8881,7 +8881,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 13 ; GFX10-NEXT: v_mov_b32_e32 v2, 14 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v3, 15 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -8927,7 
+8927,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -8950,7 +8950,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 @@ -8992,7 +8992,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -9026,7 +9026,7 @@ ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 9 @@ -9083,7 +9083,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -9104,7 +9104,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 10 ; GFX10-NEXT: v_writelane_b32 v40, 
s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v3, 14 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 @@ -9158,7 +9158,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -9185,7 +9185,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 10 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 11 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s32 @@ -9228,7 +9228,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload @@ -9258,7 +9258,7 @@ ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000 @@ -9315,7 +9315,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 
-; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -9336,7 +9336,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 @@ -9390,7 +9390,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -9417,7 +9417,7 @@ ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 0x41200000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 0x41300000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s32 @@ -9460,7 +9460,7 @@ ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 -; GFX10-SCRATCH-NEXT: s_sub_u32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll 
b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -16,7 +16,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s35, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[34:35] ; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 @@ -29,7 +29,7 @@ ; GFX9-NEXT: v_readlane_b32 s5, v40, 3 ; GFX9-NEXT: v_readlane_b32 s35, v40, 1 ; GFX9-NEXT: v_readlane_b32 s34, v40, 0 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 4 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -47,7 +47,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s34, 0 ; GFX10-NEXT: v_writelane_b32 v40, s35, 1 ; GFX10-NEXT: s_getpc_b64 s[34:35] @@ -63,7 +63,7 @@ ; GFX10-NEXT: v_readlane_b32 s5, v40, 3 ; GFX10-NEXT: v_readlane_b32 s35, v40, 1 ; GFX10-NEXT: v_readlane_b32 s34, v40, 0 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 4 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -111,7 +111,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s34, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 @@ -128,7 +128,7 @@ ; GFX9-NEXT: ;;#ASMEND ; 
GFX9-NEXT: v_readlane_b32 s5, v40, 2 ; GFX9-NEXT: v_readlane_b32 s34, v40, 0 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -146,7 +146,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 @@ -165,7 +165,7 @@ ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s5, v40, 2 ; GFX10-NEXT: v_readlane_b32 s34, v40, 0 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -189,7 +189,7 @@ ; GFX9-NEXT: v_writelane_b32 v41, s33, 2 ; GFX9-NEXT: v_writelane_b32 v41, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v31 @@ -207,7 +207,7 @@ ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s4, v41, 0 ; GFX9-NEXT: v_readlane_b32 s5, v41, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -225,7 +225,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v41, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: 
s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v31 @@ -244,7 +244,7 @@ ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: v_readlane_b32 s4, v41, 0 ; GFX10-NEXT: v_readlane_b32 s5, v41, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -270,7 +270,7 @@ ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: v_writelane_b32 v40, s33, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 @@ -285,7 +285,7 @@ ; GFX9-NEXT: v_readlane_b32 s4, v40, 1 ; GFX9-NEXT: v_readlane_b32 s33, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 2 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -303,7 +303,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 @@ -320,7 +320,7 @@ ; GFX10-NEXT: v_readlane_b32 s4, v40, 1 ; GFX10-NEXT: v_readlane_b32 s33, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 2 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; 
GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -345,7 +345,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s34, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 @@ -360,7 +360,7 @@ ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s5, v40, 2 ; GFX9-NEXT: v_readlane_b32 s34, v40, 0 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -378,7 +378,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 @@ -395,7 +395,7 @@ ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s5, v40, 2 ; GFX10-NEXT: v_readlane_b32 s34, v40, 0 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -419,7 +419,7 @@ ; GFX9-NEXT: v_writelane_b32 v41, s33, 2 ; GFX9-NEXT: v_writelane_b32 v41, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 @@ -435,7 +435,7 @@ ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], 
s33 ; 4-byte Folded Reload ; GFX9-NEXT: v_readlane_b32 s4, v41, 0 ; GFX9-NEXT: v_readlane_b32 s5, v41, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -453,7 +453,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v41, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 @@ -470,7 +470,7 @@ ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX10-NEXT: v_readlane_b32 s4, v41, 0 ; GFX10-NEXT: v_readlane_b32 s5, v41, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -578,7 +578,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12 @@ -586,7 +586,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -604,7 +604,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: 
v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12 @@ -613,7 +613,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -635,7 +635,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12 @@ -643,7 +643,7 @@ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -661,7 +661,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12 @@ -670,7 +670,7 @@ ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 -; GFX10-NEXT: 
s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -693,7 +693,7 @@ ; GFX9-NEXT: v_writelane_b32 v40, s40, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 @@ -708,7 +708,7 @@ ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s5, v40, 2 ; GFX9-NEXT: v_readlane_b32 s40, v40, 0 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -726,7 +726,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 @@ -743,7 +743,7 @@ ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s5, v40, 2 ; GFX10-NEXT: v_readlane_b32 s40, v40, 0 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -768,7 +768,7 @@ ; GFX9-NEXT: v_writelane_b32 v41, s40, 0 ; GFX9-NEXT: v_writelane_b32 v41, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: 
;;#ASMSTART ; GFX9-NEXT: ; def s40 @@ -792,7 +792,7 @@ ; GFX9-NEXT: v_readlane_b32 s4, v41, 1 ; GFX9-NEXT: v_readlane_b32 s5, v41, 2 ; GFX9-NEXT: v_readlane_b32 s40, v41, 0 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v41, 3 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -810,7 +810,7 @@ ; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: v_writelane_b32 v41, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_add_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 @@ -836,7 +836,7 @@ ; GFX10-NEXT: v_readlane_b32 s4, v41, 1 ; GFX10-NEXT: v_readlane_b32 s5, v41, 2 ; GFX10-NEXT: v_readlane_b32 s40, v41, 0 -; GFX10-NEXT: s_sub_u32 s32, s32, 0x200 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v41, 3 ; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -1242,9 +1242,9 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s8, s33 -; GFX9-NEXT: s_add_u32 s33, s32, 0x1ffc0 +; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0 ; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000 -; GFX9-NEXT: s_add_u32 s32, s32, 0x60000 +; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12 @@ -1253,7 +1253,7 @@ ; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] ; 
GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: s_sub_u32 s32, s32, 0x60000 +; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000 ; GFX9-NEXT: s_mov_b32 s33, s8 ; GFX9-NEXT: s_setpc_b64 s[4:5] ; @@ -1262,10 +1262,10 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_mov_b32 s8, s33 -; GFX10-NEXT: s_add_u32 s33, s32, 0xffe0 +; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0 ; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] ; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000 -; GFX10-NEXT: s_add_u32 s32, s32, 0x30000 +; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 ; GFX10-NEXT: s_getpc_b64 s[6:7] ; GFX10-NEXT: s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12 @@ -1273,7 +1273,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: s_sub_u32 s32, s32, 0x30000 +; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000 ; GFX10-NEXT: s_mov_b32 s33, s8 ; GFX10-NEXT: s_setpc_b64 s[4:5] entry: diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -77,7 +77,7 @@ ; GCN-NEXT: ; %bb.0: ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GCN-NEXT: s_add_u32 s12, s12, s17 +; GCN-NEXT: s_add_i32 s12, s12, s17 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; GCN-NEXT: s_add_u32 s0, s0, s17 ; GCN-NEXT: s_addc_u32 s1, s1, 0 @@ -173,7 +173,7 @@ ; GCN-NEXT: ; %bb.0: ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GCN-NEXT: s_add_u32 s12, s12, s17 +; GCN-NEXT: s_add_i32 s12, s12, s17 ; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 ; GCN-NEXT: s_add_u32 s0, s0, s17 ; GCN-NEXT: s_addc_u32 s1, s1, 0 @@ -206,7 +206,7 @@ ; GCN-NEXT: s_mov_b64 exec, s[16:17] ; GCN-NEXT: v_writelane_b32 v43, s33, 17 ; 
GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill @@ -276,7 +276,7 @@ ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: s_sub_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 0xf800 ; GCN-NEXT: v_readlane_b32 s33, v43, 17 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -296,7 +296,7 @@ ; GCN-NEXT: s_mov_b64 exec, s[16:17] ; GCN-NEXT: v_writelane_b32 v43, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill @@ -367,7 +367,7 @@ ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: s_sub_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 0xf800 ; GCN-NEXT: v_readlane_b32 s33, v43, 17 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -387,7 +387,7 @@ ; GCN-NEXT: s_mov_b64 exec, s[16:17] ; GCN-NEXT: v_writelane_b32 v43, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 
0x800 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill @@ -458,7 +458,7 @@ ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: s_sub_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 0xf800 ; GCN-NEXT: v_readlane_b32 s33, v43, 17 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -479,7 +479,7 @@ ; GCN-NEXT: s_mov_b64 exec, s[16:17] ; GCN-NEXT: v_writelane_b32 v43, s33, 19 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill @@ -560,7 +560,7 @@ ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: s_sub_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 0xf800 ; GCN-NEXT: v_readlane_b32 s33, v43, 19 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -587,7 +587,7 @@ ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 v42, s33, 6 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: 
buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_writelane_b32 v42, s34, 0 @@ -618,7 +618,7 @@ ; GCN-NEXT: v_readlane_b32 s34, v42, 0 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v42, 6 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -70,7 +70,7 @@ ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: BB0_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 -; FLATSCR-NEXT: s_add_u32 s3, 0x3000, s2 +; FLATSCR-NEXT: s_add_i32 s3, s2, 0x3000 ; FLATSCR-NEXT: s_add_i32 s2, s2, 1 ; FLATSCR-NEXT: s_cmpk_lt_u32 s2, 0x2120 ; FLATSCR-NEXT: scratch_store_byte off, v0, s3 @@ -78,7 +78,7 @@ ; FLATSCR-NEXT: s_cbranch_scc1 BB0_1 ; FLATSCR-NEXT: ; %bb.2: ; %split ; FLATSCR-NEXT: s_movk_i32 s2, 0x2000 -; FLATSCR-NEXT: s_add_u32 s2, 0x3000, s2 +; FLATSCR-NEXT: s_addk_i32 s2, 0x3000 ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s2 offset:208 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s2, 0x3000 @@ -111,14 +111,14 @@ ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MUBUF-NEXT: s_mov_b32 s5, s33 -; MUBUF-NEXT: s_add_u32 s33, s32, 0x7ffc0 +; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0 ; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000 ; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33 ; MUBUF-NEXT: v_add_u32_e32 v3, 0x1000, v3 ; MUBUF-NEXT: v_mov_b32_e32 v4, 0 ; MUBUF-NEXT: v_add_u32_e32 v2, 64, v3 ; MUBUF-NEXT: s_mov_b32 
s4, 0 -; MUBUF-NEXT: s_add_u32 s32, s32, 0x180000 +; MUBUF-NEXT: s_add_i32 s32, s32, 0x180000 ; MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: BB1_1: ; %loadstoreloop @@ -141,7 +141,7 @@ ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:4 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x180000 +; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe80000 ; MUBUF-NEXT: s_mov_b32 s33, s5 ; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v4, v6 ; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v7, vcc @@ -153,17 +153,17 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s2, s33 -; FLATSCR-NEXT: s_add_u32 s33, s32, 0x1fff +; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff ; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 ; FLATSCR-NEXT: s_mov_b32 s0, 0 -; FLATSCR-NEXT: s_add_u32 s32, s32, 0x6000 +; FLATSCR-NEXT: s_addk_i32 s32, 0x6000 ; FLATSCR-NEXT: scratch_store_dword off, v2, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: BB1_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 -; FLATSCR-NEXT: s_add_u32 vcc_hi, s33, 0x1000 -; FLATSCR-NEXT: s_add_u32 s1, vcc_hi, s0 +; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x1000 +; FLATSCR-NEXT: s_add_i32 s1, s0, vcc_hi ; FLATSCR-NEXT: s_add_i32 s0, s0, 1 ; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120 ; FLATSCR-NEXT: scratch_store_byte off, v2, s1 @@ -171,14 +171,14 @@ ; FLATSCR-NEXT: s_cbranch_scc1 BB1_1 ; FLATSCR-NEXT: ; %bb.2: ; %split ; FLATSCR-NEXT: s_movk_i32 s0, 0x2000 -; FLATSCR-NEXT: s_add_u32 s1, s33, 0x1000 -; FLATSCR-NEXT: s_add_u32 s0, s1, s0 +; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000 +; FLATSCR-NEXT: s_add_i32 s0, s0, s1 ; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_add_u32 s0, s33, 0x1000 +; FLATSCR-NEXT: s_add_i32 s0, s33, 0x1000 ; FLATSCR-NEXT: 
scratch_load_dwordx2 v[4:5], off, s0 offset:64 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x6000 +; FLATSCR-NEXT: s_addk_i32 s32, 0xa000 ; FLATSCR-NEXT: s_mov_b32 s33, s2 ; FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc @@ -286,7 +286,7 @@ ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: BB2_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 -; FLATSCR-NEXT: s_add_u32 s3, 0x2000, s2 +; FLATSCR-NEXT: s_add_i32 s3, s2, 0x2000 ; FLATSCR-NEXT: s_add_i32 s2, s2, 1 ; FLATSCR-NEXT: s_cmpk_lt_u32 s2, 0x2120 ; FLATSCR-NEXT: scratch_store_byte off, v0, s3 @@ -294,7 +294,7 @@ ; FLATSCR-NEXT: s_cbranch_scc1 BB2_1 ; FLATSCR-NEXT: ; %bb.2: ; %split ; FLATSCR-NEXT: s_movk_i32 s2, 0x1000 -; FLATSCR-NEXT: s_add_u32 s2, 0x2000, s2 +; FLATSCR-NEXT: s_addk_i32 s2, 0x2000 ; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s2 offset:720 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 offset:704 glc diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -193,7 +193,7 @@ ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v43, s33, 4 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_add_u32 s32, s32, 0x800 +; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: v_writelane_b32 v43, s34, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 @@ -223,7 +223,7 @@ ; GFX9-NEXT: v_readlane_b32 s5, v43, 3 ; GFX9-NEXT: v_readlane_b32 s35, v43, 1 ; GFX9-NEXT: v_readlane_b32 s34, v43, 0 -; GFX9-NEXT: s_sub_u32 s32, s32, 0x800 +; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v43, 4 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload diff --git 
a/llvm/test/CodeGen/AMDGPU/need-fp-from-csr-vgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-csr-vgpr-spill.ll --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-csr-vgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-csr-vgpr-spill.ll @@ -8,11 +8,11 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s4, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_add_u32 s32, s32, 0x200 +; CHECK-NEXT: s_addk_i32 s32, 0x200 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_sub_u32 s32, s32, 0x200 +; CHECK-NEXT: s_addk_i32 s32, 0xfe00 ; CHECK-NEXT: s_mov_b32 s33, s4 ; CHECK-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, addrspace(5) @@ -29,7 +29,7 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s8, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_add_u32 s32, s32, 0x400 +; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12 @@ -40,7 +40,7 @@ ; CHECK-NEXT: ; clobber csr v40 ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: s_sub_u32 s32, s32, 0x400 +; CHECK-NEXT: s_addk_i32 s32, 0xfc00 ; CHECK-NEXT: s_mov_b32 s33, s8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -16,7 +16,7 @@ ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-DAG: v_writelane_b32 v40, s33, 2 ; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: s_add_u32 s32, s32, 0x400 +; GCN-DAG: s_addk_i32 s32, 0x400 ; GCN-DAG: v_writelane_b32 v40, s30, 0 ; GCN-DAG: v_writelane_b32 v40, s31, 1 @@ -25,7 +25,7 @@ ; GCN: v_readlane_b32 s4, v40, 0 ; GCN: 
v_readlane_b32 s5, v40, 1 -; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -40,10 +40,10 @@ ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use: ; GCN: s_waitcnt ; GCN: s_mov_b32 s33, s32 -; GCN-DAG: s_add_u32 s32, s32, 0x1400{{$}} +; GCN-DAG: s_addk_i32 s32, 0x1400{{$}} ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset: ; GCN: s_swappc_b64 -; GCN: s_sub_u32 s32, s32, 0x1400{{$}} +; GCN: s_addk_i32 s32, 0xec00{{$}} ; GCN: s_setpc_b64 define void @test_func_call_external_void_func_i32_imm_stack_use() #0 { %alloca = alloca [16 x i32], align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -65,16 +65,14 @@ ; FLATSCR-NEXT: s_cmp_lg_u32 s5, 0 ; FLATSCR-NEXT: s_cbranch_scc1 BB0_3 ; FLATSCR-NEXT: ; %bb.2: ; %bb.1 -; FLATSCR-NEXT: s_mov_b32 s2, s32 -; FLATSCR-NEXT: s_add_i32 s3, s2, 0x1000 +; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000 ; FLATSCR-NEXT: v_mov_b32_e32 v1, 0 -; FLATSCR-NEXT: s_add_u32 s2, s2, 0x1000 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 1 +; FLATSCR-NEXT: s_lshl_b32 s3, s6, 2 +; FLATSCR-NEXT: s_mov_b32 s32, s2 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s2 -; FLATSCR-NEXT: s_lshl_b32 s2, s6, 2 -; FLATSCR-NEXT: s_mov_b32 s32, s3 -; FLATSCR-NEXT: s_add_i32 s3, s3, s2 -; FLATSCR-NEXT: scratch_load_dword v2, off, s3 +; FLATSCR-NEXT: s_add_i32 s2, s2, s3 +; FLATSCR-NEXT: scratch_load_dword v2, off, s2 ; FLATSCR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: v_add_u32_e32 v0, v2, v0 @@ -221,7 +219,7 @@ ; MUBUF-NEXT: s_mov_b32 s7, s33 ; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_mov_b32 s33, s32 
-; MUBUF-NEXT: s_add_u32 s32, s32, 0x400 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc ; MUBUF-NEXT: s_cbranch_execz BB2_3 ; MUBUF-NEXT: ; %bb.1: ; %bb.0 @@ -247,17 +245,17 @@ ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: global_store_dword v[0:1], v0, off ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x400 +; MUBUF-NEXT: s_addk_i32 s32, 0xfc00 ; MUBUF-NEXT: s_mov_b32 s33, s7 ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: func_non_entry_block_static_alloca_align4: ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_mov_b32 s4, s33 +; FLATSCR-NEXT: s_mov_b32 s3, s33 ; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_mov_b32 s33, s32 -; FLATSCR-NEXT: s_add_u32 s32, s32, 16 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc ; FLATSCR-NEXT: s_cbranch_execz BB2_3 ; FLATSCR-NEXT: ; %bb.1: ; %bb.0 @@ -265,16 +263,14 @@ ; FLATSCR-NEXT: s_and_b64 exec, exec, vcc ; FLATSCR-NEXT: s_cbranch_execz BB2_3 ; FLATSCR-NEXT: ; %bb.2: ; %bb.1 -; FLATSCR-NEXT: s_mov_b32 s2, s32 -; FLATSCR-NEXT: s_add_i32 s3, s2, 0x1000 -; FLATSCR-NEXT: s_add_u32 s2, s2, 0x1000 +; FLATSCR-NEXT: s_add_i32 s2, s32, 0x1000 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v3, 1 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[2:3], s2 -; FLATSCR-NEXT: v_lshl_add_u32 v2, v4, 2, s3 +; FLATSCR-NEXT: v_lshl_add_u32 v2, v4, 2, s2 ; FLATSCR-NEXT: scratch_load_dword v2, v2, off ; FLATSCR-NEXT: v_and_b32_e32 v3, 0x3ff, v5 -; FLATSCR-NEXT: s_mov_b32 s32, s3 +; FLATSCR-NEXT: s_mov_b32 s32, s2 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: v_add_u32_e32 v2, v2, v3 ; FLATSCR-NEXT: global_store_dword v[0:1], v2, off @@ -283,8 +279,8 @@ ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: global_store_dword v[0:1], v0, off ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_sub_u32 s32, s32, 16 -; FLATSCR-NEXT: s_mov_b32 s33, s4 +; 
FLATSCR-NEXT: s_add_i32 s32, s32, -16 +; FLATSCR-NEXT: s_mov_b32 s33, s3 ; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: @@ -319,10 +315,10 @@ ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MUBUF-NEXT: s_mov_b32 s7, s33 -; MUBUF-NEXT: s_add_u32 s33, s32, 0xfc0 +; MUBUF-NEXT: s_add_i32 s33, s32, 0xfc0 ; MUBUF-NEXT: s_and_b32 s33, s33, 0xfffff000 ; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; MUBUF-NEXT: s_add_u32 s32, s32, 0x2000 +; MUBUF-NEXT: s_addk_i32 s32, 0x2000 ; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc ; MUBUF-NEXT: s_cbranch_execz BB3_2 ; MUBUF-NEXT: ; %bb.1: ; %bb.0 @@ -345,7 +341,7 @@ ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: global_store_dword v[0:1], v0, off ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_sub_u32 s32, s32, 0x2000 +; MUBUF-NEXT: s_addk_i32 s32, 0xe000 ; MUBUF-NEXT: s_mov_b32 s33, s7 ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; @@ -353,10 +349,10 @@ ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s3, s33 -; FLATSCR-NEXT: s_add_u32 s33, s32, 63 +; FLATSCR-NEXT: s_add_i32 s33, s32, 63 ; FLATSCR-NEXT: s_andn2_b32 s33, s33, 63 ; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; FLATSCR-NEXT: s_add_u32 s32, s32, 0x80 +; FLATSCR-NEXT: s_addk_i32 s32, 0x80 ; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc ; FLATSCR-NEXT: s_cbranch_execz BB3_2 ; FLATSCR-NEXT: ; %bb.1: ; %bb.0 @@ -377,7 +373,7 @@ ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: global_store_dword v[0:1], v0, off ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x80 +; FLATSCR-NEXT: s_addk_i32 s32, 0xff80 ; FLATSCR-NEXT: s_mov_b32 s33, s3 ; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -29,25 +29,25 @@ ; CHECK-LABEL: name: 
scavenge_sgpr_pei_no_sgprs ; CHECK: liveins: $vgpr1, $vgpr2 ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc + ; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc - ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc + ; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc ; CHECK: $vgpr3 = COPY killed $sgpr33 - ; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc + ; CHECK: $sgpr33 = 
S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc ; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 - ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc ; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc + ; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK: S_ENDPGM 0, implicit $vcc @@ -81,18 +81,18 @@ ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr ; CHECK: liveins: $sgpr29, $vgpr1 ; CHECK: $sgpr29 = frame-setup COPY $sgpr33 - ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc - ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, 
implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc + ; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc ; CHECK: $vgpr2 = COPY killed $sgpr33 - ; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc + ; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc ; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31 - ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc ; CHECK: $sgpr33 = frame-destroy COPY $sgpr29 ; CHECK: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def 
$sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -125,16 +125,16 @@ ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64 ; CHECK: liveins: $sgpr28, $vgpr1 ; CHECK: $sgpr28 = frame-setup COPY $sgpr33 - ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc - ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK: $sgpr29 = S_ADD_U32 killed $sgpr29, 8192, implicit-def $scc + ; CHECK: $sgpr29 = S_ADD_I32 killed $sgpr29, 8192, implicit-def $scc ; CHECK: $vgpr2 = COPY killed $sgpr29 ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit 
$sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 - ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc ; CHECK: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -166,16 +166,16 @@ ; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc ; CHECK: liveins: $sgpr28, $vgpr1 ; CHECK: $sgpr28 = frame-setup COPY $sgpr33 - ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc - ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, 
implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31 ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $vcc_lo = S_MOV_B32 8192 ; CHECK: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 - ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc ; CHECK: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK: S_ENDPGM 0 S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31 diff --git 
a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir @@ -25,44 +25,44 @@ ; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs ; MUBUF: liveins: $vgpr1, $vgpr2 ; MUBUF: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; MUBUF: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc + ; MUBUF: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; MUBUF: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; MUBUF: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; MUBUF: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc - ; MUBUF: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; MUBUF: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; MUBUF: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; MUBUF: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; MUBUF: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; 
MUBUF: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec ; MUBUF: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 - ; MUBUF: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; MUBUF: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc ; MUBUF: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; MUBUF: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; MUBUF: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc + ; MUBUF: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) ; MUBUF: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; MUBUF: S_ENDPGM 0, implicit $vcc ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs ; FLATSCR: liveins: $vgpr1, $vgpr2 ; FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc + ; FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc + ; FLATSCR: $sgpr33 = frame-setup S_ADD_I32 
$sgpr32, 8191, implicit-def $scc ; FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc - ; FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc + ; FLATSCR: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def $scc ; FLATSCR: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec - ; FLATSCR: $sgpr33 = S_ADD_U32 $sgpr33, 8192, implicit-def $scc + ; FLATSCR: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc ; FLATSCR: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 - ; FLATSCR: $sgpr33 = S_SUB_U32 $sgpr33, 8192, implicit-def $scc - ; FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc + ; FLATSCR: $sgpr33 = S_ADD_I32 $sgpr33, -8192, implicit-def $scc + ; FLATSCR: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def $scc ; FLATSCR: $sgpr33 = 
V_READLANE_B32 $vgpr2, 0 ; FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc + ; FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc ; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; FLATSCR: S_ENDPGM 0, implicit $vcc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -27,13 +27,13 @@ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; CHECK: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 262080, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def $scc - ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 524288, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 524288, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def 
$sgpr31, implicit-def $vcc ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 - ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 524288, implicit-def $scc + ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -524288, implicit-def $scc ; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.2, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -26,71 +26,71 @@ ; GFX8-LABEL: name: pei_scavenge_vgpr_spill ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 ; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc + ; GFX8: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX8: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; GFX8: 
$sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc - ; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; GFX8: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; GFX8: $sgpr7 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc + ; GFX8: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) ; GFX8: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8: $vcc_lo = S_MOV_B32 8192 ; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec ; GFX8: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec - ; GFX8: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; GFX8: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc ; GFX8: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc + ; GFX8: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc ; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) ; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX8: $sgpr4 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc + ; GFX8: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc ; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) ; GFX8: S_ENDPGM 0, csr_amdgpu_allvgprs ; GFX9-LABEL: name: pei_scavenge_vgpr_spill ; GFX9: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 ; GFX9: 
$sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX9: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc + ; GFX9: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX9: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; GFX9: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc - ; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; GFX9: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; GFX9: $sgpr7 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc + ; GFX9: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) ; GFX9: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX9: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec ; GFX9: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec - ; GFX9: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; GFX9: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc ; GFX9: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX9: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc + ; GFX9: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) 
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX9: $sgpr4 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc + ; GFX9: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) ; GFX9: S_ENDPGM 0, csr_amdgpu_allvgprs ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 ; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc + ; GFX9-FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) ; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc + ; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX9-FLATSCR: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def $scc - ; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 24576, implicit-def $scc + ; GFX9-FLATSCR: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def $scc ; GFX9-FLATSCR: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec - ; GFX9-FLATSCR: $vcc_hi = S_ADD_U32 $sgpr33, 8192, implicit-def $scc + ; GFX9-FLATSCR: $vcc_hi = S_ADD_I32 $sgpr33, 8192, implicit-def $scc ; GFX9-FLATSCR: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec - ; GFX9-FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc + ; GFX9-FLATSCR: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def $scc ; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; 
GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc + ; GFX9-FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5) ; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -602,7 +602,7 @@ ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 - ; GCN64-MUBUF: $sgpr2 = S_ADD_U32 $sgpr33, 262144, implicit-def $scc + ; GCN64-MUBUF: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def $scc ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, align 4096, addrspace 5) ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 @@ -764,7 +764,7 @@ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 - ; GCN32-MUBUF: $sgpr1 = S_ADD_U32 $sgpr33, 131072, implicit-def $scc + ; GCN32-MUBUF: $sgpr1 = S_ADD_I32 
$sgpr33, 131072, implicit-def $scc ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, align 4096, addrspace 5) ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 @@ -922,7 +922,7 @@ ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 - ; GCN64-FLATSCR: $sgpr9 = S_ADD_U32 $sgpr33, 4096, implicit-def $scc + ; GCN64-FLATSCR: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def $scc ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, align 4096, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 @@ -1129,7 +1129,7 @@ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $sgpr2 = S_ADD_U32 $sgpr33, 262144, implicit-def $scc + ; GCN64-MUBUF: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def $scc ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, align 4096, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 ; GCN64-MUBUF: 
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) @@ -1265,7 +1265,7 @@ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $sgpr1 = S_ADD_U32 $sgpr33, 131072, implicit-def $scc + ; GCN32-MUBUF: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def $scc ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, align 4096, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) @@ -1397,7 +1397,7 @@ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $sgpr9 = S_ADD_U32 $sgpr33, 4096, implicit-def $scc + ; GCN64-FLATSCR: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def $scc ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, align 4096, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ 
-200,7 +200,7 @@ ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec ; GCN: s_mov_b32 s33, s32 -; GCN-DAG: s_add_u32 s32, s32, 0x400 +; GCN-DAG: s_addk_i32 s32, 0x400 ; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill @@ -224,7 +224,7 @@ ; GCN-DAG: v_readlane_b32 s34, v42, 0 ; GCN-DAG: v_readlane_b32 s35, v42, 1 -; GCN: s_sub_u32 s32, s32, 0x400 +; GCN: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll --- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll @@ -78,10 +78,10 @@ ; 0x40000 / 64 = 4096 (for wave64) %a = load volatile i32, i32 addrspace(5)* %aptr - ; MUBUF: s_add_u32 s32, s32, 0x40000 + ; MUBUF: s_add_i32 s32, s32, 0x40000 ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Spill - ; MUBUF: s_sub_u32 s32, s32, 0x40000 - ; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0x1000 + ; MUBUF: s_add_i32 s32, s32, 0xfffc0000 + ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1000 ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a) @@ -97,10 +97,10 @@ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 - ; MUBUF: s_add_u32 s32, s32, 0x40000 + ; MUBUF: s_add_i32 s32, s32, 0x40000 ; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Reload - ; MUBUF: s_sub_u32 s32, s32, 0x40000 - ; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], 
s32, 0x1000 + ; MUBUF: s_add_i32 s32, s32, 0xfffc0000 + ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1000 ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload ; Force %a to spill with no free SGPRs @@ -202,9 +202,9 @@ %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 ; 0x40000 / 64 = 4096 (for wave64) - ; MUBUF: s_add_u32 s4, s32, 0x40000 + ; MUBUF: s_add_i32 s4, s32, 0x40000 ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill - ; FLATSCR: s_add_u32 s0, s32, 0x1000 + ; FLATSCR: s_add_i32 s0, s32, 0x1000 ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s0 ; 4-byte Folded Spill %a = load volatile i32, i32 addrspace(5)* %aptr @@ -257,7 +257,7 @@ %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)* ; 0x3ff00 / 64 = 4092 (for wave64) - ; MUBUF: s_add_u32 s4, s32, 0x3ff00 + ; MUBUF: s_add_i32 s4, s32, 0x3ff00 ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4092 ; 8-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -45,11 +45,12 @@ } ; CHECK-LABEL: test_limited_sgpr -; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] -; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] +; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] +; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] +; GFX6: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32 -; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] +; GFX6-NEXT: s_add_i32 s32, s32, 0x[[OFFSET:[0-9a-f]+]] ; GFX6: NumSgprs: 48 ; GFX6: ScratchSize: 8608 
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -6,7 +6,7 @@ define amdgpu_kernel void @max_alignment_128() #0 { ; VI-LABEL: max_alignment_128: ; VI: ; %bb.0: -; VI-NEXT: s_add_u32 s4, s4, s7 +; VI-NEXT: s_add_i32 s4, s4, s7 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; VI-NEXT: s_add_u32 s0, s0, s7 ; VI-NEXT: s_addc_u32 s1, s1, 0 @@ -110,7 +110,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 { ; VI-LABEL: stackrealign_attr: ; VI: ; %bb.0: -; VI-NEXT: s_add_u32 s4, s4, s7 +; VI-NEXT: s_add_i32 s4, s4, s7 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; VI-NEXT: s_add_u32 s0, s0, s7 ; VI-NEXT: s_addc_u32 s1, s1, 0 @@ -214,7 +214,7 @@ define amdgpu_kernel void @alignstack_attr() #2 { ; VI-LABEL: alignstack_attr: ; VI: ; %bb.0: -; VI-NEXT: s_add_u32 s4, s4, s7 +; VI-NEXT: s_add_i32 s4, s4, s7 ; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; VI-NEXT: s_add_u32 s0, s0, s7 ; VI-NEXT: s_addc_u32 s1, s1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -32,17 +32,17 @@ } ; GCN-LABEL: {{^}}needs_align16_stack_align4: -; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}} +; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}} ; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffffc00 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: v_or_b32_e32 v{{[0-9]+}}, 12 -; GCN: s_add_u32 s32, s32, 0x2800{{$}} +; GCN: s_addk_i32 s32, 0x2800{{$}} ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen -; GCN: s_sub_u32 s32, s32, 0x2800 +; GCN: s_addk_i32 s32, 0xd800 ; GCN: ; ScratchSize: 
160 define void @needs_align16_stack_align4(i32 %idx) #2 { @@ -53,17 +53,17 @@ } ; GCN-LABEL: {{^}}needs_align32: -; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}} +; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}} ; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffff800 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: v_or_b32_e32 v{{[0-9]+}}, 12 -; GCN: s_add_u32 s32, s32, 0x3000{{$}} +; GCN: s_addk_i32 s32, 0x3000{{$}} ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen -; GCN: s_sub_u32 s32, s32, 0x3000 +; GCN: s_addk_i32 s32, 0xd000 ; GCN: ; ScratchSize: 192 define void @needs_align32(i32 %idx) #0 { @@ -74,12 +74,12 @@ } ; GCN-LABEL: {{^}}force_realign4: -; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}} +; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}} ; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffffff00 -; GCN: s_add_u32 s32, s32, 0xd00{{$}} +; GCN: s_addk_i32 s32, 0xd00{{$}} ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen -; GCN: s_sub_u32 s32, s32, 0xd00 +; GCN: s_addk_i32 s32, 0xf300 ; GCN: ; ScratchSize: 52 define void @force_realign4(i32 %idx) #1 { @@ -125,12 +125,12 @@ ; GCN-LABEL: {{^}}default_realign_align128: ; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 -; GCN-NEXT: s_add_u32 s33, s32, 0x1fc0 +; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000 -; GCN-NEXT: s_add_u32 s32, s32, 0x4000 +; GCN-NEXT: s_addk_i32 s32, 0x4000 ; GCN-NOT: s33 ; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}} -; GCN: s_sub_u32 s32, s32, 0x4000 +; GCN: s_addk_i32 s32, 0xc000 ; GCN: s_mov_b32 s33, [[FP_COPY]] define void @default_realign_align128(i32 %idx) #0 { %alloca.align = alloca i32, align 128, addrspace(5) @@ -159,7 +159,7 @@ ; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill ; 
GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 [[VGPR_REG]], s33, 2 -; GCN-DAG: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0 +; GCN-DAG: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0 ; GCN-DAG: v_writelane_b32 [[VGPR_REG]], s34, 3 ; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000 ; GCN: s_mov_b32 s34, s32 @@ -167,11 +167,11 @@ ; GCN: buffer_store_dword v32, off, s[0:3], s33 offset:1024 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34 -; GCN-DAG: s_add_u32 s32, s32, 0x30000 +; GCN-DAG: s_add_i32 s32, s32, 0x30000 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: s_sub_u32 s32, s32, 0x30000 +; GCN: s_add_i32 s32, s32, 0xfffd0000 ; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -193,17 +193,17 @@ ; GCN-LABEL: needs_align1024_stack_args_used_inside_loop: ; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 -; GCN-NEXT: s_add_u32 s33, s32, 0xffc0 +; GCN-NEXT: s_add_i32 s33, s32, 0xffc0 ; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34 ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000 ; GCN-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0 ; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34 -; GCN: s_add_u32 s32, s32, 0x30000 +; GCN: s_add_i32 s32, s32, 0x30000 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:1024 ; GCN: buffer_load_dword v{{[0-9]+}}, [[VGPR_REG]], s[0:3], 0 offen ; GCN: v_add_u32_e32 [[VGPR_REG]], vcc, 4, [[VGPR_REG]] -; GCN: s_sub_u32 s32, s32, 0x30000 +; GCN: s_add_i32 s32, s32, 0xfffd0000 ; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] ; GCN-NEXT: s_mov_b32 s34, [[BP_COPY]] ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -290,16 +290,16 @@ ; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset ; GCN: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: s_add_u32 s6, s32, 0x42100 +; GCN-NEXT: s_add_i32 s6, s32, 0x42100 ; GCN-NEXT: buffer_store_dword v39, off, 
s[0:3], s6 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, s33 ; GCN-NOT: v_mov_b32_e32 v0, 0x1088 -; GCN-NEXT: s_add_u32 s6, s32, 0x42200 +; GCN-NEXT: s_add_i32 s6, s32, 0x42200 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GCN-NEXT: v_mov_b32_e32 v0, s34 ; GCN-NOT: v_mov_b32_e32 v0, 0x108c -; GCN-NEXT: s_add_u32 s6, s32, 0x42300 +; GCN-NEXT: s_add_i32 s6, s32, 0x42300 ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill %local_val = alloca i32, align 128, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -11,7 +11,7 @@ ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] @@ -53,7 +53,7 @@ ; GCN-NEXT: BB0_7: ; %UnifiedReturnBlock ; GCN-NEXT: v_readlane_b32 s4, v40, 0 ; GCN-NEXT: v_readlane_b32 s5, v40, 1 -; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload @@ -191,7 +191,7 @@ ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_writelane_b32 v43, s33, 4 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_u32 s32, s32, 0x800 +; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill diff --git 
a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1129,8 +1129,8 @@ ; GCN-NEXT: v_writelane_b32 v40, s33, 2 ; GCN: s_mov_b32 s33, s32 -; GFX1064: s_add_u32 s32, s32, 0x400 -; GFX1032: s_add_u32 s32, s32, 0x200 +; GFX1064: s_addk_i32 s32, 0x400 +; GFX1032: s_addk_i32 s32, 0x200 ; GCN-DAG: v_writelane_b32 v40, s30, 0 @@ -1140,8 +1140,8 @@ ; GCN-DAG: v_readlane_b32 s5, v40, 1 -; GFX1064: s_sub_u32 s32, s32, 0x400 -; GFX1032: s_sub_u32 s32, s32, 0x200 +; GFX1064: s_addk_i32 s32, 0xfc00 +; GFX1032: s_addk_i32 s32, 0xfe00 ; GCN: v_readlane_b32 s33, v40, 2 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -354,7 +354,7 @@ ; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11] ; GFX9-O0-NEXT: v_writelane_b32 v3, s33, 7 ; GFX9-O0-NEXT: s_mov_b32 s33, s32 -; GFX9-O0-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-O0-NEXT: s_addk_i32 s32, 0x400 ; GFX9-O0-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-O0-NEXT: v_writelane_b32 v3, s8, 2 @@ -395,7 +395,7 @@ ; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[4:7], s8 offset:4 -; GFX9-O0-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-O0-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 7 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -414,7 +414,7 @@ ; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11] ; GFX9-O3-NEXT: s_mov_b32 s14, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 -; GFX9-O3-NEXT: s_add_u32 s32, s32, 0x400 +; GFX9-O3-NEXT: s_addk_i32 s32, 0x400 ; 
GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8 ; GFX9-O3-NEXT: s_not_b64 exec, exec @@ -431,7 +431,7 @@ ; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 -; GFX9-O3-NEXT: s_sub_u32 s32, s32, 0x400 +; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-O3-NEXT: s_mov_b32 s33, s14 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload @@ -555,7 +555,7 @@ ; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11] ; GFX9-O0-NEXT: v_writelane_b32 v11, s33, 9 ; GFX9-O0-NEXT: s_mov_b32 s33, s32 -; GFX9-O0-NEXT: s_add_u32 s32, s32, 0xc00 +; GFX9-O0-NEXT: s_addk_i32 s32, 0xc00 ; GFX9-O0-NEXT: v_writelane_b32 v11, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v11, s31, 1 ; GFX9-O0-NEXT: v_writelane_b32 v11, s9, 2 @@ -621,7 +621,7 @@ ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_mov_b32 s8, 0 ; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], s8 offset:4 -; GFX9-O0-NEXT: s_sub_u32 s32, s32, 0xc00 +; GFX9-O0-NEXT: s_addk_i32 s32, 0xf400 ; GFX9-O0-NEXT: v_readlane_b32 s33, v11, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload @@ -663,7 +663,7 @@ ; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11] ; GFX9-O3-NEXT: s_mov_b32 s14, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 -; GFX9-O3-NEXT: s_add_u32 s32, s32, 0x800 +; GFX9-O3-NEXT: s_addk_i32 s32, 0x800 ; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9 @@ -688,7 +688,7 @@ ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 -; GFX9-O3-NEXT: s_sub_u32 s32, s32, 0x800 +; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-O3-NEXT: s_mov_b32 s33, s14 ; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], 
s32 ; 4-byte Folded Reload