Index: llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -48,6 +48,7 @@ void shrinkMIMG(MachineInstr &MI) const; void shrinkMadFma(MachineInstr &MI) const; bool shrinkScalarLogicOp(MachineInstr &MI) const; + bool tryReplaceDeadSDST(MachineInstr &MI) const; bool instAccessReg(iterator_range &&R, Register Reg, unsigned SubReg) const; bool instReadsReg(const MachineInstr *MI, unsigned Reg, @@ -689,6 +690,22 @@ return nullptr; } +// On gfx10+, replace a virtual sdst that has no non-debug uses with NULL. +bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const { + if (ST->getGeneration() < AMDGPUSubtarget::GFX10) + return false; + + MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + if (!Op) + return false; + Register SDstReg = Op->getReg(); + if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg)) + return false; + + Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64); + return true; +} + bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -822,15 +839,21 @@ continue; } - if (!TII->hasVALU32BitEncoding(MI.getOpcode())) + if (!TII->hasVALU32BitEncoding(MI.getOpcode())) { + // If there is no chance we will shrink it and use VCC as sdst to get + // a 32 bit form try to replace dead sdst with NULL. + tryReplaceDeadSDST(MI); continue; + } if (!TII->canShrink(MI, *MRI)) { // Try commuting the instruction and see if that enables us to shrink // it. 
if (!MI.isCommutable() || !TII->commuteInstruction(MI) || - !TII->canShrink(MI, *MRI)) + !TII->canShrink(MI, *MRI)) { + tryReplaceDeadSDST(MI); continue; + } } int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll @@ -220,7 +220,7 @@ ; ; GFX10-LABEL: div_scale_s_s_true: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_div_scale_f32 v0, s0, s2, s3, s2 +; GFX10-NEXT: v_div_scale_f32 v0, null, s2, s3, s2 ; GFX10-NEXT: ; return to shader part epilog %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true) %result = extractvalue { float, i1 } %div.scale, 0 @@ -236,7 +236,7 @@ ; ; GFX10-LABEL: div_scale_s_s_false: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_div_scale_f32 v0, s0, s3, s3, s2 +; GFX10-NEXT: v_div_scale_f32 v0, null, s3, s3, s2 ; GFX10-NEXT: ; return to shader part epilog %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false) %result = extractvalue { float, i1 } %div.scale, 0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll @@ -84,7 +84,7 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-IEEE-NEXT: v_fma_f32 v4, -v2, v3, 1.0 @@ -101,7 +101,7 @@ ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; 
GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -188,7 +188,7 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-IEEE-NEXT: v_fma_f32 v4, -v2, v3, 1.0 @@ -289,7 +289,7 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v2, v1 ; GFX10-IEEE-NEXT: v_fma_f32 v3, -v1, v2, 1.0 @@ -306,7 +306,7 @@ ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -397,7 +397,7 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v2, v1 ; GFX10-IEEE-NEXT: v_fma_f32 v3, -v1, v2, 1.0 @@ -414,7 +414,7 @@ ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: 
s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -560,7 +560,7 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-IEEE-NEXT: v_fma_f32 v4, -v2, v3, 1.0 @@ -710,8 +710,8 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1 +; GFX10-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v6, v4 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v7, v5 @@ -739,7 +739,7 @@ ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v6, vcc_lo, v0, v2, v0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v5, v4 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -750,7 +750,7 @@ ; GFX10-FLUSH-NEXT: v_fmac_f32_e32 v7, v8, v5 ; GFX10-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6 ; GFX10-FLUSH-NEXT: s_denorm_mode 0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v6, s4, v3, v3, v1 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v6, null, v3, v3, v1 ; GFX10-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v5, v6 ; GFX10-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ 
-872,8 +872,8 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1 +; GFX10-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v6, v4 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v7, v5 @@ -1040,8 +1040,8 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v3, s4, v1, v1, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v3, null, v1, v1, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v4, v2 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v5, v3 @@ -1069,7 +1069,7 @@ ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -1080,7 +1080,7 @@ ; GFX10-FLUSH-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-FLUSH-NEXT: v_fma_f32 v2, -v2, v5, v4 ; GFX10-FLUSH-NEXT: s_denorm_mode 0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, s4, v1, v1, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, null, v1, v1, 1.0 ; GFX10-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v4 ; GFX10-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0 @@ -1222,8 +1222,8 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; 
GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v3, s4, v1, v1, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v3, null, v1, v1, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v4, v2 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v5, v3 @@ -1251,7 +1251,7 @@ ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -1262,7 +1262,7 @@ ; GFX10-FLUSH-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-FLUSH-NEXT: v_fma_f32 v2, -v2, v5, v4 ; GFX10-FLUSH-NEXT: s_denorm_mode 0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, s4, v1, v1, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, null, v1, v1, 1.0 ; GFX10-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v4 ; GFX10-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0 @@ -1459,8 +1459,8 @@ ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1 +; GFX10-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v6, v4 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v7, v5 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -67,7 +67,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 @@ -170,7 +170,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 @@ -243,7 +243,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -316,7 +316,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -420,7 +420,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; 
GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -523,7 +523,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 @@ -634,8 +634,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] -; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] +; GFX10-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] ; GFX10-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] @@ -804,8 +804,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] -; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] +; GFX10-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] ; GFX10-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] @@ -928,8 +928,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 -; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], 
v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] @@ -1052,8 +1052,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 -; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] @@ -1223,8 +1223,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 -; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] @@ -1393,8 +1393,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] -; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] +; GFX10-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] ; GFX10-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll =================================================================== --- 
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll @@ -54,7 +54,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, v2, v2, v1 +; GFX10-NEXT: v_div_scale_f32 v0, null, v2, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm @@ -121,7 +121,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, v1, v2, v1 +; GFX10-NEXT: v_div_scale_f32 v0, null, v1, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm @@ -193,7 +193,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] offset:8 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm @@ -265,7 +265,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] offset:8 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, v[0:1], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm @@ -328,7 +328,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, v0, v0, s0 +; GFX10-NEXT: v_div_scale_f32 v0, null, v0, v0, s0 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ 
-388,7 +388,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, s0, v0, s0 +; GFX10-NEXT: v_div_scale_f32 v0, null, s0, v0, s0 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -448,7 +448,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, s0, s0, v0 +; GFX10-NEXT: v_div_scale_f32 v0, null, s0, s0, v0 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -508,7 +508,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, v0, s0, v0 +; GFX10-NEXT: v_div_scale_f32 v0, null, v0, s0, v0 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -568,7 +568,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, v[0:1], v[0:1], s[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], v[0:1], s[0:1] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -628,7 +628,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, s[0:1], v[0:1], s[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], v[0:1], s[0:1] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -688,7 +688,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; 
GFX10-NEXT: v_div_scale_f64 v[0:1], s0, s[0:1], s[0:1], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], v[0:1] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -748,7 +748,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, v[0:1], s[0:1], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], s[0:1], v[0:1] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -797,7 +797,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, s5, s5, s4 +; GFX10-NEXT: v_div_scale_f32 v0, null, s5, s5, s4 ; GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) @@ -841,7 +841,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, s4, s5, s4 +; GFX10-NEXT: v_div_scale_f32 v0, null, s4, s5, s4 ; GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) @@ -887,7 +887,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[4:5], s[4:5], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[4:5], s[4:5], s[2:3] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) @@ -933,7 +933,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; 
GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[2:3], s[4:5], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[2:3], s[4:5], s[2:3] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) @@ -985,7 +985,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, v0, v0, 1.0 +; GFX10-NEXT: v_div_scale_f32 v0, null, v0, v0, 1.0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1041,7 +1041,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, 2.0, 2.0, v0 +; GFX10-NEXT: v_div_scale_f32 v0, null, 2.0, 2.0, v0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1108,7 +1108,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v0, 0x7fffffff, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_div_scale_f32 v0, s2, v2, v2, v0 +; GFX10-NEXT: v_div_scale_f32 v0, null, v2, v2, v0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1179,7 +1179,7 @@ ; GFX10-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v0, 0x7fffffff, v2 -; GFX10-NEXT: v_div_scale_f32 v0, s2, v0, v0, v1 +; GFX10-NEXT: v_div_scale_f32 v0, null, v0, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm @@ -1226,7 +1226,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, s0, s0, 0x41000000 +; GFX10-NEXT: 
v_div_scale_f32 v0, null, s0, s0, 0x41000000 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false) @@ -1263,7 +1263,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, 0x41000000, 0x41000000, s0 +; GFX10-NEXT: v_div_scale_f32 v0, null, 0x41000000, 0x41000000, s0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false) @@ -1298,7 +1298,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, s0, s0, s0 +; GFX10-NEXT: v_div_scale_f32 v0, null, s0, s0, s0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false) @@ -1337,7 +1337,7 @@ ; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: s_mov_b32 s3, 0x40200000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], s[2:3] ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -384,7 +384,7 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 ; GFX10-NEXT: v_mul_lo_u32 v3, v4, v3 ; GFX10-NEXT: v_mul_lo_u32 v2, v5, v2 ; GFX10-NEXT: v_add3_u32 v1, 
v1, v3, v2 @@ -514,10 +514,10 @@ ; GFX10-NEXT: v_mul_lo_u32 v2, v2, v3 ; GFX10-NEXT: v_mul_lo_u32 v5, v6, v5 ; GFX10-NEXT: v_mul_lo_u32 v8, v7, v4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v6, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v6, v3, 0 ; GFX10-NEXT: v_add3_u32 v2, v5, v8, v2 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v6, v4, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v7, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v6, v4, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v7, v3, v[1:2] ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i96 %num, %den ret i96 %result @@ -769,12 +769,12 @@ ; GFX10-NEXT: v_mov_b32_e32 v9, v1 ; GFX10-NEXT: v_mov_b32_e32 v10, v2 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, v4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v8, v6, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v8, v6, 0 ; GFX10-NEXT: v_mul_lo_u32 v7, v8, v7 ; GFX10-NEXT: v_mul_lo_u32 v6, v9, v6 -; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v9, v5, v[0:1] -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v8, v4, 0 -; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v10, v4, v[11:12] +; GFX10-NEXT: v_mad_u64_u32 v[11:12], null, v9, v5, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v8, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[11:12], null, v10, v4, v[11:12] ; GFX10-NEXT: v_mov_b32_e32 v2, v11 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], vcc_lo, v8, v5, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v5, v10, v5 @@ -1813,24 +1813,24 @@ ; GFX10-NEXT: v_mul_lo_u32 v27, v6, v9 ; GFX10-NEXT: v_mul_lo_u32 v28, v5, v10 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, v8 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v16, v14, 0 -; GFX10-NEXT: v_mad_u64_u32 v[18:19], s4, v16, v12, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v16, v14, 0 +; GFX10-NEXT: v_mad_u64_u32 v[18:19], null, v16, v12, 0 ; GFX10-NEXT: v_mul_lo_u32 v30, v17, v14 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v13, v[0:1] -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v2, v12, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v17, v13, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 
v[18:19], s4, v17, v11, v[18:19] ; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, 1, s4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, v3, v11, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v2, v12, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v2, v10, v[18:19] ; GFX10-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[20:21], s4, v16, v10, 0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v10, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[20:21], null, v16, v10, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v3, v11, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v3, v9, v[18:19] ; GFX10-NEXT: v_add_co_ci_u32_e32 v24, vcc_lo, 0, v22, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v5, v9, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v10, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v4, v8, v[18:19] ; GFX10-NEXT: v_add_co_ci_u32_e32 v26, vcc_lo, 0, v24, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[22:23], s4, v6, v8, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v5, v9, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[22:23], null, v6, v8, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v9, v[20:21] ; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 ; GFX10-NEXT: v_mov_b32_e32 v20, v22 @@ -1842,7 +1842,7 @@ ; GFX10-NEXT: v_mul_lo_u32 v22, v16, v15 ; GFX10-NEXT: v_mad_u64_u32 v[24:25], vcc_lo, v17, v12, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[14:15], s6, v16, v11, v[19:20] -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, v16, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v16, v8, 0 ; GFX10-NEXT: v_mul_lo_u32 v20, v4, v11 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s6 ; GFX10-NEXT: v_mad_u64_u32 v[18:19], s5, v2, v11, v[24:25] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -488,7 +488,7 @@ ; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v1 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 ; 
GFX10-NEXT: v_mul_lo_u32 v4, s10, v2 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s14, s10, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s10, v3, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s11, v3 ; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 ; GFX10-NEXT: v_add3_u32 v1, v1, v4, v5 @@ -514,7 +514,7 @@ ; GFX10-NEXT: v_add3_u32 v1, v5, v4, v1 ; GFX10-NEXT: v_mul_lo_u32 v4, s11, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s14, s10, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s10, v3, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s10, v2 ; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 @@ -561,7 +561,7 @@ ; GFX10-NEXT: v_mul_lo_u32 v4, s9, v2 ; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, 1 ; GFX10-NEXT: v_add3_u32 v3, v3, v0, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s10, s8, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s8, v2, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s8, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 @@ -1930,7 +1930,9 @@ ; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s10 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: s_xor_b64 s[14:15], s[6:7], s[12:13] +; GFX10-NEXT: s_sub_u32 s3, 0, s10 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; GFX10-NEXT: s_subb_u32 s6, 0, s11 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 @@ -1945,18 +1947,16 @@ ; GFX10-NEXT: v_trunc_f32_e32 v4, v4 ; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v0 ; GFX10-NEXT: v_mul_f32_e32 v2, 0xcf800000, v4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s3, s20, v6, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s20, v6, 0 ; GFX10-NEXT: v_mul_lo_u32 v8, s21, v6 ; GFX10-NEXT: v_add_f32_e32 v2, v2, v3 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v4 -; GFX10-NEXT: s_sub_u32 s3, 0, s10 -; GFX10-NEXT: s_subb_u32 s6, 0, s11 ; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 ; GFX10-NEXT: v_mul_lo_u32 v9, s3, v3 ; GFX10-NEXT: v_add3_u32 v7, v1, v7, v8 ; 
GFX10-NEXT: v_mul_lo_u32 v10, v5, v0 ; GFX10-NEXT: v_mul_hi_u32 v11, v6, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s7, s3, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, s3, v4, 0 ; GFX10-NEXT: v_mul_lo_u32 v8, s6, v4 ; GFX10-NEXT: v_mul_lo_u32 v12, v6, v7 ; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0 @@ -1980,8 +1980,8 @@ ; GFX10-NEXT: v_add_nc_u32_e32 v10, v12, v10 ; GFX10-NEXT: v_add_co_u32 v8, s7, v8, v15 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s7 -; GFX10-NEXT: v_mul_hi_u32 v16, v4, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11 +; GFX10-NEXT: v_mul_hi_u32 v16, v4, v2 ; GFX10-NEXT: v_add_co_u32 v1, s7, v14, v1 ; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s7 ; GFX10-NEXT: v_add_co_u32 v0, s7, v0, v10 @@ -1990,14 +1990,14 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s7 ; GFX10-NEXT: v_add_co_u32 v9, s7, v1, v16 ; GFX10-NEXT: v_add3_u32 v7, v11, v10, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 ; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v8, v12, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v5, v7, vcc_lo ; GFX10-NEXT: v_mul_hi_u32 v2, v3, v2 -; GFX10-NEXT: v_add_nc_u32_e32 v10, v13, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s7, s20, v6, 0 ; GFX10-NEXT: v_add_co_u32 v7, s7, v9, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v10, v13, v1 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s20, v6, 0 ; GFX10-NEXT: v_mul_lo_u32 v9, s21, v6 ; GFX10-NEXT: v_mul_lo_u32 v11, s20, v5 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s7 @@ -2009,7 +2009,7 @@ ; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v2, vcc_lo ; GFX10-NEXT: v_mul_lo_u32 v12, v6, v7 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s7, s3, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, s3, v4, 0 ; GFX10-NEXT: v_mul_lo_u32 v9, s6, v4 ; GFX10-NEXT: v_mul_lo_u32 v11, s3, v3 ; GFX10-NEXT: v_mul_lo_u32 v13, v5, v7 @@ -2074,7 +2074,7 @@ ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX10-NEXT: v_mul_hi_u32 v8, s14, v4 ; 
GFX10-NEXT: v_add3_u32 v2, v6, v1, v2 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s3, s8, v9, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s8, v9, 0 ; GFX10-NEXT: v_mul_lo_u32 v6, s9, v9 ; GFX10-NEXT: v_mul_lo_u32 v7, s8, v5 ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v3, v2, vcc_lo @@ -2126,7 +2126,7 @@ ; GFX10-NEXT: v_add3_u32 v2, v3, v1, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v6, v19, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v6, v7, v20, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s10, v4, 0 ; GFX10-NEXT: v_mul_lo_u32 v7, s10, v2 ; GFX10-NEXT: v_mul_lo_u32 v11, s11, v4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v17 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -403,7 +403,7 @@ ; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v1 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX10-NEXT: v_mul_lo_u32 v4, s0, v2 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v3, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s1, v3 ; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 ; GFX10-NEXT: v_add3_u32 v1, v1, v4, v5 @@ -429,7 +429,7 @@ ; GFX10-NEXT: v_add3_u32 v1, v5, v4, v1 ; GFX10-NEXT: v_mul_lo_u32 v4, s1, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v3, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s0, v2 ; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 @@ -476,7 +476,7 @@ ; GFX10-NEXT: v_mul_lo_u32 v4, s11, v2 ; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, 1 ; GFX10-NEXT: v_add3_u32 v3, v3, v0, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s10, v2, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s10, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: 
v_add3_u32 v1, v1, v5, v4 @@ -1553,9 +1553,9 @@ ; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, s0, v4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s0, v5, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v5, 0 ; GFX10-NEXT: v_mul_lo_u32 v9, s1, v5 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s6, s2, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], null, s2, v8, 0 ; GFX10-NEXT: v_mul_lo_u32 v11, s3, v8 ; GFX10-NEXT: v_add3_u32 v1, v1, v7, v9 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v0 @@ -1599,14 +1599,15 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s6 ; GFX10-NEXT: v_add3_u32 v1, v9, v7, v1 ; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v5, v0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX10-NEXT: v_add3_u32 v3, v11, v10, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v6, v3, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s0, v5, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v5, 0 ; GFX10-NEXT: v_mul_lo_u32 v7, s1, v5 ; GFX10-NEXT: v_mul_lo_u32 v9, s0, v4 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s2, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], null, s2, v8, 0 ; GFX10-NEXT: v_mul_lo_u32 v10, s3, v8 ; GFX10-NEXT: v_mul_lo_u32 v11, s2, v6 ; GFX10-NEXT: v_mul_lo_u32 v12, v4, v0 @@ -1682,9 +1683,9 @@ ; GFX10-NEXT: v_mul_hi_u32 v11, s10, v3 ; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v7 ; GFX10-NEXT: v_add3_u32 v5, v5, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s12, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s12, v4, 0 ; GFX10-NEXT: v_mul_lo_u32 v12, s13, v4 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 ; GFX10-NEXT: v_mul_lo_u32 v13, s12, v5 ; GFX10-NEXT: v_add_co_u32 v2, s0, v10, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 @@ -1695,14 +1696,13 @@ ; GFX10-NEXT: v_add3_u32 v1, v1, v13, v12 ; GFX10-NEXT: v_add_nc_u32_e32 v6, v7, v6 ; GFX10-NEXT: v_mul_hi_u32 v3, s11, 
v3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v9, 0 ; GFX10-NEXT: v_add_nc_u32_e32 v7, v10, v8 ; GFX10-NEXT: v_sub_nc_u32_e32 v8, s9, v1 ; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, s8, v0 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v11, s0, s9, v1, vcc_lo ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, s13, v8, vcc_lo ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s12, v10 -; GFX10-NEXT: v_mov_b32_e32 v9, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v10, s12 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v12, s0, 0, v0, vcc_lo @@ -1722,25 +1722,25 @@ ; GFX10-NEXT: v_add_co_u32 v6, s0, v2, v6 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 ; GFX10-NEXT: v_add_co_u32 v15, s0, v16, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v17, s0 -; GFX10-NEXT: v_add3_u32 v3, v7, v1, v3 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v6, 0 ; GFX10-NEXT: v_mul_lo_u32 v19, s15, v6 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 +; GFX10-NEXT: v_add3_u32 v3, v7, v1, v3 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, s14, v6, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v17, s0 ; GFX10-NEXT: v_mul_lo_u32 v7, s14, v3 ; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v16, s0, v8, s12 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v20, s0, 0, v0, s0 -; GFX10-NEXT: v_add3_u32 v2, v2, v7, v19 -; GFX10-NEXT: v_sub_co_u32 v7, s0, s10, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v13 +; GFX10-NEXT: v_add3_u32 v2, v2, v7, v19 +; GFX10-NEXT: v_sub_co_u32 v7, s0, s10, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v15, vcc_lo ; GFX10-NEXT: v_sub_co_ci_u32_e64 v13, s1, s11, v2, s0 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, s11, v2 ; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v14 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v15, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s2, s15, v13 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v17, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s2, s15, v13 ; GFX10-NEXT: 
v_subrev_co_ci_u32_e64 v2, s0, s15, v2, s0 ; GFX10-NEXT: v_cmp_le_u32_e64 s0, s14, v7 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v16, s1 Index: llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll +++ llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll @@ -271,7 +271,7 @@ ; GFX10W64-NEXT: s_waitcnt vmcnt(0) ; GFX10W64-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1] +; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1] ; GFX10W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W64-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W64-NEXT: s_endpgm @@ -300,7 +300,7 @@ ; GFX10W32-NEXT: s_waitcnt vmcnt(0) ; GFX10W32-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1] +; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1] ; GFX10W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W32-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W32-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll +++ llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll @@ -292,7 +292,7 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s6, -1 -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1] ; GFX1064-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX1064-NEXT: s_endpgm ; @@ -329,7 +329,7 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v0, s[0:1] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, s2, v0, s[0:1] ; 
GFX1032-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX1032-NEXT: s_endpgm entry: @@ -705,7 +705,7 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -742,7 +742,7 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -922,8 +922,8 @@ ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s6, -1 -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v2, s[0:1] -; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v2, v[1:2] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[0:1] +; GFX1064-NEXT: v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2] ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX1064-NEXT: s_endpgm ; @@ -965,8 +965,8 @@ ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v2, s[0:1] -; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s0, s3, v2, v[1:2] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[0:1] +; GFX1032-NEXT: v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2] ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX1032-NEXT: s_endpgm entry: @@ -2043,12 +2043,12 @@ ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v2, 0 -; 
GFX1064-NEXT: s_mov_b32 s7, 0x31016000 -; GFX1064-NEXT: s_mov_b32 s6, -1 -; GFX1064-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s3, v2, v[4:5] +; GFX1064-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 ; GFX1064-NEXT: v_readfirstlane_b32 s0, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1064-NEXT: s_mov_b32 s6, -1 +; GFX1064-NEXT: v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5] ; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s0, v3 ; GFX1064-NEXT: v_mov_b32_e32 v1, v4 ; GFX1064-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s1, v1, vcc @@ -2089,12 +2089,12 @@ ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_mad_u64_u32 v[3:4], s0, s2, v2, 0 +; GFX1032-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 +; GFX1032-NEXT: v_readfirstlane_b32 s0, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s1, v1 ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 -; GFX1032-NEXT: v_mad_u64_u32 v[4:5], s0, s3, v2, v[4:5] -; GFX1032-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1032-NEXT: v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5] ; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v3 ; GFX1032-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo Index: llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -296,7 +296,7 @@ ; GFX1064-NEXT: v_readfirstlane_b32 s0, v1 ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s6, v0, s[0:1] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, s6, v0, s[0:1] ; GFX1064-NEXT: s_mov_b32 s6, -1 ; GFX1064-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX1064-NEXT: s_endpgm @@ -330,7 +330,7 @@ ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; 
GFX1032-NEXT: s_mov_b32 s6, -1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v0, s[0:1] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, s2, v0, s[0:1] ; GFX1032-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX1032-NEXT: s_endpgm entry: @@ -852,7 +852,7 @@ ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -883,7 +883,7 @@ ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -1050,10 +1050,10 @@ ; GFX1064-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s5, v1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, s[4:5] -; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s3, v2, v[1:2] -; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5] ; GFX1064-NEXT: s_mov_b32 s2, -1 +; GFX1064-NEXT: v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2] +; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX1064-NEXT: s_endpgm ; @@ -1087,10 +1087,10 @@ ; GFX1032-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s5, v1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, s2, v2, s[4:5] -; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s2, s3, v2, v[1:2] -; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5] ; 
GFX1032-NEXT: s_mov_b32 s2, -1 +; GFX1032-NEXT: v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2] +; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX1032-NEXT: s_endpgm entry: @@ -2215,14 +2215,14 @@ ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s2, v2, 0 -; GFX1064-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1064-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v2, v[4:5] +; GFX1064-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1064-NEXT: v_readfirstlane_b32 s4, v1 +; GFX1064-NEXT: v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5] ; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s2, v3 -; GFX1064-NEXT: v_mov_b32_e32 v1, v4 +; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 +; GFX1064-NEXT: v_mov_b32_e32 v1, v4 ; GFX1064-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s4, v1, vcc ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX1064-NEXT: s_endpgm @@ -2255,14 +2255,14 @@ ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_mad_u64_u32 v[3:4], s2, s2, v2, 0 -; GFX1032-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1032-NEXT: v_mad_u64_u32 v[4:5], s2, s3, v2, v[4:5] +; GFX1032-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1032-NEXT: v_readfirstlane_b32 s4, v1 +; GFX1032-NEXT: v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5] ; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3 -; GFX1032-NEXT: v_mov_b32_e32 v1, v4 +; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 +; GFX1032-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s4, v1, vcc_lo ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX1032-NEXT: 
s_endpgm Index: llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll +++ llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll @@ -270,7 +270,7 @@ ; GFX10W64-NEXT: s_waitcnt vmcnt(0) ; GFX10W64-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1] +; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1] ; GFX10W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W64-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W64-NEXT: s_endpgm @@ -299,7 +299,7 @@ ; GFX10W32-NEXT: s_waitcnt vmcnt(0) ; GFX10W32-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1] +; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1] ; GFX10W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W32-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W32-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll +++ llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll @@ -279,7 +279,7 @@ ; GFX10W64-NEXT: s_waitcnt vmcnt(0) ; GFX10W64-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1] +; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1] ; GFX10W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W64-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W64-NEXT: s_endpgm @@ -309,7 +309,7 @@ ; GFX10W32-NEXT: s_waitcnt vmcnt(0) ; GFX10W32-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1] +; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1] ; GFX10W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W32-NEXT: global_store_dword 
v1, v0, s[2:3] ; GFX10W32-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/carryout-selection.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/carryout-selection.ll +++ llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -52,7 +52,7 @@ ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}} -; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -82,7 +82,7 @@ ; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} -; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]] +; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, 0, 0x1234, [[CARRY]] define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -171,7 +171,7 @@ ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 -; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -237,7 +237,7 @@ ; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}} -; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* 
%out, i64 %a) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -267,7 +267,7 @@ ; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc ; ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} -; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]] +; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, 0x1234, 0, [[CARRY]] define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -356,7 +356,7 @@ ; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 -; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 Index: llvm/test/CodeGen/AMDGPU/frem.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/frem.ll +++ llvm/test/CodeGen/AMDGPU/frem.ll @@ -561,7 +561,7 @@ ; GFX10-NEXT: global_load_dword v1, v0, s[6:7] ; GFX10-NEXT: global_load_dword v2, v0, s[2:3] offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v4, s0, v2, v2, v1 +; GFX10-NEXT: v_div_scale_f32 v4, null, v2, v2, v1 ; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v1, v2, v1 ; GFX10-NEXT: v_rcp_f32_e32 v5, v4 ; GFX10-NEXT: s_denorm_mode 15 @@ -976,7 +976,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v12, s[6:7] ; GFX10-NEXT: global_load_dwordx2 v[2:3], v12, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[4:5], s0, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 ; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] @@ -2140,7 
+2140,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[6:7] ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] offset:32 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v6, s0, v3, v3, v1 +; GFX10-NEXT: v_div_scale_f32 v6, null, v3, v3, v1 ; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v1, v3, v1 ; GFX10-NEXT: v_rcp_f32_e32 v7, v6 ; GFX10-NEXT: s_denorm_mode 15 @@ -2155,7 +2155,7 @@ ; GFX10-NEXT: v_div_fixup_f32 v5, v5, v3, v1 ; GFX10-NEXT: v_trunc_f32_e32 v5, v5 ; GFX10-NEXT: v_fma_f32 v1, -v5, v3, v1 -; GFX10-NEXT: v_div_scale_f32 v5, s0, v2, v2, v0 +; GFX10-NEXT: v_div_scale_f32 v5, null, v2, v2, v0 ; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v0, v2, v0 ; GFX10-NEXT: v_rcp_f32_e32 v6, v5 ; GFX10-NEXT: s_denorm_mode 15 @@ -2506,7 +2506,7 @@ ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[6:7] ; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:64 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v10, s0, v7, v7, v3 +; GFX10-NEXT: v_div_scale_f32 v10, null, v7, v7, v3 ; GFX10-NEXT: v_div_scale_f32 v9, vcc_lo, v3, v7, v3 ; GFX10-NEXT: v_rcp_f32_e32 v11, v10 ; GFX10-NEXT: s_denorm_mode 15 @@ -2521,7 +2521,7 @@ ; GFX10-NEXT: v_div_fixup_f32 v9, v9, v7, v3 ; GFX10-NEXT: v_trunc_f32_e32 v9, v9 ; GFX10-NEXT: v_fma_f32 v3, -v9, v7, v3 -; GFX10-NEXT: v_div_scale_f32 v9, s0, v6, v6, v2 +; GFX10-NEXT: v_div_scale_f32 v9, null, v6, v6, v2 ; GFX10-NEXT: v_div_scale_f32 v7, vcc_lo, v2, v6, v2 ; GFX10-NEXT: v_rcp_f32_e32 v10, v9 ; GFX10-NEXT: s_denorm_mode 15 @@ -2536,7 +2536,7 @@ ; GFX10-NEXT: v_div_fixup_f32 v7, v7, v6, v2 ; GFX10-NEXT: v_trunc_f32_e32 v7, v7 ; GFX10-NEXT: v_fma_f32 v2, -v7, v6, v2 -; GFX10-NEXT: v_div_scale_f32 v7, s0, v5, v5, v1 +; GFX10-NEXT: v_div_scale_f32 v7, null, v5, v5, v1 ; GFX10-NEXT: v_div_scale_f32 v6, vcc_lo, v1, v5, v1 ; GFX10-NEXT: v_rcp_f32_e32 v9, v7 ; GFX10-NEXT: s_denorm_mode 15 @@ -2551,7 +2551,7 @@ ; GFX10-NEXT: v_div_fixup_f32 v6, v6, v5, v1 ; GFX10-NEXT: v_trunc_f32_e32 v6, v6 ; GFX10-NEXT: 
v_fma_f32 v1, -v6, v5, v1 -; GFX10-NEXT: v_div_scale_f32 v6, s0, v4, v4, v0 +; GFX10-NEXT: v_div_scale_f32 v6, null, v4, v4, v0 ; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v4, v0 ; GFX10-NEXT: v_rcp_f32_e32 v7, v6 ; GFX10-NEXT: s_denorm_mode 15 @@ -2807,7 +2807,7 @@ ; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[6:7] ; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[2:3] offset:64 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[8:9], s0, v[6:7], v[6:7], v[2:3] +; GFX10-NEXT: v_div_scale_f64 v[8:9], null, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[8:9] ; GFX10-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 ; GFX10-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] @@ -2820,7 +2820,7 @@ ; GFX10-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3] ; GFX10-NEXT: v_trunc_f64_e32 v[8:9], v[8:9] ; GFX10-NEXT: v_fma_f64 v[2:3], -v[8:9], v[6:7], v[2:3] -; GFX10-NEXT: v_div_scale_f64 v[6:7], s0, v[4:5], v[4:5], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[6:7], null, v[4:5], v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] ; GFX10-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 ; GFX10-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] Index: llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll +++ llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll @@ -121,7 +121,7 @@ ; GFX10-LABEL: global_xchg_saddr_i32_rtn_2048: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v2, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc, 0, v3, vcc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) Index: llvm/test/CodeGen/AMDGPU/global-saddr-load.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/global-saddr-load.ll +++ 
llvm/test/CodeGen/AMDGPU/global-saddr-load.ll @@ -86,7 +86,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_neg4096: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -111,7 +111,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_neg4097: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -136,7 +136,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_neg4098: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -240,7 +240,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_neg2049: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff800, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -263,7 +263,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_neg2050: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff800, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2 ; GFX10-NEXT: s_waitcnt vmcnt(0) 
; GFX10-NEXT: ; return to shader part epilog @@ -308,7 +308,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_4294967296: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -332,7 +332,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_4294967297: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -356,7 +356,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_4294971391: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x800, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -380,7 +380,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_4294971392: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x1000, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -405,7 +405,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967295: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x800, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to 
shader part epilog @@ -429,7 +429,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967296: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -453,7 +453,7 @@ ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967297: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -494,7 +494,7 @@ ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -525,7 +525,7 @@ ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x1000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off @@ -551,7 +551,7 @@ ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, 
vcc, -1, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off @@ -582,7 +582,7 @@ ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 @@ -624,7 +624,7 @@ ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off @@ -666,7 +666,7 @@ ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 @@ -692,7 +692,7 @@ ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll +++ 
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll @@ -131,9 +131,9 @@ ; GFX1013-NEXT: v_mov_b32_e32 v10, 0x41000000 ; GFX1013-NEXT: s_waitcnt lgkmcnt(0) ; GFX1013-NEXT: v_add_co_u32 v2, s4, s4, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v3, s4, s5, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v3, null, s5, 0, s4 ; GFX1013-NEXT: v_add_co_u32 v4, s4, s6, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, s4, s7, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, null, s7, 0, s4 ; GFX1013-NEXT: flat_load_dword v0, v[2:3] ; GFX1013-NEXT: flat_load_dword v1, v[4:5] ; GFX1013-NEXT: v_mov_b32_e32 v2, 0 @@ -160,9 +160,9 @@ ; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v2 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1030-NEXT: v_add_co_u32 v2, s4, s6, v2 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, s4, s7, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s4 ; GFX1030-NEXT: flat_load_dword v0, v[0:1] ; GFX1030-NEXT: flat_load_dword v1, v[2:3] ; GFX1030-NEXT: v_mov_b32_e32 v2, 0 @@ -202,9 +202,9 @@ ; GFX1013-NEXT: v_mov_b32_e32 v7, 0x48004700 ; GFX1013-NEXT: s_waitcnt lgkmcnt(0) ; GFX1013-NEXT: v_add_co_u32 v2, s4, s4, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v3, s4, s5, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v3, null, s5, 0, s4 ; GFX1013-NEXT: v_add_co_u32 v4, s4, s6, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, s4, s7, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, null, s7, 0, s4 ; GFX1013-NEXT: flat_load_dword v0, v[2:3] ; GFX1013-NEXT: flat_load_dword v1, v[4:5] ; GFX1013-NEXT: v_mov_b32_e32 v2, 0 @@ -228,9 +228,9 @@ ; GFX1030-NEXT: v_mov_b32_e32 v7, 0x48004700 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v2 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1030-NEXT: v_add_co_u32 v2, s4, s6, v2 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, 
s4, s7, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s4 ; GFX1030-NEXT: flat_load_dword v0, v[0:1] ; GFX1030-NEXT: flat_load_dword v1, v[2:3] ; GFX1030-NEXT: v_mov_b32_e32 v2, 0 @@ -277,7 +277,7 @@ ; GFX1013-NEXT: v_mov_b32_e32 v11, 0x41000000 ; GFX1013-NEXT: s_waitcnt lgkmcnt(0) ; GFX1013-NEXT: v_add_co_u32 v0, s4, s4, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1013-NEXT: flat_load_dword v2, v[0:1] ; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7 ; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102 @@ -303,7 +303,7 @@ ; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v0 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1030-NEXT: flat_load_dword v2, v[0:1] ; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102 ; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7 @@ -344,7 +344,7 @@ ; GFX1013-NEXT: v_mov_b32_e32 v8, 0x48004700 ; GFX1013-NEXT: s_waitcnt lgkmcnt(0) ; GFX1013-NEXT: v_add_co_u32 v0, s4, s4, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1013-NEXT: flat_load_dword v2, v[0:1] ; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6 ; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102 @@ -367,7 +367,7 @@ ; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v0 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1030-NEXT: flat_load_dword v2, v[0:1] ; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102 ; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c6 Index: llvm/test/CodeGen/AMDGPU/llvm.mulo.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -57,10 +57,10 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; 
GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 -; GFX10-NEXT: v_mad_u64_u32 v[6:7], s4, v4, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[9:10], s4, v5, v2, 0 -; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v5, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[6:7], null, v4, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[9:10], null, v5, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[11:12], null, v5, v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, v1 ; GFX10-NEXT: v_mul_lo_u32 v5, v5, v2 ; GFX10-NEXT: v_mul_lo_u32 v4, v4, v3 @@ -160,10 +160,10 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 -; GFX10-NEXT: v_mad_u64_u32 v[6:7], s4, v4, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[9:10], s4, v5, v2, 0 -; GFX10-NEXT: v_mad_i64_i32 v[11:12], s4, v5, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[6:7], null, v4, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[9:10], null, v5, v2, 0 +; GFX10-NEXT: v_mad_i64_i32 v[11:12], null, v5, v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, v1 ; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v8, v6 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo Index: llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll +++ llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s ; RUN: llc -march=amdgcn -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s +; RUN: llc -march=amdgcn 
-mcpu=gfx1100 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) { ; GFX9-LABEL: mad_i32_vvv: @@ -11,14 +13,14 @@ ; ; GFX10-LABEL: mad_i32_vvv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v1, v[2:3] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvv: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v3, v1 ; GFX11-NEXT: v_mov_b32_e32 v4, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v4, v3, v[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v3, v[2:3] ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c @@ -47,14 +49,14 @@ ; ; GFX10-LABEL: mad_i32_vvc: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v1, 42 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 42 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvc: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v3, v2, 42 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 42 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, 42 @@ -72,14 +74,14 @@ ; ; GFX10-LABEL: mad_i32_vvi: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v1, 0x12d687 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 0x12d687 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvi: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v3, v2, 0x12d687 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 0x12d687 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, 1234567 @@ -95,12 +97,12 @@ ; ; GFX10-LABEL: mad_i32_vcv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, 42, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 
v[0:1], null, v0, 42, v[1:2] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vcv: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, v0, 42, v[1:2] +; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, 42, v[1:2] ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, 42 @@ -117,13 +119,13 @@ ; ; GFX10-LABEL: mad_i32_vcc: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, 42, 43 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 42, 43 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vcc: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v2, 42, 43 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, 42, 43 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, 42 %add = add i32 %mul, 43 @@ -139,14 +141,14 @@ ; ; GFX10-LABEL: mad_i32_vvs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v1, s[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, s[0:1] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvs: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v3, v2, s[0:1] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, s[0:1] ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c @@ -162,12 +164,12 @@ ; ; GFX10-LABEL: mad_i32_vsv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, v[1:2] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vsv: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, v0, s0, v[1:2] +; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, s0, v[1:2] ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b @@ -184,12 +186,12 @@ ; ; GFX10-LABEL: mad_i32_svv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s0, 
v0, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v0, v[1:2] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_svv: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, s0, v0, v[1:2] +; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, s0, v0, v[1:2] ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b @@ -208,14 +210,14 @@ ; GFX10-LABEL: mad_i32_vss: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_mov_b32 s2, s1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, s[2:3] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, s[2:3] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vss: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: s_mov_b32 s2, s1 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v2, s0, s[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, s[2:3] ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c @@ -233,14 +235,14 @@ ; GFX10-LABEL: mad_i32_svs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_mov_b32 s2, s1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s0, v0, s[2:3] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v0, s[2:3] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_svs: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: s_mov_b32 s2, s1 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, s0, v2, s[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, s0, v2, s[2:3] ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c @@ -257,12 +259,12 @@ ; ; GFX10-LABEL: mad_i32_ssv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s0, s1, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, s1, v[0:1] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_ssv: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_mad_u64_u32 v[1:2], s0, s0, s1, v[0:1] +; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, s0, s1, v[0:1] ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: ; return to shader part 
epilog %mul = mul i32 %a, %b Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -169,7 +169,7 @@ ; GFX10-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] slc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 @@ -183,7 +183,7 @@ ; GFX10-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] slc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 @@ -270,7 +270,7 @@ ; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] slc ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 @@ -284,7 +284,7 @@ ; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] slc ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 @@ -462,7 +462,7 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 ; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX10-WGP-NEXT: flat_load_dword v2, 
v[1:2] -; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 +; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 glc slc ; GFX10-WGP-NEXT: s_endpgm @@ -476,7 +476,7 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 ; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX10-CU-NEXT: flat_load_dword v2, v[1:2] -; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 +; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 glc slc ; GFX10-CU-NEXT: s_endpgm @@ -563,7 +563,7 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1 ; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[1:2] -; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 +; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 glc slc ; GFX11-WGP-NEXT: s_endpgm @@ -577,7 +577,7 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 ; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[1:2] -; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 +; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 glc slc ; GFX11-CU-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll @@ -120,7 +120,7 @@ ; GFX10-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; 
GFX10-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 @@ -135,7 +135,7 @@ ; GFX10-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 @@ -166,7 +166,7 @@ ; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc dlc ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 @@ -181,7 +181,7 @@ ; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc dlc ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 @@ -315,7 +315,7 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 ; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[1:2] -; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 +; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -330,7 +330,7 @@ ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 ; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX10-CU-NEXT: flat_load_dword v2, v[1:2] -; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 +; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 
; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 @@ -361,7 +361,7 @@ ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1 ; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[1:2] -; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 +; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0 @@ -376,7 +376,7 @@ ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1 ; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX11-CU-NEXT: flat_load_b32 v2, v[1:2] -; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 +; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 Index: llvm/test/CodeGen/AMDGPU/offset-split-global.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/offset-split-global.ll +++ llvm/test/CodeGen/AMDGPU/offset-split-global.ll @@ -749,7 +749,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -778,7 +778,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -883,7 +883,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 
v0, s0, 0xfffff000, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -912,7 +912,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -941,7 +941,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -971,7 +971,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -1001,7 +1001,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -1031,7 +1031,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: 
v_add_co_u32 v0, s0, 0x800, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -1061,7 +1061,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -1091,7 +1091,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1800, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off @@ -1121,7 +1121,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v[0:1], v0, off Index: llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -186,7 +186,7 @@ ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1] ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, 
s35, 0, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0x1000 @@ -622,7 +622,7 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s35, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, s0, s34, v2 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x5000, v3 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s35, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s35, 0, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo ; GFX10-NEXT: .LBB1_1: ; %for.cond.preheader ; GFX10-NEXT: ; =>This Loop Header: Depth=1 @@ -1087,7 +1087,7 @@ ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, 0x800, v0 @@ -1367,7 +1367,7 @@ ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1] ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0xfffff800 @@ -1594,7 +1594,7 @@ ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0x80000000 @@ -1838,9 +1838,9 @@ ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v0 ; 
GFX10-NEXT: v_add_co_u32 v0, s0, s36, v2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s37, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s37, 0, s0 ; GFX10-NEXT: v_add_co_u32 v14, s0, s38, v2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s0, s39, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v15, null, s39, 0, s0 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, 0x1800 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v14, 0x3000 @@ -2090,7 +2090,7 @@ ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1] ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, 0x3800, v0 @@ -2366,7 +2366,7 @@ ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1] ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v4 Index: llvm/test/CodeGen/AMDGPU/saddo.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/saddo.ll +++ llvm/test/CodeGen/AMDGPU/saddo.ll @@ -90,7 +90,7 @@ ; GFX10-NEXT: s_xor_b32 s2, s2, s3 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 ; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind Index: llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -39,7 +39,7 @@ ; MUBUF-NEXT: buffer_load_dword v2, v0, s[36:39], 0 offen offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_add_nc_u32_e32 v0, v2, v1 -; MUBUF-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039 +; MUBUF-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, 0x3039 ; MUBUF-NEXT: buffer_store_dword v0, v0, s[36:39], 0 offen ; MUBUF-NEXT: .LBB0_2: ; %shader_eval_surface.exit ; MUBUF-NEXT: s_endpgm @@ -71,7 +71,7 @@ ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: v_add_nc_u32_e32 v0, v1, v0 -; FLATSCR-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039 +; FLATSCR-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, 0x3039 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; FLATSCR-NEXT: .LBB0_2: ; %shader_eval_surface.exit ; FLATSCR-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/udiv.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/udiv.ll +++ llvm/test/CodeGen/AMDGPU/udiv.ll @@ -2788,9 +2788,9 @@ ; GFX1030-NEXT: v_add_co_u32 v5, vcc_lo, v2, v3 ; GFX1030-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s4, v4, vcc_lo ; GFX1030-NEXT: v_mul_hi_u32 v8, v0, v5 -; GFX1030-NEXT: v_mad_u64_u32 v[4:5], s4, v1, v5, 0 -; GFX1030-NEXT: v_mad_u64_u32 v[2:3], s4, v0, v6, 0 -; GFX1030-NEXT: v_mad_u64_u32 v[6:7], s4, v1, v6, 0 +; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, v5, 0 +; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, v6, 0 +; GFX1030-NEXT: v_mad_u64_u32 v[6:7], null, v1, v6, 0 ; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v8, v2 ; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -2798,8 +2798,8 @@ ; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v7, vcc_lo ; GFX1030-NEXT: v_add_co_u32 v5, vcc_lo, v2, v6 ; 
GFX1030-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v3, vcc_lo -; GFX1030-NEXT: v_mad_u64_u32 v[2:3], s4, 0x186a0, v5, 0 -; GFX1030-NEXT: v_mad_u64_u32 v[3:4], s4, 0x186a0, v6, v[3:4] +; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x186a0, v5, 0 +; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x186a0, v6, v[3:4] ; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 ; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo ; GFX1030-NEXT: v_subrev_co_u32 v2, vcc_lo, 0x186a0, v0 Index: llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -476,7 +476,7 @@ ; SI-NEXT: {{ $}} ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset.cast, align 4, addrspace 4) ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %50, 0, implicit $exec - ; SI-NEXT: %43:vgpr_32, dead %45:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; SI-NEXT: %43:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %43, %subreg.sub1 ; SI-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8) from %ir.i10, addrspace 1) ; SI-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec Index: llvm/test/CodeGen/AMDGPU/wave32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/wave32.ll +++ llvm/test/CodeGen/AMDGPU/wave32.ll @@ -360,8 +360,8 @@ } ; GCN-LABEL: 
{{^}}test_div_scale_f32: -; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX1032: v_div_scale_f32 v{{[0-9]+}}, null, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX1064: v_div_scale_f32 v{{[0-9]+}}, null, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -377,8 +377,8 @@ } ; GCN-LABEL: {{^}}test_div_scale_f64: -; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], s{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] -; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] +; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], null, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] +; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], null, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid @@ -394,8 +394,8 @@ } ; GCN-LABEL: {{^}}test_mad_i64_i32: -; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}] -; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}] +; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}] +; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}] define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 { %sext0 = sext i32 %arg0 to i64 %sext1 = sext i32 %arg1 to i64 @@ -405,8 +405,8 @@ } ; GCN-LABEL: {{^}}test_mad_u64_u32: -; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], 
s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}] -; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}] +; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}] +; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}] define i64 @test_mad_u64_u32(i32 %arg0, i32 %arg1, i64 %arg2) #0 { %sext0 = zext i32 %arg0 to i64 %sext1 = zext i32 %arg1 to i64 @@ -478,8 +478,8 @@ } ; GCN-LABEL: {{^}}fdiv_f32: -; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GFX1032: v_div_scale_f32 v{{[0-9]+}}, null, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} +; GFX1064: v_div_scale_f32 v{{[0-9]+}}, null, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GCN: v_rcp_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} ; GFX1032: v_div_scale_f32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GFX1064: v_div_scale_f32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}