Index: llvm/include/llvm/CodeGen/MachineInstrBuilder.h =================================================================== --- llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -281,6 +281,11 @@ return *this; } + const MachineInstrBuilder &setOperandDead(unsigned OpIdx) const { + MI->getOperand(OpIdx).setIsDead(); + return *this; + } + // Add a displacement from an existing MachineOperand with an added offset. const MachineInstrBuilder &addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags = 0) const { Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -159,11 +159,15 @@ // TODO: Skip masking high bits if def is known boolean. + bool IsSGPR = TRI.isSGPRClass(SrcRC); unsigned AndOpc = - TRI.isSGPRClass(SrcRC) ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32; - BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg) + IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32; + auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg) .addImm(1) .addReg(SrcReg); + if (IsSGPR) + And.setOperandDead(3); // Dead scc + BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) .addImm(0) .addReg(MaskedReg); @@ -323,7 +327,8 @@ MachineInstr *Add = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) .add(I.getOperand(1)) - .add(I.getOperand(2)); + .add(I.getOperand(2)) + .setOperandDead(3); // Dead scc I.eraseFromParent(); return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); } @@ -370,7 +375,8 @@ .add(Lo2); BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) .add(Hi1) - .add(Hi2); + .add(Hi2) + .setOperandDead(3); // Dead scc } else { const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); Register CarryReg = MRI->createVirtualRegister(CarryRC); @@ -437,14 +443,18 @@ unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; - BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg) + auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg) .add(I.getOperand(2)) .add(I.getOperand(3)); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg) - .addReg(AMDGPU::SCC); - if (!MRI->getRegClassOrNull(Dst1Reg)) - MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass); + if (MRI->use_nodbg_empty(Dst1Reg)) { + CarryInst.setOperandDead(3); // Dead scc + } else { + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg) + .addReg(AMDGPU::SCC); + if (!MRI->getRegClassOrNull(Dst1Reg)) + MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass); + } if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) || !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) || @@ -741,7 +751,8 @@ // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16 auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst) .addReg(ShiftSrc0) - .addImm(16); + .addImm(16) + .setOperandDead(3); // Dead scc MI.eraseFromParent(); return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); @@ -1630,7 +1641,8 @@ Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base) .addReg(BaseOffset) - .addImm(16); + .addImm(16) + .setOperandDead(3); // Dead scc BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) .addReg(M0Base); @@ -2195,7 +2207,8 @@ } else { BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0) .addReg(HiReg) - .addImm(16); + .addImm(16) + .setOperandDead(3); // Dead scc } unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; @@ -2204,12 +2217,17 @@ BuildMI(*MBB, I, DL, TII.get(MovOpc), ImmReg) .addImm(0xffff); - BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1) + auto And = BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1) .addReg(LoReg) .addReg(ImmReg); - BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg) + auto Or = BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg) .addReg(TmpReg0) .addReg(TmpReg1); + + if (!IsVALU) { + And.setOperandDead(3); // Dead scc + Or.setOperandDead(3); // Dead scc + } } I.eraseFromParent(); @@ -2354,7 +2372,8 @@ if (Signed) { BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg) .addReg(SrcReg, 0, SubReg) - .addImm(31); + .addImm(31) + .setOperandDead(3); // Dead scc } else { BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg) .addImm(0); @@ -2398,7 +2417,8 @@ if (!Signed && shouldUseAndMask(SrcSize, Mask)) { BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg) .addReg(SrcReg) - .addImm(Mask); + .addImm(Mask) + .setOperandDead(3); // Dead scc } else { BuildMI(MBB, I, DL, TII.get(BFE32), DstReg) .addReg(SrcReg) @@ -2532,7 +2552,8 @@ unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32; BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg) .addReg(HiReg) - .addReg(ConstReg); + .addReg(ConstReg) + .setOperandDead(3); // Dead scc BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst) .addReg(LoReg) .addImm(AMDGPU::sub0) @@ -2573,7 +2594,8 @@ // TODO: Should this used S_BITSET0_*? BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg) .addReg(HiReg) - .addReg(ConstReg); + .addReg(ConstReg) + .setOperandDead(3); // Dead scc BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst) .addReg(LoReg) .addImm(AMDGPU::sub0) @@ -2731,7 +2753,8 @@ Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC()); BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg) .addReg(CondReg) - .addReg(Exec); + .addReg(Exec) + .setOperandDead(3); // Dead scc CondReg = TmpReg; } @@ -2794,7 +2817,8 @@ !CanCopyLow32 && !CanCopyHi32) { auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg) .addReg(SrcReg) - .addReg(MaskReg); + .addReg(MaskReg) + .setOperandDead(3); // Dead scc I.eraseFromParent(); return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); } @@ -2817,9 +2841,12 @@ assert(MaskTy.getSizeInBits() == 32 && "ptrmask should have been narrowed during legalize"); - BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg) + auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg) .addReg(SrcReg) .addReg(MaskReg); + + if (!IsVGPR) + NewOp.setOperandDead(3); // Dead scc I.eraseFromParent(); return true; } @@ -3325,7 +3352,8 @@ } else { BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg) .addReg(SrcReg) - .addImm(Subtarget->getWavefrontSizeLog2()); + .addImm(Subtarget->getWavefrontSizeLog2()) + .setOperandDead(3); // Dead scc } const TargetRegisterClass &RC = @@ -4114,7 +4142,8 @@ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr) .addFrameIndex(FI) - .addReg(RHSDef->Reg); + .addReg(RHSDef->Reg) + .setOperandDead(3); // Dead scc } } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -6594,38 +6594,36 @@ ; GFX6-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX6-NEXT: s_cmp_lg_u32 s13, 0 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 1 -; GFX6-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v2 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 1 -; GFX6-NEXT: s_sub_i32 s2, s4, 64 -; GFX6-NEXT: s_sub_i32 s3, 64, s4 +; GFX6-NEXT: s_sub_i32 s0, s4, 64 +; GFX6-NEXT: s_sub_i32 s1, 64, s4 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64 ; GFX6-NEXT: s_cselect_b32 s5, 1, 0 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s4 -; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s3 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s1 ; GFX6-NEXT: s_cselect_b32 s8, 1, 0 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s4 -; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s2 -; GFX6-NEXT: s_and_b32 s2, 1, s5 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s0 +; GFX6-NEXT: s_and_b32 s0, 1, s5 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX6-NEXT: s_and_b32 s2, 1, s8 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 +; GFX6-NEXT: s_and_b32 s0, 1, s8 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX6-NEXT: s_and_b32 s2, 1, s5 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 +; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc ; GFX6-NEXT: v_or_b32_e32 v0, s6, v0 ; GFX6-NEXT: v_or_b32_e32 v1, s7, v1 -; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX6-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshl_i128_svs: @@ -6649,38 +6647,36 @@ ; GFX8-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX8-NEXT: s_cmp_lg_u32 s13, 0 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] -; GFX8-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v2 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] -; GFX8-NEXT: s_sub_i32 s2, s4, 64 -; GFX8-NEXT: s_sub_i32 s3, 64, s4 +; GFX8-NEXT: s_sub_i32 s0, s4, 64 +; GFX8-NEXT: s_sub_i32 s1, 64, s4 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64 ; GFX8-NEXT: s_cselect_b32 s5, 1, 0 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] -; GFX8-NEXT: v_lshlrev_b64 v[6:7], s3, v[2:3] +; GFX8-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] ; GFX8-NEXT: s_cselect_b32 s8, 1, 0 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3] -; GFX8-NEXT: v_lshrrev_b64 v[2:3], s2, v[2:3] -; GFX8-NEXT: s_and_b32 s2, 1, s5 +; GFX8-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] +; GFX8-NEXT: s_and_b32 s0, 1, s5 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX8-NEXT: s_and_b32 s2, 1, s8 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 +; GFX8-NEXT: s_and_b32 s0, 1, s8 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX8-NEXT: s_and_b32 s2, 1, s5 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 +; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc ; GFX8-NEXT: v_or_b32_e32 v0, s6, v0 ; GFX8-NEXT: v_or_b32_e32 v1, s7, v1 -; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 -; GFX8-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: v_fshl_i128_svs: @@ -6704,37 +6700,35 @@ ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] ; GFX9-NEXT: s_cmp_lg_u32 s13, 0 -; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 31, v1 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] -; GFX9-NEXT: s_sub_i32 s2, s4, 64 -; GFX9-NEXT: s_sub_i32 s3, 64, s4 +; GFX9-NEXT: s_sub_i32 s0, s4, 64 +; GFX9-NEXT: s_sub_i32 s1, 64, s4 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64 ; GFX9-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] -; GFX9-NEXT: v_lshlrev_b64 v[6:7], s3, v[2:3] +; GFX9-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] ; GFX9-NEXT: s_cselect_b32 s8, 1, 0 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3] -; GFX9-NEXT: v_lshrrev_b64 v[2:3], s2, v[2:3] -; GFX9-NEXT: s_and_b32 s2, 1, s5 +; GFX9-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] +; GFX9-NEXT: s_and_b32 s0, 1, s5 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX9-NEXT: s_and_b32 s2, 1, s8 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 +; GFX9-NEXT: s_and_b32 s0, 1, s8 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 -; GFX9-NEXT: s_and_b32 s2, 1, s5 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc ; GFX9-NEXT: v_or_b32_e32 v0, s6, v0 ; GFX9-NEXT: v_or_b32_e32 v1, s7, v1 -; GFX9-NEXT: v_or_b32_e32 v2, s0, v2 -; GFX9-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: v_fshl_i128_svs: @@ -6775,20 +6769,18 @@ ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 ; GFX10-NEXT: s_and_b32 s0, 1, s5 -; GFX10-NEXT: s_and_b32 s1, 1, s1 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 -; GFX10-NEXT: v_or_b32_e32 v0, s8, v0 -; GFX10-NEXT: v_or_b32_e32 v1, s9, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX10-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX10-NEXT: v_or_b32_e32 v1, s9, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: v_fshl_i128_svs: @@ -6830,21 +6822,20 @@ ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 ; GFX11-NEXT: s_and_b32 s0, 1, s5 -; GFX11-NEXT: s_and_b32 s1, 1, s1 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5 -; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_or_b32_e32 v0, s8, v0 -; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3 +; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX11-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 ; GFX11-NEXT: ; return to shader part epilog %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) %cast.result = bitcast i128 %result to <4 x float> Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -6651,35 +6651,33 @@ ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] ; GFX6-NEXT: s_cmp_lg_u32 s13, 0 -; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] -; GFX6-NEXT: s_sub_i32 s4, s8, 64 -; GFX6-NEXT: s_sub_i32 s5, 64, s8 +; GFX6-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5] +; GFX6-NEXT: s_sub_i32 s0, s8, 64 +; GFX6-NEXT: s_sub_i32 s1, 64, s8 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64 ; GFX6-NEXT: s_cselect_b32 s6, 1, 0 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s8 -; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s5 +; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s1 ; GFX6-NEXT: s_cselect_b32 s7, 1, 0 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s8 -; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s4 -; GFX6-NEXT: s_and_b32 s4, 1, s6 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s0 +; GFX6-NEXT: s_and_b32 s0, 1, s6 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 -; GFX6-NEXT: s_and_b32 s4, 1, s7 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 +; GFX6-NEXT: s_and_b32 s0, 1, s7 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 -; GFX6-NEXT: s_and_b32 s4, 1, s6 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc ; GFX6-NEXT: v_or_b32_e32 v0, s2, v0 ; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 -; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX6-NEXT: v_or_b32_e32 v2, s4, v2 +; GFX6-NEXT: v_or_b32_e32 v3, s5, v3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_fshr_i128_svs: @@ -6707,35 +6705,33 @@ ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] ; GFX8-NEXT: s_cmp_lg_u32 s13, 0 -; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] -; GFX8-NEXT: s_sub_i32 s4, s8, 64 -; GFX8-NEXT: s_sub_i32 s5, 64, s8 +; GFX8-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5] +; GFX8-NEXT: s_sub_i32 s0, s8, 64 +; GFX8-NEXT: s_sub_i32 s1, 64, s8 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64 ; GFX8-NEXT: s_cselect_b32 s6, 1, 0 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] -; GFX8-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] +; GFX8-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] ; GFX8-NEXT: s_cselect_b32 s7, 1, 0 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] -; GFX8-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] -; GFX8-NEXT: s_and_b32 s4, 1, s6 +; GFX8-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] +; GFX8-NEXT: s_and_b32 s0, 1, s6 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 -; GFX8-NEXT: s_and_b32 s4, 1, s7 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 +; GFX8-NEXT: s_and_b32 s0, 1, s7 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 -; GFX8-NEXT: s_and_b32 s4, 1, s6 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc ; GFX8-NEXT: v_or_b32_e32 v0, s2, v0 ; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 -; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 -; GFX8-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX8-NEXT: v_or_b32_e32 v2, s4, v2 +; GFX8-NEXT: v_or_b32_e32 v3, s5, v3 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: v_fshr_i128_svs: @@ -6763,35 +6759,33 @@ ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] ; GFX9-NEXT: s_cmp_lg_u32 s13, 0 -; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] -; GFX9-NEXT: s_sub_i32 s4, s8, 64 -; GFX9-NEXT: s_sub_i32 s5, 64, s8 +; GFX9-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5] +; GFX9-NEXT: s_sub_i32 s0, s8, 64 +; GFX9-NEXT: s_sub_i32 s1, 64, s8 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64 ; GFX9-NEXT: s_cselect_b32 s6, 1, 0 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] -; GFX9-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] +; GFX9-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] ; GFX9-NEXT: s_cselect_b32 s7, 1, 0 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] -; GFX9-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] -; GFX9-NEXT: s_and_b32 s4, 1, s6 +; GFX9-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] +; GFX9-NEXT: s_and_b32 s0, 1, s6 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 -; GFX9-NEXT: s_and_b32 s4, 1, s7 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 +; GFX9-NEXT: s_and_b32 s0, 1, s7 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 -; GFX9-NEXT: s_and_b32 s4, 1, s6 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc ; GFX9-NEXT: v_or_b32_e32 v0, s2, v0 ; GFX9-NEXT: v_or_b32_e32 v1, s3, v1 -; GFX9-NEXT: v_or_b32_e32 v2, s0, v2 -; GFX9-NEXT: v_or_b32_e32 v3, s1, v3 +; GFX9-NEXT: v_or_b32_e32 v2, s4, v2 +; GFX9-NEXT: v_or_b32_e32 v3, s5, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: v_fshr_i128_svs: @@ -6834,20 +6828,18 @@ ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 ; GFX10-NEXT: s_and_b32 s0, 1, s6 -; GFX10-NEXT: s_and_b32 s1, 1, s1 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo -; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 -; GFX10-NEXT: v_or_b32_e32 v0, s4, v0 -; GFX10-NEXT: v_or_b32_e32 v1, s5, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX10-NEXT: v_or_b32_e32 v0, s4, v0 +; GFX10-NEXT: v_or_b32_e32 v1, s5, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: v_fshr_i128_svs: @@ -6891,21 +6883,20 @@ ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 ; GFX11-NEXT: s_and_b32 s0, 1, s6 -; GFX11-NEXT: s_and_b32 s1, 1, s1 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] +; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5 -; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 -; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_or_b32_e32 v0, s4, v0 -; GFX11-NEXT: v_or_b32_e32 v1, s5, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3 +; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 +; GFX11-NEXT: v_or_b32_e32 v0, s4, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: v_or_b32_e32 v1, s5, v1 ; GFX11-NEXT: ; return to shader part epilog %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) %cast.result = bitcast i128 %result to <4 x float> Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -31,6 +31,7 @@ ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} @@ -46,6 +47,7 @@ ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} @@ -55,6 +57,7 @@ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} @@ -64,6 +67,7 @@ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -73,6 +77,7 @@ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -115,6 +120,7 @@ ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} @@ -130,6 +136,7 @@ ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} @@ -149,6 +156,7 @@ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} @@ -168,6 +176,7 @@ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -177,6 +186,7 @@ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -221,6 +231,7 @@ ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} @@ -236,6 +247,7 @@ ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} @@ -245,6 +257,7 @@ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -254,6 +267,7 @@ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -263,6 +277,7 @@ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} @@ -305,6 +320,7 @@ ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} @@ -320,6 +336,7 @@ ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} @@ -339,6 +356,7 @@ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -358,6 +376,7 @@ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -367,6 +386,7 @@ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} @@ -421,6 +441,7 @@ ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} @@ -446,6 +467,7 @@ ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} @@ -465,6 +487,7 @@ ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} @@ -484,6 +507,7 @@ ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -493,6 +517,7 @@ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -535,6 +560,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} @@ -548,6 +574,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} @@ -556,6 +583,7 @@ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} @@ -564,6 +592,7 @@ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -572,6 +601,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -610,6 +640,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} @@ -623,6 +654,7 @@ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} @@ -631,6 +663,7 @@ ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -639,6 +672,7 @@ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -647,6 +681,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} @@ -686,6 +721,7 @@ ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} @@ -700,6 +736,7 @@ ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} @@ -710,6 +747,7 @@ ; GFX7-FLAT-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} @@ -720,6 +758,7 @@ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -730,6 +769,7 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -772,6 +812,7 @@ ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] + ; ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX7: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} @@ -786,6 +827,7 @@ ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX7-NEXT: $vgpr0 = COPY [[COPY3]] + ; ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX7-FLAT: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} @@ -800,12 +842,13 @@ ; GFX7-FLAT-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX7-FLAT-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX7-FLAT-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc - ; GFX7-FLAT-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def $scc, implicit $scc + ; GFX7-FLAT-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def dead $scc, implicit $scc ; GFX7-FLAT-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-FLAT-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} @@ -820,12 +863,13 @@ ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY3]], [[COPY4]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY5]], [[COPY6]], implicit-def dead $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] + ; ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -836,6 +880,7 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] + ; ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-wave-address.mir @@ -12,10 +12,11 @@ body: | bb.0: ; WAVE32-LABEL: name: wave_address_s - ; WAVE32: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc + ; WAVE32: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] + ; ; WAVE64-LABEL: name: wave_address_s - ; WAVE64: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; WAVE64: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] %0:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 S_ENDPGM 0, implicit %0 @@ -33,6 +34,7 @@ ; WAVE32-LABEL: name: wave_address_v ; WAVE32: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; ; WAVE64-LABEL: name: wave_address_v ; WAVE64: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -24,6 +24,7 @@ ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] + ; ; WAVE32-LABEL: name: and_s1_vcc_vcc_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -61,6 +62,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; ; WAVE32-LABEL: name: and_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -93,6 +95,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; ; WAVE32-LABEL: name: and_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -125,6 +128,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; ; WAVE32-LABEL: name: and_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -157,6 +161,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; ; WAVE32-LABEL: name: and_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -187,6 +192,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] + ; ; WAVE32-LABEL: name: and_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -217,6 +223,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; ; WAVE32-LABEL: name: and_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -247,6 +254,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] + ; ; WAVE32-LABEL: name: and_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -277,6 +285,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] + ; ; WAVE32-LABEL: name: and_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -307,6 +316,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; ; WAVE32-LABEL: name: and_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -337,6 +347,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; ; WAVE32-LABEL: name: and_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -369,6 +380,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; WAVE64-NEXT: [[AND:%[0-9]+]]:vgpr(s64) = G_AND [[COPY]], [[COPY1]] ; WAVE64-NEXT: S_ENDPGM 0, implicit [[AND]](s64) + ; ; WAVE32-LABEL: name: and_s64_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} @@ -403,6 +415,7 @@ ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] + ; ; WAVE32-LABEL: name: and_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -444,11 +457,12 @@ ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B64_]] ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -456,7 +470,7 @@ ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]] @@ -492,10 +506,11 @@ ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] + ; ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -503,7 +518,7 @@ ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]] @@ -539,6 +554,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; ; WAVE32-LABEL: name: and_s32_sgpr_sgpr_sgpr_result_reg_class ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir @@ -280,11 +280,11 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY2]], implicit-def $scc + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY2]], implicit-def dead $scc ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_NE_U32_e64_]], implicit-def $scc - ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def $scc + ; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def dead $scc ; GCN-NEXT: $vcc = COPY [[S_AND_B64_1]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} @@ -322,7 +322,7 @@ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1 ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[S_MOV_B64_]], implicit-def $scc - ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_XOR_B64_]], $exec, implicit-def $scc + ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_XOR_B64_]], $exec, implicit-def dead $scc ; GCN-NEXT: $vcc = COPY [[S_AND_B64_]] ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc ; GCN-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir @@ -70,6 +70,7 @@ ; GFX9-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + ; ; GFX11-LABEL: name: test_build_vector_trunc_s_pack_hl ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} @@ -306,7 +307,7 @@ ; GFX9PLUS: liveins: $sgpr0 ; GFX9PLUS-NEXT: {{ $}} ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def $scc + ; GFX9PLUS-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 16, implicit-def dead $scc ; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_LSHR_B32_]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = COPY $sgpr0 @@ -392,6 +393,7 @@ ; GFX9-NEXT: [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[S_LSHR_B32_1]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]], implicit [[S_LSHR_B32_1]] + ; ; GFX11-LABEL: name: test_build_vector_trunc_s_pack_hh_multi_use_rhs ; GFX11: liveins: $sgpr0, $sgpr1 ; GFX11-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -18,6 +18,7 @@ ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; ; WAVE32-LABEL: name: constant_v_s32 ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec @@ -48,6 +49,7 @@ ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; ; WAVE32-LABEL: name: constant_s_s32 ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 @@ -78,6 +80,7 @@ ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; ; WAVE32-LABEL: name: constant_v_s16 ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec @@ -108,6 +111,7 @@ ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; ; WAVE32-LABEL: name: constant_s_s16 ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 @@ -157,6 +161,7 @@ ; WAVE64-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec ; WAVE64-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] + ; ; WAVE32-LABEL: name: constant_v_s64 ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -220,6 +225,7 @@ ; WAVE64-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; ; WAVE32-LABEL: name: constant_s_s64 ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 @@ -262,6 +268,7 @@ ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1 ; WAVE64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]] + ; ; WAVE32-LABEL: name: constant_i1_vcc ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 -1 ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 0 @@ -288,6 +295,7 @@ ; WAVE64-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 ; WAVE64-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; ; WAVE32-LABEL: name: constant_s_p3 ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; WAVE32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 @@ -318,6 +326,7 @@ ; WAVE64-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec ; WAVE64-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; ; WAVE32-LABEL: name: constant_v_p3 ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec @@ -359,6 +368,7 @@ ; WAVE64-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; ; WAVE32-LABEL: name: constant_s_p1 ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 @@ -422,6 +432,7 @@ ; WAVE64-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec ; WAVE64-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] + ; ; WAVE32-LABEL: name: constant_v_p1 ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -485,6 +496,7 @@ ; WAVE64-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 ; WAVE64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; ; WAVE32-LABEL: name: constant_s_p999 ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; WAVE32-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 @@ -548,6 +560,7 @@ ; WAVE64-NEXT: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec ; WAVE64-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] + ; ; WAVE32-LABEL: name: constant_v_p999 ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; WAVE32-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -598,7 +611,7 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]] - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def dead $scc ; WAVE64-NEXT: $scc = COPY [[S_AND_B32_]] ; WAVE64-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; WAVE64-NEXT: S_BRANCH %bb.2 @@ -608,12 +621,13 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: bb.2: + ; ; WAVE32-LABEL: name: zext_sgpr_s1_to_sgpr_s32 ; WAVE32: bb.0: ; WAVE32-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], 1, implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], 1, implicit-def dead $scc ; WAVE32-NEXT: $scc = COPY [[S_AND_B32_]] ; WAVE32-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc ; WAVE32-NEXT: S_BRANCH %bb.2 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -21,6 +21,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; WAVE64-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; ; WAVE32-LABEL: name: copy ; WAVE32: liveins: $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -50,10 +51,11 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} @@ -61,7 +63,7 @@ ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) @@ -90,13 +92,14 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} @@ -106,7 +109,7 @@ ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]] ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY4]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec ; WAVE32-NEXT: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) @@ -140,6 +143,7 @@ ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec ; WAVE64-NEXT: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc ; WAVE32-NEXT: {{ $}} @@ -171,6 +175,7 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; ; WAVE32-LABEL: name: copy_sgpr_no_type ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -197,6 +202,7 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; ; WAVE32-LABEL: name: copy_vgpr_no_type ; WAVE32: liveins: $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -223,6 +229,7 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; ; WAVE32-LABEL: name: copy_maybe_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} @@ -252,6 +259,7 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]] ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; ; WAVE32-LABEL: name: copy_s1_vcc_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} @@ -281,6 +289,7 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: $vcc = COPY [[COPY]] ; WAVE64-NEXT: S_ENDPGM 0, implicit $vcc + ; ; WAVE32-LABEL: name: copy_s64_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} @@ -309,6 +318,7 @@ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: $vcc_lo = COPY [[COPY]] ; WAVE64-NEXT: S_ENDPGM 0, implicit $vcc + ; ; WAVE32-LABEL: name: copy_s32_to_vcc_lo ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -336,6 +346,7 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $vcc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; ; WAVE32-LABEL: name: copy_vcc_to_s64 ; WAVE32: liveins: $vcc ; WAVE32-NEXT: {{ $}} @@ -361,6 +372,7 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $vcc_lo ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY]] + ; ; WAVE32-LABEL: name: copy_vcc_lo_to_s32 ; WAVE32: liveins: $vcc ; WAVE32-NEXT: {{ $}} @@ -386,15 +398,16 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] + ; ; WAVE32-LABEL: name: copy_s1_to_vcc ; WAVE32: liveins: $sgpr0_sgpr1 ; WAVE32-NEXT: {{ $}} ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY1]], implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 @@ -419,6 +432,7 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]] + ; ; WAVE32-LABEL: name: copy_s1_false_to_vcc ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -445,6 +459,7 @@ ; WAVE64-NEXT: {{ $}} ; WAVE64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_MOV_B64_]] + ; ; WAVE32-LABEL: name: copy_s1_true_to_vcc ; WAVE32: liveins: $sgpr0 ; WAVE32-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -21,6 +21,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; VI-LABEL: name: fabs_s32_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -28,6 +29,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; GFX9-LABEL: name: fabs_s32_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -35,6 +37,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; GFX10-LABEL: name: fabs_s32_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -63,6 +66,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; VI-LABEL: name: fabs_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -70,6 +74,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; GFX9-LABEL: name: fabs_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -77,6 +82,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; GFX10-LABEL: name: fabs_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -104,18 +110,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; VI-LABEL: name: fabs_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; GFX9-LABEL: name: fabs_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](s32) + ; ; GFX10-LABEL: name: fabs_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -143,6 +152,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; VI-LABEL: name: fabs_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -150,6 +160,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; GFX9-LABEL: name: fabs_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -157,6 +168,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; GFX10-LABEL: name: fabs_v2s16_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -185,6 +197,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; VI-LABEL: name: fabs_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -192,6 +205,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; GFX9-LABEL: name: fabs_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -199,6 +213,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: $sgpr0 = COPY [[S_AND_B32_]] + ; ; GFX10-LABEL: name: fabs_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -229,6 +244,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; VI-LABEL: name: fabs_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -236,6 +252,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; GFX9-LABEL: name: fabs_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -243,6 +260,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32767 ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; GFX10-LABEL: name: fabs_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -275,6 +293,7 @@ ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; VI-LABEL: name: fabs_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -283,6 +302,7 @@ ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; GFX9-LABEL: name: fabs_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -291,6 +311,7 @@ ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(s16) = G_FABS [[TRUNC]] ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; GFX10-LABEL: name: fabs_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -322,6 +343,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; SI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; VI-LABEL: name: fabs_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -329,6 +351,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; VI-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; GFX9-LABEL: name: fabs_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -336,6 +359,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147450879 ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]] + ; ; GFX10-LABEL: name: fabs_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -363,18 +387,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; VI-LABEL: name: fabs_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; GFX9-LABEL: name: fabs_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FABS [[COPY]] ; GFX9-NEXT: $vgpr0 = COPY [[FABS]](<2 x s16>) + ; ; GFX10-LABEL: name: fabs_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -402,9 +429,10 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; VI-LABEL: name: fabs_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -412,9 +440,10 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: fabs_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -422,9 +451,10 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: fabs_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -432,7 +462,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 @@ -459,6 +489,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; VI-LABEL: name: fabs_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -469,6 +500,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: fabs_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -479,6 +511,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: fabs_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -509,18 +542,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; SI-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] ; SI-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; ; VI-LABEL: name: fabs_s64_vs ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; VI-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] ; VI-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; ; GFX9-LABEL: name: fabs_s64_vs ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[FABS]](s64) + ; ; GFX10-LABEL: name: fabs_s64_vs ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -552,6 +588,7 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; VI-LABEL: name: fabs_s64_vv_no_src_constraint ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -562,6 +599,7 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: fabs_s64_vv_no_src_constraint ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -572,6 +610,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: fabs_s64_vv_no_src_constraint ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -603,9 +642,10 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; VI-LABEL: name: fabs_s64_ss_no_src_constraint ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -613,9 +653,10 @@ ; VI-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: fabs_s64_ss_no_src_constraint ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -623,9 +664,10 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: fabs_s64_ss_no_src_constraint ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -633,7 +675,7 @@ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -21,6 +21,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; VI-LABEL: name: fneg_s32_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -28,6 +29,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; GFX9-LABEL: name: fneg_s32_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -35,6 +37,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; GFX10-LABEL: name: fneg_s32_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -63,6 +66,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; VI-LABEL: name: fneg_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -70,6 +74,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; GFX9-LABEL: name: fneg_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -77,6 +82,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; GFX10-LABEL: name: fneg_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -104,18 +110,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; ; VI-LABEL: name: fneg_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; ; GFX9-LABEL: name: fneg_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](s32) + ; ; GFX10-LABEL: name: fneg_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -143,6 +152,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; VI-LABEL: name: fneg_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -150,6 +160,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; GFX9-LABEL: name: fneg_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -157,6 +168,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; GFX10-LABEL: name: fneg_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -187,6 +199,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; VI-LABEL: name: fneg_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -194,6 +207,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; GFX9-LABEL: name: fneg_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -201,6 +215,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; GFX10-LABEL: name: fneg_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -233,6 +248,7 @@ ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; VI-LABEL: name: fneg_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -241,6 +257,7 @@ ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] ; VI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; GFX9-LABEL: name: fneg_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -249,6 +266,7 @@ ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[TRUNC]] ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FNEG]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; GFX10-LABEL: name: fneg_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -280,6 +298,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; VI-LABEL: name: fneg_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -287,6 +306,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; GFX9-LABEL: name: fneg_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -294,6 +314,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: $sgpr0 = COPY [[S_XOR_B32_]] + ; ; GFX10-LABEL: name: fneg_v2s16_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -322,6 +343,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; VI-LABEL: name: fneg_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -329,6 +351,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; GFX9-LABEL: name: fneg_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -336,6 +359,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; GFX10-LABEL: name: fneg_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -363,18 +387,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] ; SI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; ; VI-LABEL: name: fneg_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] ; VI-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; ; GFX9-LABEL: name: fneg_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr_32(<2 x s16>) = G_FNEG [[COPY]] ; GFX9-NEXT: $vgpr0 = COPY [[FNEG]](<2 x s16>) + ; ; GFX10-LABEL: name: fneg_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -402,9 +429,10 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; VI-LABEL: name: fneg_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -412,9 +440,10 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: fneg_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -422,9 +451,10 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: fneg_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -432,7 +462,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 @@ -459,6 +489,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; VI-LABEL: name: fneg_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -469,6 +500,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: fneg_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -479,6 +511,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: fneg_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -509,18 +542,21 @@ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; SI-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] ; SI-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; ; VI-LABEL: name: fneg_s64_vs ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; VI-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] ; VI-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; ; GFX9-LABEL: name: fneg_s64_vs ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 ; GFX9-NEXT: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]] ; GFX9-NEXT: S_ENDPGM 0, implicit [[FNEG]](s64) + ; ; GFX10-LABEL: name: fneg_s64_vs ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -549,6 +585,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; VI-LABEL: name: fneg_fabs_s32_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -556,6 +593,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; GFX9-LABEL: name: fneg_fabs_s32_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -563,6 +601,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; GFX10-LABEL: name: fneg_fabs_s32_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -592,6 +631,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; ; VI-LABEL: name: fneg_fabs_s32_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -599,6 +639,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; ; GFX9-LABEL: name: fneg_fabs_s32_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -606,6 +647,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; ; GFX10-LABEL: name: fneg_fabs_s32_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -636,6 +678,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec ; SI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; ; VI-LABEL: name: fneg_fabs_s32_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -644,6 +687,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec ; VI-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; ; GFX9-LABEL: name: fneg_fabs_s32_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -652,6 +696,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]](s32) + ; ; GFX10-LABEL: name: fneg_fabs_s32_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -682,6 +727,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; ; VI-LABEL: name: fneg_fabs_s16_ss ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -689,6 +735,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; ; GFX9-LABEL: name: fneg_fabs_s16_ss ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -696,6 +743,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; ; GFX10-LABEL: name: fneg_fabs_s16_ss ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -728,6 +776,7 @@ ; SI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] ; SI-NEXT: $vgpr0 = COPY [[COPY1]] + ; ; VI-LABEL: name: fneg_fabs_s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -736,6 +785,7 @@ ; VI-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] ; VI-NEXT: $vgpr0 = COPY [[COPY1]] + ; ; GFX9-LABEL: name: fneg_fabs_s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -744,6 +794,7 @@ ; GFX9-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_OR_B32_e64_]] ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]] + ; ; GFX10-LABEL: name: fneg_fabs_s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -779,6 +830,7 @@ ; SI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) ; SI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; VI-LABEL: name: fneg_fabs_s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -788,6 +840,7 @@ ; VI-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) ; VI-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; GFX9-LABEL: name: fneg_fabs_s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -797,6 +850,7 @@ ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:vgpr_32(s16) = G_FNEG [[FNEG]] ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FNEG1]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; ; GFX10-LABEL: name: fneg_fabs_s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -830,6 +884,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; SI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; ; VI-LABEL: name: fneg_fabs_v2s16_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -837,6 +892,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; VI-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; ; GFX9-LABEL: name: fneg_fabs_v2s16_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -844,6 +900,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX9-NEXT: $sgpr0 = COPY [[S_OR_B32_]] + ; ; GFX10-LABEL: name: fneg_fabs_v2s16_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -873,6 +930,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; VI-LABEL: name: fneg_fabs_v2s16_vv ; VI: liveins: $vgpr0 ; VI-NEXT: {{ $}} @@ -880,6 +938,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; GFX9-LABEL: name: fneg_fabs_v2s16_vv ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -887,6 +946,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]] + ; ; GFX10-LABEL: name: fneg_fabs_v2s16_vv ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -917,6 +977,7 @@ ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; SI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec ; SI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; ; VI-LABEL: name: fneg_fabs_v2s16_vs ; VI: liveins: $sgpr0 ; VI-NEXT: {{ $}} @@ -925,6 +986,7 @@ ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; VI-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec ; VI-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; ; GFX9-LABEL: name: fneg_fabs_v2s16_vs ; GFX9: liveins: $sgpr0 ; GFX9-NEXT: {{ $}} @@ -933,6 +995,7 @@ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147516416 ; GFX9-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_XOR_B32_e64 [[S_MOV_B32_]](s16), [[FABS]](<2 x s16>), implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_XOR_B32_e64_]](<2 x s16>) + ; ; GFX10-LABEL: name: fneg_fabs_v2s16_vs ; GFX10: liveins: $sgpr0 ; GFX10-NEXT: {{ $}} @@ -963,9 +1026,10 @@ ; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; SI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; SI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; VI-LABEL: name: fneg_fabs_s64_ss ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -973,9 +1037,10 @@ ; VI-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; VI-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; VI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; VI-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: fneg_fabs_s64_ss ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -983,9 +1048,10 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: fneg_fabs_s64_ss ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -993,7 +1059,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1 ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 @@ -1021,6 +1087,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; VI-LABEL: name: fneg_fabs_s64_vv ; VI: liveins: $vgpr0_vgpr1 ; VI-NEXT: {{ $}} @@ -1031,6 +1098,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: fneg_fabs_s64_vv ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -1041,6 +1109,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-LABEL: name: fneg_fabs_s64_vv ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} @@ -1077,6 +1146,7 @@ ; SI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 ; SI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; ; VI-LABEL: name: fneg_fabs_s64_vs ; VI: liveins: $sgpr0_sgpr1 ; VI-NEXT: {{ $}} @@ -1088,6 +1158,7 @@ ; VI-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) ; VI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 ; VI-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; ; GFX9-LABEL: name: fneg_fabs_s64_vs ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9-NEXT: {{ $}} @@ -1099,6 +1170,7 @@ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e64_]](s16), %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64) + ; ; GFX10-LABEL: name: fneg_fabs_s64_vs ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-i1-copy.mir @@ -30,6 +30,7 @@ ; WAVE64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0 + ; ; WAVE32-LABEL: name: i1_vcc_to_vcc_copy ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; WAVE32-NEXT: {{ $}} @@ -87,16 +88,17 @@ ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec ; WAVE64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; WAVE64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; WAVE64-NEXT: EXP_DONE 0, [[V_CNDMASK_B32_e64_]], [[V_CNDMASK_B32_e64_1]], [[COPY6]], [[COPY7]], -1, 0, 15, implicit $exec ; WAVE64-NEXT: S_ENDPGM 0 + ; ; WAVE32-LABEL: name: i1_sgpr_to_vcc_copy ; WAVE32: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; WAVE32-NEXT: {{ $}} @@ -109,10 +111,10 @@ ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $scc - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY5]], implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec ; WAVE32-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY4]], 0, [[COPY3]], [[V_CMP_NE_U32_e64_1]], implicit $exec ; WAVE32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[DEF]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -23,18 +23,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX7-LABEL: name: load_constant_s32_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX8-LABEL: name: load_constant_s32_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_s32_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -65,18 +68,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX7-LABEL: name: load_constant_v2s16_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX8-LABEL: name: load_constant_v2s16_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_v2s16_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -106,18 +112,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX7-LABEL: name: load_constant_v2s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX8-LABEL: name: load_constant_v2s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX10-LABEL: name: load_constant_v2s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -146,18 +155,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX7-LABEL: name: load_constant_v2s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX8-LABEL: name: load_constant_v2s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX10-LABEL: name: load_constant_v2s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -186,18 +198,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX7-LABEL: name: load_constant_v4s16_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX8-LABEL: name: load_constant_v4s16_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX10-LABEL: name: load_constant_v4s16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -227,18 +242,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX7-LABEL: name: load_constant_v4s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX8-LABEL: name: load_constant_v4s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX10-LABEL: name: load_constant_v4s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -268,18 +286,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX7-LABEL: name: load_constant_s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX8-LABEL: name: load_constant_s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX10-LABEL: name: load_constant_s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -309,18 +330,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX7-LABEL: name: load_constant_s64_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX8-LABEL: name: load_constant_s64_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX10-LABEL: name: load_constant_s64_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -350,18 +374,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX7-LABEL: name: load_constant_v2s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX8-LABEL: name: load_constant_v2s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX10-LABEL: name: load_constant_v2s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -391,18 +418,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX7-LABEL: name: load_constant_v2p1 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX8-LABEL: name: load_constant_v2p1 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) + ; ; GFX10-LABEL: name: load_constant_v2p1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -432,18 +462,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; ; GFX7-LABEL: name: load_constant_s128_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; ; GFX8-LABEL: name: load_constant_s128_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) + ; ; GFX10-LABEL: name: load_constant_s128_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -473,18 +506,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX7-LABEL: name: load_constant_p3_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX8-LABEL: name: load_constant_p3_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_p3_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -514,18 +550,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX7-LABEL: name: load_constant_p4_from_8 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX8-LABEL: name: load_constant_p4_from_8 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX10-LABEL: name: load_constant_p4_from_8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -555,18 +594,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; ; GFX7-LABEL: name: load_constant_p999_from_8 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; ; GFX8-LABEL: name: load_constant_p999_from_8 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](p999) + ; ; GFX10-LABEL: name: load_constant_p999_from_8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -596,18 +638,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX7-LABEL: name: load_constant_v2p3 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX8-LABEL: name: load_constant_v2p3 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) + ; ; GFX10-LABEL: name: load_constant_v2p3 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -637,18 +682,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX7-LABEL: name: load_constant_v2s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX8-LABEL: name: load_constant_v2s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_v2s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -678,18 +726,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX7-LABEL: name: load_constant_v4s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX8-LABEL: name: load_constant_v4s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] + ; ; GFX10-LABEL: name: load_constant_v4s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -719,18 +770,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX7-LABEL: name: load_constant_v8s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX8-LABEL: name: load_constant_v8s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] + ; ; GFX10-LABEL: name: load_constant_v8s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -760,18 +814,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; ; GFX7-LABEL: name: load_constant_v8s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; ; GFX8-LABEL: name: load_constant_v8s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] + ; ; GFX10-LABEL: name: load_constant_v8s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -801,18 +858,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; ; GFX7-LABEL: name: load_constant_v16s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; ; GFX8-LABEL: name: load_constant_v16s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; ; GFX10-LABEL: name: load_constant_v16s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -842,18 +902,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; ; GFX7-LABEL: name: load_constant_v8s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; ; GFX8-LABEL: name: load_constant_v8s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] + ; ; GFX10-LABEL: name: load_constant_v8s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -887,18 +950,21 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -931,18 +997,21 @@ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] + ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -975,6 +1044,7 @@ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} @@ -982,12 +1052,14 @@ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -1021,12 +1093,14 @@ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] + ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} @@ -1034,6 +1108,7 @@ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -1067,6 +1142,7 @@ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 ; GFX6-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} @@ -1074,6 +1150,7 @@ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 ; GFX7-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} @@ -1081,6 +1158,7 @@ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 ; GFX8-NEXT: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] + ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -1117,10 +1195,11 @@ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} @@ -1131,10 +1210,11 @@ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} @@ -1145,10 +1225,11 @@ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -1186,10 +1267,11 @@ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) ; GFX6-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7-NEXT: {{ $}} @@ -1202,10 +1284,11 @@ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX7-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX7-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) ; GFX7-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} @@ -1218,10 +1301,11 @@ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) ; GFX8-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] + ; ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir @@ -20,6 +20,7 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -27,6 +28,7 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -60,6 +62,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -67,6 +70,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -103,6 +107,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -110,6 +115,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -156,6 +162,7 @@ ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -173,6 +180,7 @@ ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -219,6 +227,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -246,6 +255,7 @@ ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -284,6 +294,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10-NEXT: {{ $}} @@ -311,6 +322,7 @@ ; GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1 ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX11: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX11-NEXT: {{ $}} @@ -347,6 +359,7 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -354,6 +367,7 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -387,6 +401,7 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -394,6 +409,7 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -432,11 +448,12 @@ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -449,11 +466,12 @@ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -466,7 +484,7 @@ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) @@ -497,6 +515,7 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -504,6 +523,7 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -537,6 +557,7 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -549,11 +570,12 @@ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -586,6 +608,7 @@ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -593,6 +616,7 @@ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -630,11 +654,12 @@ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -647,11 +672,12 @@ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -664,7 +690,7 @@ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) @@ -700,11 +726,12 @@ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -717,11 +744,12 @@ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -734,7 +762,7 @@ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) @@ -770,11 +798,12 @@ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -787,11 +816,12 @@ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -804,7 +834,7 @@ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) @@ -839,11 +869,12 @@ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} @@ -856,11 +887,12 @@ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} @@ -873,7 +905,7 @@ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 ; GFX11-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc - ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc + ; GFX11-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) @@ -900,11 +932,13 @@ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX10: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX11: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] @@ -929,10 +963,12 @@ ; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX9-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX10-LABEL: name: load_global_s32_from_undef_vgpr ; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX10-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10-NEXT: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; ; GFX11-LABEL: name: load_global_s32_from_undef_vgpr ; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GFX11-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -24,6 +24,7 @@ ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] + ; ; WAVE32-LABEL: name: or_s1_vcc_vcc_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -61,6 +62,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; WAVE32-LABEL: name: or_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -93,6 +95,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; WAVE32-LABEL: name: or_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -125,6 +128,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] + ; ; WAVE32-LABEL: name: or_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -157,6 +161,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; WAVE32-LABEL: name: or_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -187,6 +192,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] + ; ; WAVE32-LABEL: name: or_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -217,6 +223,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; WAVE32-LABEL: name: or_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -247,6 +254,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] + ; ; WAVE32-LABEL: name: or_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -277,6 +285,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] + ; ; WAVE32-LABEL: name: or_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -307,6 +316,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] + ; ; WAVE32-LABEL: name: or_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -337,6 +347,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] + ; ; WAVE32-LABEL: name: or_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -369,6 +380,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; WAVE64-NEXT: [[OR:%[0-9]+]]:vgpr(s64) = G_OR [[COPY]], [[COPY1]] ; WAVE64-NEXT: S_ENDPGM 0, implicit [[OR]](s64) + ; ; WAVE32-LABEL: name: or_s64_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} @@ -403,6 +415,7 @@ ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] + ; ; WAVE32-LABEL: name: or_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -444,11 +457,12 @@ ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B64_]] ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -456,7 +470,7 @@ ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] @@ -492,10 +506,11 @@ ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]] + ; ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -503,7 +518,7 @@ ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptr-add.mir @@ -25,9 +25,10 @@ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX8-LABEL: name: gep_p0_sgpr_sgpr ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} @@ -38,9 +39,10 @@ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: gep_p0_sgpr_sgpr ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} @@ -51,9 +53,10 @@ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE64-NEXT: {{ $}} @@ -64,9 +67,10 @@ ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE32-NEXT: {{ $}} @@ -77,7 +81,7 @@ ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 @@ -105,9 +109,10 @@ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX8-LABEL: name: gep_p0_vgpr_vgpr ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -118,9 +123,10 @@ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: gep_p0_vgpr_vgpr ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -131,9 +137,10 @@ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE64-LABEL: name: gep_p0_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE64-NEXT: {{ $}} @@ -144,9 +151,10 @@ ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE32-LABEL: name: gep_p0_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE32-NEXT: {{ $}} @@ -157,8 +165,8 @@ ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -185,9 +193,10 @@ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX8-LABEL: name: gep_p0_sgpr_vgpr ; GFX8: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -198,9 +207,10 @@ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: gep_p0_sgpr_vgpr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -211,9 +221,10 @@ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX10-WAVE64-NEXT: {{ $}} @@ -224,9 +235,10 @@ ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 ; GFX10-WAVE32-NEXT: {{ $}} @@ -237,8 +249,8 @@ ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -260,35 +272,39 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX8-LABEL: name: gep_p3_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX9-LABEL: name: gep_p3_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -310,15 +326,17 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; ; GFX8-LABEL: name: gep_p3_vgpr_vgpr ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; ; GFX9-LABEL: name: gep_p3_vgpr_vgpr ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -326,6 +344,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] + ; ; GFX10-WAVE64-LABEL: name: gep_p3_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0, $vgpr1 ; GFX10-WAVE64-NEXT: {{ $}} @@ -333,6 +352,7 @@ ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] + ; ; GFX10-WAVE32-LABEL: name: gep_p3_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0, $vgpr1 ; GFX10-WAVE32-NEXT: {{ $}} @@ -360,15 +380,17 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX6-NEXT: S_ENDPGM 0, implicit %2 + ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; ; GFX8-LABEL: name: gep_p3_sgpr_vgpr ; GFX8: liveins: $sgpr0, $vgpr0 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec - ; GFX8-NEXT: S_ENDPGM 0, implicit %2 + ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec + ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]] + ; ; GFX9-LABEL: name: gep_p3_sgpr_vgpr ; GFX9: liveins: $sgpr0, $vgpr0 ; GFX9-NEXT: {{ $}} @@ -376,6 +398,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] + ; ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_vgpr ; GFX10-WAVE64: liveins: $sgpr0, $vgpr0 ; GFX10-WAVE64-NEXT: {{ $}} @@ -383,6 +406,7 @@ ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-WAVE64-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] + ; ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_vgpr ; GFX10-WAVE32: liveins: $sgpr0, $vgpr0 ; GFX10-WAVE32-NEXT: {{ $}} @@ -410,35 +434,39 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX8-LABEL: name: gep_p6_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX9-LABEL: name: gep_p6_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX10-WAVE64-LABEL: name: gep_p6_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX10-WAVE32-LABEL: name: gep_p6_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p6) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -460,35 +488,39 @@ ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX8-LABEL: name: gep_p2_sgpr_sgpr ; GFX8: liveins: $sgpr0, $sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX9-LABEL: name: gep_p2_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX9-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX10-WAVE64-LABEL: name: gep_p2_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE64-NEXT: {{ $}} ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; ; GFX10-WAVE32-LABEL: name: gep_p2_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0, $sgpr1 ; GFX10-WAVE32-NEXT: {{ $}} ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10-WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def dead $scc ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]] %0:sgpr(p2) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -515,9 +547,10 @@ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX6-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX6-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX8-LABEL: name: gep_p999_sgpr_sgpr ; GFX8: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} @@ -528,9 +561,10 @@ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX8-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX8-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: gep_p999_sgpr_sgpr ; GFX9: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX9-NEXT: {{ $}} @@ -541,9 +575,10 @@ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX9-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX9-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE64-LABEL: name: gep_p999_sgpr_sgpr ; GFX10-WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE64-NEXT: {{ $}} @@ -554,9 +589,10 @@ ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE64-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE32-LABEL: name: gep_p999_sgpr_sgpr ; GFX10-WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX10-WAVE32-NEXT: {{ $}} @@ -567,7 +603,7 @@ ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE32-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc - ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc + ; GFX10-WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def dead $scc, implicit $scc ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p999) = COPY $sgpr0_sgpr1 @@ -595,9 +631,10 @@ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX6-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX6-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX6-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX8-LABEL: name: gep_p999_vgpr_vgpr ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-NEXT: {{ $}} @@ -608,9 +645,10 @@ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX8-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX8-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX9-LABEL: name: gep_p999_vgpr_vgpr ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -621,9 +659,10 @@ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX9-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX9-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE64-LABEL: name: gep_p999_vgpr_vgpr ; GFX10-WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE64-NEXT: {{ $}} @@ -634,9 +673,10 @@ ; GFX10-WAVE64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE64-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; ; GFX10-WAVE32-LABEL: name: gep_p999_vgpr_vgpr ; GFX10-WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-WAVE32-NEXT: {{ $}} @@ -647,8 +687,8 @@ ; GFX10-WAVE32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX10-WAVE32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 ; GFX10-WAVE32-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %8, %subreg.sub1 + ; GFX10-WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10-WAVE32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 ; GFX10-WAVE32-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p999) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -15,7 +15,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -38,7 +38,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -252645136 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -252645136 @@ -61,7 +61,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -1 @@ -84,7 +84,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 0 @@ -107,7 +107,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2147483648 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -2147483648 @@ -130,7 +130,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1073741824 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -1073741824 @@ -153,7 +153,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -2 @@ -176,7 +176,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -4 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -4 @@ -199,7 +199,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -8 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -8 @@ -222,7 +222,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -16 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -16 @@ -245,7 +245,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -536870912 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -536870912 @@ -268,7 +268,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -315,7 +315,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 0 @@ -340,7 +340,7 @@ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096 @@ -365,7 +365,7 @@ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808 @@ -419,7 +419,7 @@ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def dead $scc ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 4294967296 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext-inreg.mir @@ -225,7 +225,7 @@ ; GCN: liveins: $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]].sub0, 31, implicit-def $scc + ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]].sub0, 31, implicit-def dead $scc ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir @@ -128,7 +128,7 @@ ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], 31, implicit-def $scc + ; GCN-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], 31, implicit-def dead $scc ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1 ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-trunc.v2s16.mir @@ -20,32 +20,34 @@ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX6-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc + ; GFX6-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def dead $scc ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; GFX8-LABEL: name: trunc_sgpr_v2s32_to_v2s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX8-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc + ; GFX8-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def dead $scc ; GFX8-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] + ; ; GFX11-LABEL: name: trunc_sgpr_v2s32_to_v2s16 ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def $scc + ; GFX11-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def $scc + ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def dead $scc + ; GFX11-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_LSHL_B32_]], [[S_AND_B32_]], implicit-def dead $scc ; GFX11-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(<2 x s16>) = G_TRUNC %0 @@ -72,6 +74,7 @@ ; GFX6-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; GFX6-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_LSHLREV_B32_e64_]], [[V_AND_B32_e64_]], implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_OR_B32_e64_]] + ; ; GFX8-LABEL: name: trunc_vgpr_v2s32_to_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -80,6 +83,7 @@ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX8-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY2]], 0, 5, 2, 4, implicit $exec, implicit [[COPY1]](tied-def 0) ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_sdwa]] + ; ; GFX11-LABEL: name: trunc_vgpr_v2s32_to_v2s16 ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir @@ -29,6 +29,7 @@ ; WAVE64-NEXT: $scc = COPY [[COPY4]] ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]] + ; ; WAVE32-LABEL: name: uadde_s32_s1_sss ; WAVE32: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE32-NEXT: {{ $}} @@ -74,6 +75,7 @@ ; WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; WAVE32-LABEL: name: uadde_s32_s1_vvv ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} @@ -94,3 +96,85 @@ %7:vgpr(s32) = G_SELECT %6, %0, %1 S_ENDPGM 0, implicit %5, implicit %7 ... + +--- +name: uadde_s32_s1_sss_unused_co +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + + ; WAVE64-LABEL: name: uadde_s32_s1_sss_unused_co + ; WAVE64: liveins: $sgpr0, $sgpr1, $sgpr2 + ; WAVE64-NEXT: {{ $}} + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc + ; WAVE64-NEXT: $scc = COPY [[COPY3]] + ; WAVE64-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def dead $scc, implicit $scc + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]] + ; + ; WAVE32-LABEL: name: uadde_s32_s1_sss_unused_co + ; WAVE32: liveins: $sgpr0, $sgpr1, $sgpr2 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc + ; WAVE32-NEXT: $scc = COPY [[COPY3]] + ; WAVE32-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def dead $scc, implicit $scc + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_ADDC_U32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = COPY $sgpr2 + %3:sgpr(s32) = G_CONSTANT i32 0 + %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3 + %5:sgpr(s32), %6:sgpr(s32) = G_UADDE %0, %1, %4 + S_ENDPGM 0, implicit %5 +... + +--- +name: uadde_s32_s1_vvv_unused_co +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; WAVE64-LABEL: name: uadde_s32_s1_vvv_unused_co + ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2 + ; WAVE64-NEXT: {{ $}} + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]] + ; + ; WAVE32-LABEL: name: uadde_s32_s1_vvv_unused_co + ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_CONSTANT i32 0 + %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 + %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4 + S_ENDPGM 0, implicit %5 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir @@ -29,6 +29,7 @@ ; WAVE64-NEXT: $scc = COPY [[COPY4]] ; WAVE64-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]] + ; ; WAVE32-LABEL: name: usube_s32_s1_sss ; WAVE32: liveins: $sgpr0, $sgpr1, $sgpr2 ; WAVE32-NEXT: {{ $}} @@ -74,6 +75,7 @@ ; WAVE64-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec ; WAVE64-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; ; WAVE32-LABEL: name: usube_s32_s1_vvv ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 ; WAVE32-NEXT: {{ $}} @@ -94,3 +96,85 @@ %7:vgpr(s32) = G_SELECT %6, %0, %1 S_ENDPGM 0, implicit %5, implicit %7 ... + +--- +name: usube_s32_s1_sss_unused_co +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + + ; WAVE64-LABEL: name: usube_s32_s1_sss_unused_co + ; WAVE64: liveins: $sgpr0, $sgpr1, $sgpr2 + ; WAVE64-NEXT: {{ $}} + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE64-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc + ; WAVE64-NEXT: $scc = COPY [[COPY3]] + ; WAVE64-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def dead $scc, implicit $scc + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]] + ; + ; WAVE32-LABEL: name: usube_s32_s1_sss_unused_co + ; WAVE32: liveins: $sgpr0, $sgpr1, $sgpr2 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc + ; WAVE32-NEXT: $scc = COPY [[COPY3]] + ; WAVE32-NEXT: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def dead $scc, implicit $scc + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_SUBB_U32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = COPY $sgpr2 + %3:sgpr(s32) = G_CONSTANT i32 0 + %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3 + %5:sgpr(s32), %6:sgpr(s32) = G_USUBE %0, %1, %4 + S_ENDPGM 0, implicit %5 +... + +--- +name: usube_s32_s1_vvv_unused_co +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; WAVE64-LABEL: name: usube_s32_s1_vvv_unused_co + ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2 + ; WAVE64-NEXT: {{ $}} + ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE64-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]] + ; + ; WAVE32-LABEL: name: usube_s32_s1_vvv_unused_co + ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 + ; WAVE32-NEXT: {{ $}} + ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec + ; WAVE32-NEXT: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec + ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_CONSTANT i32 0 + %4:vcc(s1) = G_ICMP intpred(eq), %2, %3 + %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4 + S_ENDPGM 0, implicit %5 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -24,6 +24,7 @@ ; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] + ; ; WAVE32-LABEL: name: xor_s1_vcc_vcc_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -62,6 +63,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] + ; ; WAVE32-LABEL: name: xor_s1_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -94,6 +96,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] + ; ; WAVE32-LABEL: name: xor_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -126,6 +129,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; ; WAVE32-LABEL: name: xor_s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -158,6 +162,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] + ; ; WAVE32-LABEL: name: xor_s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -188,6 +193,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] + ; ; WAVE32-LABEL: name: xor_s64_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -218,6 +224,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; WAVE64-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]] + ; ; WAVE32-LABEL: name: xor_v2s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 ; WAVE32-NEXT: {{ $}} @@ -248,6 +255,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] + ; ; WAVE32-LABEL: name: xor_v2s32_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -278,6 +286,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[COPY]], [[COPY1]], implicit-def dead $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] + ; ; WAVE32-LABEL: name: xor_v4s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; WAVE32-NEXT: {{ $}} @@ -308,6 +317,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; ; WAVE32-LABEL: name: xor_s32_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -338,6 +348,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]] + ; ; WAVE32-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -370,6 +381,7 @@ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 ; WAVE64-NEXT: [[XOR:%[0-9]+]]:vgpr(s64) = G_XOR [[COPY]], [[COPY1]] ; WAVE64-NEXT: S_ENDPGM 0, implicit [[XOR]](s64) + ; ; WAVE32-LABEL: name: xor_s64_vgpr_vgpr_vgpr ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; WAVE32-NEXT: {{ $}} @@ -404,6 +416,7 @@ ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] + ; ; WAVE32-LABEL: name: xor_s1_vcc_copy_to_vcc ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -445,11 +458,12 @@ ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B64_]] ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]] + ; ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -457,7 +471,7 @@ ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]] @@ -493,10 +507,11 @@ ; WAVE64-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]] + ; ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32-NEXT: {{ $}} @@ -504,7 +519,7 @@ ; WAVE32-NEXT: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc + ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def dead $scc ; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir @@ -14,7 +14,7 @@ ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def dead $scc ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_AND_B32_]] ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] %0:sgpr(s32) = COPY $sgpr0 @@ -37,7 +37,7 @@ ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 @@ -232,7 +232,7 @@ ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll @@ -15,7 +15,7 @@ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 40 ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @lds - ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_1]], [[S_MOV_B32_]], implicit-def dead $scc ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 ; GFX9: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 36, 0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]]