diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3255,6 +3255,7 @@ Opc == AMDGPU::V_MAC_LEGACY_F32_e64 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64; + bool Src0Literal = false; switch (Opc) { default: @@ -3281,7 +3282,7 @@ return nullptr; if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0)) - return nullptr; + Src0Literal = true; break; } @@ -3319,7 +3320,7 @@ }; int64_t Imm; - if (getFoldableImm(Src2, Imm, &DefMI)) { + if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) { unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32) : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); @@ -3339,7 +3340,7 @@ unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32) : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); - if (getFoldableImm(Src1, Imm, &DefMI)) { + if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) { if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) .add(*Dst) @@ -3353,7 +3354,11 @@ return MIB; } } - if (getFoldableImm(Src0, Imm, &DefMI)) { + if (Src0Literal || getFoldableImm(Src0, Imm, &DefMI)) { + if (Src0Literal) { + Imm = Src0->getImm(); + DefMI = nullptr; + } if (pseudoToMCOpcode(NewOpc) != -1 && isOperandLegal( MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0), @@ -3366,12 +3371,19 @@ updateLiveVariables(LV, MI, *MIB); if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *MIB); - killDef(); + if (DefMI) + killDef(); return MIB; } } } + // VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma + // because VOP3 does not allow a literal operand. + // TODO: Remove this restriction for GFX10. + if (Src0Literal) + return nullptr; + unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64 : IsF64 ? AMDGPU::V_FMA_F64_e64 : IsLegacy diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -1102,66 +1102,65 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8 +; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, s8 ; GISEL-NEXT: s_sub_u32 s6, 0, s8 ; GISEL-NEXT: s_cselect_b32 s4, 1, 0 -; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 -; GISEL-NEXT: v_mov_b32_e32 v6, v4 +; GISEL-NEXT: v_madmk_f32 v6, v4, 0x4f800000, v5 ; GISEL-NEXT: s_and_b32 s4, s4, 1 -; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 +; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v4 ; GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000 ; GISEL-NEXT: s_bfe_i32 s5, -1, 0x10000 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v5 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 ; GISEL-NEXT: v_mov_b32_e32 v5, s4 ; GISEL-NEXT: v_mov_b32_e32 v4, s5 -; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 ; GISEL-NEXT: s_sub_u32 s9, 0, s8 ; GISEL-NEXT: s_cselect_b32 s4, 1, 0 -; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 ; GISEL-NEXT: v_trunc_f32_e32 v8, v8 -; GISEL-NEXT: s_and_b32 s4, s4, 1 ; GISEL-NEXT: v_trunc_f32_e32 v9, v9 +; GISEL-NEXT: s_and_b32 s4, s4, 1 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 ; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9 ; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v10, s6, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 ; GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; GISEL-NEXT: s_subb_u32 s10, 0, 0 -; GISEL-NEXT: v_mul_lo_u32 v10, s9, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, s9, v9 ; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000 ; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_lo_u32 v11, s6, v9 -; GISEL-NEXT: v_mul_lo_u32 v12, s9, v6 -; GISEL-NEXT: v_mul_lo_u32 v13, s10, v6 -; GISEL-NEXT: v_mul_hi_u32 v14, s9, v6 -; GISEL-NEXT: v_mov_b32_e32 v15, s4 -; GISEL-NEXT: v_mul_lo_u32 v16, s6, v7 -; GISEL-NEXT: v_mul_lo_u32 v17, s7, v7 -; GISEL-NEXT: v_mul_hi_u32 v18, s6, v7 +; GISEL-NEXT: v_mul_lo_u32 v12, s6, v6 +; GISEL-NEXT: v_mul_lo_u32 v13, s7, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, s6, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, s9, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, s10, v7 +; GISEL-NEXT: v_mul_hi_u32 v17, s9, v7 +; GISEL-NEXT: v_mov_b32_e32 v18, s4 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 ; GISEL-NEXT: v_mul_lo_u32 v13, v8, v12 ; GISEL-NEXT: v_mul_hi_u32 v19, v6, v12 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v16 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v9, v15 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v16 -; GISEL-NEXT: v_mul_hi_u32 v16, v9, v16 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v7, v11 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v7, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v9, v15 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v7, v11 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14 ; GISEL-NEXT: v_mul_lo_u32 v14, v6, v10 -; GISEL-NEXT: v_mul_lo_u32 v17, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v10 ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v19 @@ -1170,124 +1169,124 @@ ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] ; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 ; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11 -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v17, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; GISEL-NEXT: v_mul_hi_u32 v18, v7, v11 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 +; GISEL-NEXT: v_mul_hi_u32 v17, v7, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v15 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v17 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v16 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, s9, v6 -; GISEL-NEXT: v_mul_lo_u32 v12, s10, v6 -; GISEL-NEXT: v_mul_hi_u32 v13, s9, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v16 +; GISEL-NEXT: v_mul_lo_u32 v10, s6, v6 +; GISEL-NEXT: v_mul_lo_u32 v12, s7, v6 +; GISEL-NEXT: v_mul_hi_u32 v13, s6, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, s6, v7 -; GISEL-NEXT: v_mul_lo_u32 v14, s7, v7 -; GISEL-NEXT: v_mul_hi_u32 v16, s6, v7 -; GISEL-NEXT: v_mul_lo_u32 v17, s9, v8 -; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, s9, v7 +; GISEL-NEXT: v_mul_lo_u32 v14, s10, v7 +; GISEL-NEXT: v_mul_hi_u32 v15, s9, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, s6, v8 +; GISEL-NEXT: v_mul_lo_u32 v17, v8, v10 ; GISEL-NEXT: v_mul_hi_u32 v19, v6, v10 ; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 -; GISEL-NEXT: v_mul_lo_u32 v17, s6, v9 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, s9, v9 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v9, v11 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 ; GISEL-NEXT: v_mul_hi_u32 v13, v7, v11 ; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v7, v14 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v14 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13 ; GISEL-NEXT: v_mul_lo_u32 v13, v6, v12 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12 -; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v18, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v19 ; GISEL-NEXT: v_mul_hi_u32 v13, v6, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v19 +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v17, v19 ; GISEL-NEXT: v_mul_lo_u32 v19, v9, v14 -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v16, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; GISEL-NEXT: v_mul_hi_u32 v17, v7, v14 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_mul_hi_u32 v16, v7, v14 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v19, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 ; GISEL-NEXT: v_mov_b32_e32 v19, s11 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 ; GISEL-NEXT: v_mul_hi_u32 v14, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v16 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v15 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v12, v2, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 +; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v12, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7 -; GISEL-NEXT: v_mul_hi_u32 v13, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 -; GISEL-NEXT: v_mul_lo_u32 v14, v2, v8 -; GISEL-NEXT: v_mul_lo_u32 v16, v3, v8 -; GISEL-NEXT: v_mul_hi_u32 v17, v2, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8 -; GISEL-NEXT: v_mul_lo_u32 v18, v0, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v13, v2, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7 +; GISEL-NEXT: v_mul_lo_u32 v14, v0, v8 +; GISEL-NEXT: v_mul_lo_u32 v15, v1, v8 +; GISEL-NEXT: v_mul_hi_u32 v16, v0, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 +; GISEL-NEXT: v_mul_lo_u32 v17, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 -; GISEL-NEXT: v_mul_hi_u32 v13, v0, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v3, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v16, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v15, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] ; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v17 +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v16 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v17 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v16 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc @@ -1299,60 +1298,60 @@ ; GISEL-NEXT: v_mul_hi_u32 v6, s8, v6 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 ; GISEL-NEXT: v_mul_lo_u32 v13, s8, v7 -; GISEL-NEXT: v_mul_lo_u32 v16, 0, v7 +; GISEL-NEXT: v_mul_lo_u32 v15, 0, v7 ; GISEL-NEXT: v_mul_hi_u32 v7, s8, v7 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 ; GISEL-NEXT: v_mul_lo_u32 v8, s8, v8 ; GISEL-NEXT: v_mul_lo_u32 v9, s8, v9 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v6, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13 -; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], v1, v7, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v7 -; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v0 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v13 +; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], v3, v7, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v7 +; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v6, v15, v6, s[6:7] -; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2 -; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v6, v18, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s8, v0 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s8, v2 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v19, v10, vcc -; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s8, v7 -; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc -; GISEL-NEXT: v_subrev_i32_e32 v12, vcc, s8, v11 -; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, v11, v12, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[4:5] -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, s8, v7 +; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v19, v12, vcc +; GISEL-NEXT: v_subrev_i32_e32 v14, vcc, s8, v11 +; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v7, v10, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v11, v14, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_urem_v2i64_oddk_denom: diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir --- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir @@ -164,7 +164,7 @@ ... # GCN-LABEL: name: test_fmaak_otherimm_src0_f32 -# GCN: %2:vgpr_32 = V_FMAC_F32_e32 1120403456, %0, %2, implicit $mode, implicit $exec +# GCN: %2:vgpr_32 = V_FMAMK_F32 %0, 1120403456, %1, implicit $mode, implicit $exec --- name: test_fmaak_otherimm_src0_f32 diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir --- a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir @@ -152,7 +152,7 @@ # Non-inline immediate uses constant bus already. # GCN-LABEL: name: test_madak_otherimm_src0_f32 -# GCN: %1:vgpr_32 = V_MAC_F32_e32 1120403456, %0, %1, implicit $mode, implicit $exec +# GCN: %1:vgpr_32 = V_MADMK_F32 %0, 1120403456, %2:vgpr_32, implicit $mode, implicit $exec --- name: test_madak_otherimm_src0_f32