diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -251,6 +251,12 @@
   applyCombineUnmergeMergeToPlainValues(MachineInstr &MI,
                                         SmallVectorImpl<Register> &Operands);
 
+  /// Transform G_UNMERGE Constant -> Constant1, Constant2, ...
+  bool matchCombineUnmergeConstant(MachineInstr &MI,
+                                   SmallVectorImpl<APInt> &Csts);
+  bool applyCombineUnmergeConstant(MachineInstr &MI,
+                                   SmallVectorImpl<APInt> &Csts);
+
   /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
   bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
   bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -390,6 +390,15 @@
   (apply [{ return Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }])
 >;
 
+// Fold (unmerge cst) -> cst1, cst2, ...
+def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt, 8>">;
+def unmerge_cst : GICombineRule<
+  (defs root:$d, unmerge_cst_matchinfo:$info),
+  (match (wip_match_opcode G_UNMERGE_VALUES): $d,
+  [{ return Helper.matchCombineUnmergeConstant(*${d}, ${info}); }]),
+  (apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -418,4 +427,4 @@
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
-    not_cmp_fold, unmerge_merge]>;
+    not_cmp_fold, unmerge_merge, unmerge_cst]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1615,6 +1615,56 @@
   return true;
 }
 
+/// Match a G_UNMERGE_VALUES whose source is a G_CONSTANT and compute the
+/// constants that each def receives.
+///
+/// On success, \p Csts holds one APInt per def, ordered from the least
+/// significant chunk of the source constant (def 0) to the most significant.
+bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
+                                                 SmallVectorImpl<APInt> &Csts) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "Expected an unmerge");
+  // The source is the last operand; every operand before it is a def.
+  unsigned SrcIdx = MI.getNumOperands() - 1;
+  Register SrcReg = MI.getOperand(SrcIdx).getReg();
+  MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+  if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT)
+    return false;
+  // Break down the big constant into smaller ones.
+  const MachineOperand &CstVal = SrcInstr->getOperand(1);
+  APInt Val = CstVal.getCImm()->getValue();
+
+  // The chunk width is taken from the first def's type.
+  LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+  unsigned ShiftAmt = Dst0Ty.getSizeInBits();
+  // Unmerge a constant: peel off the low ShiftAmt bits for each def in turn.
+  for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
+    Csts.emplace_back(Val.trunc(ShiftAmt));
+    Val = Val.lshr(ShiftAmt);
+  }
+
+  return true;
+}
+
+/// Replace each def of the G_UNMERGE_VALUES \p MI with a G_CONSTANT built from
+/// the corresponding entry of \p Csts, then erase \p MI.
+bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
+                                                 SmallVectorImpl<APInt> &Csts) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+         "Expected an unmerge");
+  assert((MI.getNumOperands() - 1 == Csts.size()) &&
+         "Not enough operands to replace all defs");
+  unsigned NumElems = MI.getNumOperands() - 1;
+  Builder.setInstrAndDebugLoc(MI);
+  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+    Register DstReg = MI.getOperand(Idx).getReg();
+    Builder.buildConstant(DstReg, Csts[Idx]);
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
                                                 unsigned TargetShiftSize,
                                                 unsigned &ShiftVal) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
@@ -181,3 +181,92 @@
     $w1 = COPY %4(s32)
 ...
 
+# Unmerge a constant into a bunch of smaller constants.
+# Constant is 0x0102030405060708090a0b0c0d0e0f10 and we break it down into
+# bytes:
+# cst1 0x10
+# cst2 0x0f
+# cst3 0x0e
+# ...
+--- +name: test_combine_unmerge_cst +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_cst + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 16 + ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 15 + ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 14 + ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 13 + ; CHECK: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 12 + ; CHECK: [[C5:%[0-9]+]]:_(s8) = G_CONSTANT i8 11 + ; CHECK: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 10 + ; CHECK: [[C7:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 + ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 8 + ; CHECK: [[C9:%[0-9]+]]:_(s8) = G_CONSTANT i8 7 + ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6 + ; CHECK: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 + ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 + ; CHECK: [[C13:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 + ; CHECK: [[C14:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 + ; CHECK: [[C15:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK: $b0 = COPY [[C]](s8) + ; CHECK: $b1 = COPY [[C1]](s8) + ; CHECK: $b2 = COPY [[C2]](s8) + ; CHECK: $b3 = COPY [[C3]](s8) + ; CHECK: $b4 = COPY [[C4]](s8) + ; CHECK: $b5 = COPY [[C5]](s8) + ; CHECK: $b6 = COPY [[C6]](s8) + ; CHECK: $b7 = COPY [[C7]](s8) + ; CHECK: $b8 = COPY [[C8]](s8) + ; CHECK: $b9 = COPY [[C9]](s8) + ; CHECK: $b10 = COPY [[C10]](s8) + ; CHECK: $b11 = COPY [[C11]](s8) + ; CHECK: $b12 = COPY [[C12]](s8) + ; CHECK: $b13 = COPY [[C13]](s8) + ; CHECK: $b14 = COPY [[C14]](s8) + ; CHECK: $b15 = COPY [[C15]](s8) + %0:_(s128) = G_CONSTANT i128 1339673755198158349044581307228491536 + %1:_(s8),%2:_(s8),%3:_(s8),%4:_(s8),%5:_(s8),%6:_(s8),%7:_(s8),%8:_(s8),%9:_(s8),%10:_(s8),%11:_(s8),%12:_(s8),%13:_(s8),%14:_(s8),%15:_(s8),%16:_(s8) = G_UNMERGE_VALUES %0(s128) + $b0 = COPY %1(s8) + $b1 = COPY %2(s8) + $b2 = COPY %3(s8) + $b3 = COPY %4(s8) + $b4 = COPY %5(s8) + $b5 = COPY %6(s8) + $b6 = COPY %7(s8) + $b7 = COPY %8(s8) + $b8 = COPY %9(s8) + $b9 = COPY %10(s8) + $b10 = COPY %11(s8) + $b11 = COPY %12(s8) + $b12 = COPY %13(s8) + $b13 = COPY %14(s8) + $b14 
= COPY %15(s8) + $b15 = COPY %16(s8) +... + +# Unmerge a constant on a non-power of 2 type into a bunch of smaller constant. +# Constant is a 3 | 2 | 1 in chunks of 13-bit. +--- +name: test_combine_unmerge_cst_36bit +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_cst_36bit + ; CHECK: [[C:%[0-9]+]]:_(s13) = G_CONSTANT i13 1 + ; CHECK: [[C1:%[0-9]+]]:_(s13) = G_CONSTANT i13 2 + ; CHECK: [[C2:%[0-9]+]]:_(s13) = G_CONSTANT i13 3 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[C]](s13) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[C1]](s13) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s16) = G_ZEXT [[C2]](s13) + ; CHECK: $h0 = COPY [[ZEXT]](s16) + ; CHECK: $h1 = COPY [[ZEXT1]](s16) + ; CHECK: $h2 = COPY [[ZEXT2]](s16) + %0:_(s39) = G_CONSTANT i39 201342977 + %1:_(s13),%2:_(s13),%3:_(s13) = G_UNMERGE_VALUES %0(s39) + %4:_(s16) = G_ZEXT %1(s13) + %5:_(s16) = G_ZEXT %2(s13) + %6:_(s16) = G_ZEXT %3(s13) + $h0 = COPY %4(s16) + $h1 = COPY %5(s16) + $h2 = COPY %6(s16) +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -4999,24 +4999,22 @@ ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: s_brev_b32 s8, 1 -; GFX6-NEXT: v_mov_b32_e32 v1, s8 -; GFX6-NEXT: v_add_i32_e64 v4, s[6:7], 0, v0 +; GFX6-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX6-NEXT: v_add_i32_e64 v1, s[6:7], 0, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX6-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v6 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v3, v7, vcc ; GFX6-NEXT: 
v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_add_i32_e64 v6, s[6:7], 0, v2 -; GFX6-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX6-NEXT: v_add_i32_e64 v3, s[6:7], 0, v2 +; GFX6-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_saddsat_v2i64: @@ -5027,24 +5025,22 @@ ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: s_brev_b32 s8, 1 -; GFX8-NEXT: v_mov_b32_e32 v1, s8 -; GFX8-NEXT: v_add_u32_e64 v4, s[6:7], 0, v0 +; GFX8-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX8-NEXT: v_add_u32_e64 v1, s[6:7], 0, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v6 ; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v3, v7, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX8-NEXT: v_mov_b32_e32 v3, s8 -; GFX8-NEXT: v_add_u32_e64 v6, s[6:7], 0, v2 -; GFX8-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX8-NEXT: v_add_u32_e64 v3, s[6:7], 0, v2 +; GFX8-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX8-NEXT: 
v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_saddsat_v2i64: @@ -5055,56 +5051,53 @@ ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: s_brev_b32 s8, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: v_add_co_u32_e64 v4, s[6:7], 0, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX9-NEXT: v_add_co_u32_e64 v1, s[6:7], 0, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v2, v6 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v3, v7, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_add_co_u32_e64 v6, s[6:7], 0, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX9-NEXT: v_add_co_u32_e64 v3, s[6:7], 0, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_saddsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v9, v0 -; GFX10-NEXT: v_mov_b32_e32 v10, v1 -; GFX10-NEXT: v_mov_b32_e32 v13, v2 -; GFX10-NEXT: v_mov_b32_e32 v14, v3 +; GFX10-NEXT: v_mov_b32_e32 v14, v0 +; GFX10-NEXT: v_mov_b32_e32 v15, v1 +; 
GFX10-NEXT: v_mov_b32_e32 v17, v2 +; GFX10-NEXT: v_mov_b32_e32 v18, v3 ; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[4:5] -; GFX10-NEXT: v_add_co_u32_e64 v19, vcc_lo, v9, v4 -; GFX10-NEXT: s_brev_b32 s8, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, v10, v5, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v23, vcc_lo, v13, v6 +; GFX10-NEXT: v_add_co_u32_e64 v8, vcc_lo, v14, v4 ; GFX10-NEXT: v_cmp_gt_i64_e64 s6, 0, v[6:7] -; GFX10-NEXT: v_add_co_ci_u32_e32 v24, vcc_lo, v14, v7, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32_e64 v19, vcc_lo, v17, v6 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo +; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 +; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15] ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v20 -; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[19:20], v[9:10] -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v24 -; GFX10-NEXT: v_add_co_u32_e64 v4, s5, v0, 0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s5, s8, v0, s5 -; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[23:24], v[13:14] -; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v1, 0 +; GFX10-NEXT: v_add_co_u32_e64 v1, s5, v12, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 +; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[19:20], v[17:18] +; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v0, 0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, s8, v1, s7 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v19, v4, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v20, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc_lo ; GFX10-NEXT: s_xor_b32 vcc_lo, s6, s5 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v23, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v24, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v20, v3, vcc_lo ; GFX10-NEXT: 
s_setpc_b64 s[30:31] %result = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ret <2 x i64> %result @@ -6225,15 +6218,14 @@ ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0, v0 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6-NEXT: s_brev_b32 s4, 1 -; GFX6-NEXT: v_mov_b32_e32 v8, s4 +; GFX6-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6248,43 +6240,42 @@ ; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX6-NEXT: s_cselect_b32 s5, 1, 0 +; GFX6-NEXT: s_cselect_b32 s4, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX6-NEXT: s_cmp_eq_u32 s6, 0 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX6-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: s_cselect_b32 s5, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX6-NEXT: v_ashr_i64 v[12:13], v[10:11], s6 ; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX6-NEXT: s_and_b32 s5, 1, s5 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX6-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], s6 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[10:11], s8 -; GFX6-NEXT: s_and_b32 s6, 1, s5 
+; GFX6-NEXT: s_and_b32 s6, 1, s4 ; GFX6-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX6-NEXT: v_ashr_i64 v[4:5], v[10:11], s7 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s6, 1, s9 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s5, 1, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_and_b32 s4, 1, s4 +; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0, v4 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX6-NEXT: v_mov_b32_e32 v12, s4 ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6334,15 +6325,14 @@ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: s_brev_b32 s4, 1 -; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX8-NEXT: 
v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6357,43 +6347,42 @@ ; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX8-NEXT: s_cselect_b32 s5, 1, 0 +; GFX8-NEXT: s_cselect_b32 s4, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX8-NEXT: s_cmp_eq_u32 s6, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: s_cselect_b32 s5, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX8-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX8-NEXT: s_and_b32 s5, 1, s5 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX8-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX8-NEXT: s_and_b32 s6, 1, s5 +; GFX8-NEXT: s_and_b32 s6, 1, s4 ; GFX8-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX8-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s6, 1, s9 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s5, 1, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_and_b32 s4, 1, s4 +; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0, v4 ; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX8-NEXT: 
v_mov_b32_e32 v12, s4 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6443,15 +6432,14 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: s_brev_b32 s4, 1 -; GFX9-NEXT: v_mov_b32_e32 v8, s4 +; GFX9-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6466,43 +6454,42 @@ ; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX9-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-NEXT: s_cselect_b32 s4, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX9-NEXT: s_cmp_eq_u32 s6, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX9-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX9-NEXT: s_and_b32 s5, 1, s5 ; 
GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX9-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX9-NEXT: s_and_b32 s6, 1, s5 +; GFX9-NEXT: s_and_b32 s6, 1, s4 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX9-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s6, 1, s9 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s5, 1, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_and_b32 s4, 1, s4 +; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, 0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v12, s4 ; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v12, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6561,7 +6548,6 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s8 ; GFX10-NEXT: v_xor_b32_e32 v9, v10, v20 -; GFX10-NEXT: s_brev_b32 s8, 1 ; GFX10-NEXT: s_cmp_lt_u32 s5, 64 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc_lo @@ -6571,7 +6557,7 @@ ; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, s8, v1, 
vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX10-NEXT: v_add_co_u32_e64 v8, s4, v26, v12 ; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v27, v13, s4 @@ -6619,7 +6605,7 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v7 ; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, 0, v3, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v19, v21, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, s8, v4, s4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, 0x80000000, v4, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v5, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v6, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v7, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1056,10 +1056,9 @@ ; CHECK-LABEL: v_sdiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1074,9 +1073,9 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1103,9 +1102,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; 
CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1113,6 +1112,7 @@ ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_movk_i32 s6, 0x1000 ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -1501,10 +1501,9 @@ ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s6, 0x1000 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xfffff000 +; CGP-NEXT: s_movk_i32 s6, 0xf000 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -1519,19 +1518,19 @@ ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_movk_i32 s7, 0x1000 ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 
v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -1552,9 +1551,9 @@ ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -1562,7 +1561,7 @@ ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -1587,6 +1586,7 @@ ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -1605,9 +1605,9 @@ ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 @@ -1616,8 +1616,8 @@ ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v10 ; 
CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 @@ -1626,7 +1626,7 @@ ; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc @@ -1645,9 +1645,9 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 @@ -1676,9 +1676,9 @@ ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -1733,9 +1733,9 @@ ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v5 -; 
CGP-NEXT: v_mul_hi_u32 v10, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 @@ -1744,8 +1744,8 @@ ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 @@ -1754,7 +1754,7 @@ ; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc @@ -1779,10 +1779,9 @@ ; CHECK-LABEL: v_sdiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1797,9 +1796,9 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 
v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1826,9 +1825,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1836,6 +1835,7 @@ ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -2224,10 +2224,9 @@ ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s6, 0x12d8fb -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_mov_b32 s6, 0xffed2705 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -2242,19 +2241,19 @@ ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_mov_b32 s7, 0x12d8fb ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; 
CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -2275,9 +2274,9 @@ ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -2285,7 +2284,7 @@ ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -2310,6 +2309,7 @@ ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -2328,9 +2328,9 @@ ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; 
CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 @@ -2339,8 +2339,8 @@ ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v10 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 @@ -2349,7 +2349,7 @@ ; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc @@ -2368,9 +2368,9 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 @@ -2399,9 +2399,9 @@ ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: 
v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -2456,9 +2456,9 @@ ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 @@ -2467,8 +2467,8 @@ ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 @@ -2477,7 +2477,7 @@ ; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1036,10 +1036,9 @@ ; CHECK-LABEL: v_srem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; 
CHECK-NEXT: s_mov_b32 s7, 0xfffff000 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1054,9 +1053,9 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1083,9 +1082,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1093,6 +1092,7 @@ ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_movk_i32 s6, 0x1000 ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -1477,10 +1477,9 @@ ; CGP-LABEL: v_srem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s6, 0x1000 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xfffff000 +; CGP-NEXT: s_movk_i32 s6, 0xf000 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: 
v_mac_f32_e32 v7, 0x4f800000, v6 @@ -1495,19 +1494,19 @@ ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_movk_i32 s7, 0x1000 ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -1528,9 +1527,9 @@ ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -1538,7 +1537,7 @@ ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -1563,6 +1562,7 @@ ; CGP-NEXT: v_mul_lo_u32 v10, 
v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -1581,9 +1581,9 @@ ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 @@ -1591,20 +1591,20 @@ ; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v0 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v9 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s6, v9 +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 ; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc @@ -1618,9 +1618,9 @@ ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: 
v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 @@ -1650,9 +1650,9 @@ ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -1707,9 +1707,9 @@ ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 @@ -1717,20 +1717,20 @@ ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s6, v2 +; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: 
v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v7 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v7 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc @@ -1751,10 +1751,9 @@ ; CHECK-LABEL: v_srem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1769,9 +1768,9 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1798,9 +1797,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, 
s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1808,6 +1807,7 @@ ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -2192,10 +2192,9 @@ ; CGP-LABEL: v_srem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s6, 0x12d8fb -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_mov_b32 s6, 0xffed2705 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -2210,19 +2209,19 @@ ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_mov_b32 s7, 0x12d8fb ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -2243,9 +2242,9 @@ ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, 
v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -2253,7 +2252,7 @@ ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -2278,6 +2277,7 @@ ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -2296,9 +2296,9 @@ ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 @@ -2306,20 +2306,20 @@ ; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: 
v_cndmask_b32_e64 v7, v7, v9, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v0 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v9 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s6, v9 +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 ; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc @@ -2333,9 +2333,9 @@ ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 @@ -2365,9 +2365,9 @@ ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -2422,9 +2422,9 @@ ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 
+; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 @@ -2432,20 +2432,20 @@ ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s6, v2 +; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v7 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v7 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -4984,24 +4984,22 @@ ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: s_brev_b32 s8, 1 -; GFX6-NEXT: v_mov_b32_e32 v1, s8 -; GFX6-NEXT: v_add_i32_e64 v4, s[6:7], 0, v0 +; GFX6-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX6-NEXT: v_add_i32_e64 v1, s[6:7], 0, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_addc_u32_e64 v1, 
s[6:7], v0, v1, s[6:7] -; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX6-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v2, v6 ; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v3, v7, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_add_i32_e64 v6, s[6:7], 0, v2 -; GFX6-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX6-NEXT: v_add_i32_e64 v3, s[6:7], 0, v2 +; GFX6-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_ssubsat_v2i64: @@ -5012,24 +5010,22 @@ ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: s_brev_b32 s8, 1 -; GFX8-NEXT: v_mov_b32_e32 v1, s8 -; GFX8-NEXT: v_add_u32_e64 v4, s[6:7], 0, v0 +; GFX8-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX8-NEXT: v_add_u32_e64 v1, s[6:7], 0, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v2, v6 ; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v3, v7, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX8-NEXT: 
v_mov_b32_e32 v3, s8 -; GFX8-NEXT: v_add_u32_e64 v6, s[6:7], 0, v2 -; GFX8-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX8-NEXT: v_add_u32_e64 v3, s[6:7], 0, v2 +; GFX8-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_ssubsat_v2i64: @@ -5040,56 +5036,53 @@ ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: s_brev_b32 s8, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: v_add_co_u32_e64 v4, s[6:7], 0, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX9-NEXT: v_add_co_u32_e64 v1, s[6:7], 0, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v2, v6 ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v7, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_add_co_u32_e64 v6, s[6:7], 0, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX9-NEXT: v_add_co_u32_e64 v3, s[6:7], 0, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; 
; GFX10-LABEL: v_ssubsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v9, v0 -; GFX10-NEXT: v_mov_b32_e32 v10, v1 -; GFX10-NEXT: v_mov_b32_e32 v13, v2 -; GFX10-NEXT: v_mov_b32_e32 v14, v3 +; GFX10-NEXT: v_mov_b32_e32 v14, v0 +; GFX10-NEXT: v_mov_b32_e32 v15, v1 +; GFX10-NEXT: v_mov_b32_e32 v17, v2 +; GFX10-NEXT: v_mov_b32_e32 v18, v3 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[4:5] -; GFX10-NEXT: v_sub_co_u32_e64 v19, vcc_lo, v9, v4 -; GFX10-NEXT: s_brev_b32 s8, 1 -; GFX10-NEXT: v_sub_co_ci_u32_e32 v20, vcc_lo, v10, v5, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v23, vcc_lo, v13, v6 +; GFX10-NEXT: v_sub_co_u32_e64 v8, vcc_lo, v14, v4 ; GFX10-NEXT: v_cmp_lt_i64_e64 s6, 0, v[6:7] -; GFX10-NEXT: v_sub_co_ci_u32_e32 v24, vcc_lo, v14, v7, vcc_lo +; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo +; GFX10-NEXT: v_sub_co_u32_e64 v19, vcc_lo, v17, v6 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_sub_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo +; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 +; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15] ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v20 -; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[19:20], v[9:10] -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v24 -; GFX10-NEXT: v_add_co_u32_e64 v4, s5, v0, 0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s5, s8, v0, s5 -; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[23:24], v[13:14] -; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v1, 0 +; GFX10-NEXT: v_add_co_u32_e64 v1, s5, v12, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 +; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[19:20], v[17:18] +; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v0, 0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, s8, v1, s7 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v19, v4, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v20, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 +; 
GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc_lo ; GFX10-NEXT: s_xor_b32 vcc_lo, s6, s5 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v23, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v24, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v20, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ret <2 x i64> %result @@ -6210,15 +6203,14 @@ ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0, v0 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6-NEXT: s_brev_b32 s4, 1 -; GFX6-NEXT: v_mov_b32_e32 v8, s4 +; GFX6-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6233,43 +6225,42 @@ ; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX6-NEXT: s_cselect_b32 s5, 1, 0 +; GFX6-NEXT: s_cselect_b32 s4, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX6-NEXT: s_cmp_eq_u32 s6, 0 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: s_cselect_b32 s5, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX6-NEXT: v_ashr_i64 v[12:13], v[10:11], s6 ; GFX6-NEXT: 
v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX6-NEXT: s_and_b32 s5, 1, s5 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX6-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], s6 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[10:11], s8 -; GFX6-NEXT: s_and_b32 s6, 1, s5 +; GFX6-NEXT: s_and_b32 s6, 1, s4 ; GFX6-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX6-NEXT: v_ashr_i64 v[4:5], v[10:11], s7 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s6, 1, s9 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s5, 1, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_and_b32 s4, 1, s4 +; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0, v4 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX6-NEXT: v_mov_b32_e32 v12, s4 ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6319,15 +6310,14 @@ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0, v0 ; GFX8-NEXT: 
v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: s_brev_b32 s4, 1 -; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6342,43 +6332,42 @@ ; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX8-NEXT: s_cselect_b32 s5, 1, 0 +; GFX8-NEXT: s_cselect_b32 s4, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX8-NEXT: s_cmp_eq_u32 s6, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: s_cselect_b32 s5, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX8-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX8-NEXT: s_and_b32 s5, 1, s5 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX8-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX8-NEXT: s_and_b32 s6, 1, s5 +; GFX8-NEXT: s_and_b32 s6, 1, s4 ; GFX8-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX8-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s6, 1, s9 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s5, 1, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_and_b32 s4, 1, s4 +; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; 
GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0, v4 ; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX8-NEXT: v_mov_b32_e32 v12, s4 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6428,15 +6417,14 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: s_brev_b32 s4, 1 -; GFX9-NEXT: v_mov_b32_e32 v8, s4 +; GFX9-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6451,43 +6439,42 @@ ; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX9-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-NEXT: s_cselect_b32 s4, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX9-NEXT: s_cmp_eq_u32 s6, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX9-NEXT: 
s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX9-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX9-NEXT: s_and_b32 s5, 1, s5 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX9-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX9-NEXT: s_and_b32 s6, 1, s5 +; GFX9-NEXT: s_and_b32 s6, 1, s4 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX9-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s6, 1, s9 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s5, 1, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_and_b32 s4, 1, s4 +; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, 0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v12, s4 ; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v12, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6546,7 +6533,6 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s8 ; GFX10-NEXT: v_xor_b32_e32 v9, v10, v20 -; GFX10-NEXT: s_brev_b32 s8, 1 ; GFX10-NEXT: 
s_cmp_lt_u32 s5, 64 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc_lo @@ -6556,7 +6542,7 @@ ; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, s8, v1, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX10-NEXT: v_sub_co_u32_e64 v8, s4, v26, v12 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, s4, v27, v13, s4 @@ -6604,7 +6590,7 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v7 ; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, 0, v3, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v19, v21, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, s8, v4, s4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, 0x80000000, v4, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v5, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v6, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v7, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -962,22 +962,22 @@ ; CHECK-LABEL: v_udiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 +; CHECK-NEXT: s_movk_i32 s7, 0x1000 +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: 
v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1004,10 +1004,10 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1054,11 +1054,11 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 ; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 @@ -1068,16 +1068,16 @@ ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v4, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 
v4, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc @@ -1363,14 +1363,14 @@ ; CGP-LABEL: v_udiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s8, 0xf000 ; CGP-NEXT: s_movk_i32 s10, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 -; CGP-NEXT: s_mov_b32 s8, 0xfffff000 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 @@ -1623,22 +1623,22 @@ ; CHECK-LABEL: v_udiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 
+; CHECK-NEXT: s_mov_b32 s7, 0x12d8fb +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1665,10 +1665,10 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1715,11 +1715,11 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 ; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 @@ -1729,16 +1729,16 @@ ; CHECK-NEXT: v_sub_i32_e32 v0, 
vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v4, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc @@ -2024,14 +2024,14 @@ ; CGP-LABEL: v_udiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s10, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 ; CGP-NEXT: s_mov_b32 s8, 0xffed2705 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: s_mov_b32 s10, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -948,22 +948,22 @@ ; CHECK-LABEL: 
v_urem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 +; CHECK-NEXT: s_movk_i32 s7, 0x1000 +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -990,10 +990,10 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1040,30 +1040,30 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; 
CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v3, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v3, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v4 +; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s7, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc @@ -1343,14 +1343,14 @@ ; CGP-LABEL: v_urem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s8, 0xf000 ; CGP-NEXT: s_movk_i32 s10, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 -; CGP-NEXT: s_mov_b32 s8, 0xfffff000 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: 
v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 @@ -1599,22 +1599,22 @@ ; CHECK-LABEL: v_urem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s7, 0x12d8fb +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1641,10 +1641,10 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: 
v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1691,30 +1691,30 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v3, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v3, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v4 +; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s7, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc @@ -1994,14 +1994,14 @@ ; CGP-LABEL: v_urem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; 
CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s10, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 ; CGP-NEXT: s_mov_b32 s8, 0xffed2705 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: s_mov_b32 s10, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5