diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -252,6 +252,12 @@ applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl<Register> &Operands); + /// Transform G_UNMERGE Constant -> Constant1, Constant2, ... + bool matchCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts); + bool applyCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts); + /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -412,6 +412,15 @@ (apply [{ return Helper.applyCombineFAbsOfFAbs(*${root}, ${matchinfo}); }]) >; +// Fold (unmerge cst) -> cst1, cst2, ... +def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt, 8>">; +def unmerge_cst : GICombineRule< + (defs root:$d, unmerge_cst_matchinfo:$info), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeConstant(*${d}, ${info}); }]), + (apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }]) +>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -443,4 +452,4 @@ width_reduction_combines, select_combines, known_bits_simplifications, ext_ext_fold, not_cmp_fold, opt_brcond_by_inverting_cond, - unmerge_merge, fabs_fabs_fold]>; + unmerge_merge, fabs_fabs_fold, unmerge_cst]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1612,6 +1612,48 @@ return true; } +bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + unsigned SrcIdx = MI.getNumOperands() - 1; + Register SrcReg = MI.getOperand(SrcIdx).getReg(); + MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); + if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT && + SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT) + return false; + // Break down the big constant into smaller ones. + const MachineOperand &CstVal = SrcInstr->getOperand(1); + APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT + ? CstVal.getCImm()->getValue() + : CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + + LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned ShiftAmt = Dst0Ty.getSizeInBits(); + // Unmerge a constant. 
+ for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) { + Csts.emplace_back(Val.trunc(ShiftAmt)); + Val = Val.lshr(ShiftAmt); + } + + return true; +} + +bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts) { + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && + "Expected an unmerge"); + assert((MI.getNumOperands() - 1 == Csts.size()) && + "Not enough operands to replace all defs"); + unsigned NumElems = MI.getNumOperands() - 1; + Builder.setInstrAndDebugLoc(MI); + for (unsigned Idx = 0; Idx < NumElems; ++Idx) { + Register DstReg = MI.getOperand(Idx).getReg(); + Builder.buildConstant(DstReg, Csts[Idx]); + } + + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir @@ -181,3 +181,114 @@ $w1 = COPY %4(s32) ... +# Unmerge a constant into a bunch of smaller constants. +# Constant is 0x0102030405060708090a0b0c0d0e0f10 and we break it down into +# bytes: +# cst1 0x10 +# cst2 0x0f +# cst3 0x0e +# ... 
+--- +name: test_combine_unmerge_cst +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_cst + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 16 + ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 15 + ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 14 + ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 13 + ; CHECK: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 12 + ; CHECK: [[C5:%[0-9]+]]:_(s8) = G_CONSTANT i8 11 + ; CHECK: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 10 + ; CHECK: [[C7:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 + ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 8 + ; CHECK: [[C9:%[0-9]+]]:_(s8) = G_CONSTANT i8 7 + ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6 + ; CHECK: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 + ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 + ; CHECK: [[C13:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 + ; CHECK: [[C14:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 + ; CHECK: [[C15:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK: $b0 = COPY [[C]](s8) + ; CHECK: $b1 = COPY [[C1]](s8) + ; CHECK: $b2 = COPY [[C2]](s8) + ; CHECK: $b3 = COPY [[C3]](s8) + ; CHECK: $b4 = COPY [[C4]](s8) + ; CHECK: $b5 = COPY [[C5]](s8) + ; CHECK: $b6 = COPY [[C6]](s8) + ; CHECK: $b7 = COPY [[C7]](s8) + ; CHECK: $b8 = COPY [[C8]](s8) + ; CHECK: $b9 = COPY [[C9]](s8) + ; CHECK: $b10 = COPY [[C10]](s8) + ; CHECK: $b11 = COPY [[C11]](s8) + ; CHECK: $b12 = COPY [[C12]](s8) + ; CHECK: $b13 = COPY [[C13]](s8) + ; CHECK: $b14 = COPY [[C14]](s8) + ; CHECK: $b15 = COPY [[C15]](s8) + %0:_(s128) = G_CONSTANT i128 1339673755198158349044581307228491536 + %1:_(s8),%2:_(s8),%3:_(s8),%4:_(s8),%5:_(s8),%6:_(s8),%7:_(s8),%8:_(s8),%9:_(s8),%10:_(s8),%11:_(s8),%12:_(s8),%13:_(s8),%14:_(s8),%15:_(s8),%16:_(s8) = G_UNMERGE_VALUES %0(s128) + $b0 = COPY %1(s8) + $b1 = COPY %2(s8) + $b2 = COPY %3(s8) + $b3 = COPY %4(s8) + $b4 = COPY %5(s8) + $b5 = COPY %6(s8) + $b6 = COPY %7(s8) + $b7 = COPY %8(s8) + $b8 = COPY %9(s8) + $b9 = COPY %10(s8) + $b10 = COPY %11(s8) + $b11 = COPY %12(s8) + $b12 = COPY %13(s8) + $b13 = COPY %14(s8) + $b14 
= COPY %15(s8) + $b15 = COPY %16(s8) +... + +# Unmerge a constant on a non-power-of-2 type into a bunch of smaller constants. +# Constant is 3 | 2 | 1 in 13-bit chunks. +--- +name: test_combine_unmerge_cst_36bit +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_cst_36bit + ; CHECK: [[C:%[0-9]+]]:_(s13) = G_CONSTANT i13 1 + ; CHECK: [[C1:%[0-9]+]]:_(s13) = G_CONSTANT i13 2 + ; CHECK: [[C2:%[0-9]+]]:_(s13) = G_CONSTANT i13 3 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[C]](s13) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[C1]](s13) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s16) = G_ZEXT [[C2]](s13) + ; CHECK: $h0 = COPY [[ZEXT]](s16) + ; CHECK: $h1 = COPY [[ZEXT1]](s16) + ; CHECK: $h2 = COPY [[ZEXT2]](s16) + %0:_(s39) = G_CONSTANT i39 201342977 + %1:_(s13),%2:_(s13),%3:_(s13) = G_UNMERGE_VALUES %0(s39) + %4:_(s16) = G_ZEXT %1(s13) + %5:_(s16) = G_ZEXT %2(s13) + %6:_(s16) = G_ZEXT %3(s13) + $h0 = COPY %4(s16) + $h1 = COPY %5(s16) + $h2 = COPY %6(s16) +... + +# Unmerge a floating point constant. +--- +name: test_combine_unmerge_fpcst +body: | + bb.1: + ; CHECK-LABEL: name: test_combine_unmerge_fpcst + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 + ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK: $h0 = COPY [[C]](s16) + ; CHECK: $h1 = COPY [[C1]](s16) + ; CHECK: $h2 = COPY [[C2]](s16) + ; CHECK: $h3 = COPY [[C3]](s16) + %0:_(s64) = G_FCONSTANT double 0x0004000300020001 + %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64) + $h0 = COPY %1(s16) + $h1 = COPY %2(s16) + $h2 = COPY %3(s16) + $h3 = COPY %4(s16) +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -4999,24 +4999,22 @@ ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: s_brev_b32 s8, 1 -; GFX6-NEXT: v_mov_b32_e32 v1, s8 -; GFX6-NEXT: v_add_i32_e64 v4, s[6:7], 0, v0 +; GFX6-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX6-NEXT: v_add_i32_e64 v1, s[6:7], 0, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX6-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v6 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v3, v7, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_add_i32_e64 v6, s[6:7], 0, v2 -; GFX6-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX6-NEXT: v_add_i32_e64 v3, s[6:7], 0, v2 +; GFX6-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_saddsat_v2i64: @@ -5027,24 +5025,22 @@ ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: s_brev_b32 s8, 1 -; GFX8-NEXT: v_mov_b32_e32 v1, s8 -; GFX8-NEXT: v_add_u32_e64 v4, s[6:7], 0, v0 +; GFX8-NEXT: v_bfrev_b32_e32 
v10, 1 +; GFX8-NEXT: v_add_u32_e64 v1, s[6:7], 0, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v6 ; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v3, v7, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX8-NEXT: v_mov_b32_e32 v3, s8 -; GFX8-NEXT: v_add_u32_e64 v6, s[6:7], 0, v2 -; GFX8-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX8-NEXT: v_add_u32_e64 v3, s[6:7], 0, v2 +; GFX8-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_saddsat_v2i64: @@ -5055,56 +5051,53 @@ ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: s_brev_b32 s8, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: v_add_co_u32_e64 v4, s[6:7], 0, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX9-NEXT: v_add_co_u32_e64 v1, s[6:7], 0, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v2, v6 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v3, v7, vcc ; GFX9-NEXT: 
v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7] ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_add_co_u32_e64 v6, s[6:7], 0, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX9-NEXT: v_add_co_u32_e64 v3, s[6:7], 0, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_saddsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v9, v0 -; GFX10-NEXT: v_mov_b32_e32 v10, v1 -; GFX10-NEXT: v_mov_b32_e32 v13, v2 -; GFX10-NEXT: v_mov_b32_e32 v14, v3 +; GFX10-NEXT: v_mov_b32_e32 v14, v0 +; GFX10-NEXT: v_mov_b32_e32 v15, v1 +; GFX10-NEXT: v_mov_b32_e32 v17, v2 +; GFX10-NEXT: v_mov_b32_e32 v18, v3 ; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[4:5] -; GFX10-NEXT: v_add_co_u32_e64 v19, vcc_lo, v9, v4 -; GFX10-NEXT: s_brev_b32 s8, 1 -; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, v10, v5, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v23, vcc_lo, v13, v6 +; GFX10-NEXT: v_add_co_u32_e64 v8, vcc_lo, v14, v4 ; GFX10-NEXT: v_cmp_gt_i64_e64 s6, 0, v[6:7] -; GFX10-NEXT: v_add_co_ci_u32_e32 v24, vcc_lo, v14, v7, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo +; GFX10-NEXT: v_add_co_u32_e64 v19, vcc_lo, v17, v6 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo +; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 +; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15] ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v20 -; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[19:20], v[9:10] -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v24 -; GFX10-NEXT: v_add_co_u32_e64 v4, s5, 
v0, 0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s5, s8, v0, s5 -; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[23:24], v[13:14] -; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v1, 0 +; GFX10-NEXT: v_add_co_u32_e64 v1, s5, v12, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 +; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[19:20], v[17:18] +; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v0, 0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, s8, v1, s7 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v19, v4, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v20, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc_lo ; GFX10-NEXT: s_xor_b32 vcc_lo, s6, s5 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v23, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v24, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v20, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ret <2 x i64> %result @@ -6225,15 +6218,14 @@ ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0, v0 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6-NEXT: s_brev_b32 s4, 1 -; GFX6-NEXT: v_mov_b32_e32 v8, s4 +; GFX6-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6248,43 +6240,42 @@ ; GFX6-NEXT: 
s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX6-NEXT: s_cselect_b32 s5, 1, 0 +; GFX6-NEXT: s_cselect_b32 s4, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX6-NEXT: s_cmp_eq_u32 s6, 0 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX6-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: s_cselect_b32 s5, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX6-NEXT: v_ashr_i64 v[12:13], v[10:11], s6 ; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX6-NEXT: s_and_b32 s5, 1, s5 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX6-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], s6 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[10:11], s8 -; GFX6-NEXT: s_and_b32 s6, 1, s5 +; GFX6-NEXT: s_and_b32 s6, 1, s4 ; GFX6-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX6-NEXT: v_ashr_i64 v[4:5], v[10:11], s7 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s6, 1, s9 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s5, 1, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_and_b32 s4, 1, s4 +; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0, v4 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX6-NEXT: v_mov_b32_e32 v12, s4 ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX6-NEXT: 
v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6334,15 +6325,14 @@ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: s_brev_b32 s4, 1 -; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6357,43 +6347,42 @@ ; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX8-NEXT: s_cselect_b32 s5, 1, 0 +; GFX8-NEXT: s_cselect_b32 s4, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX8-NEXT: s_cmp_eq_u32 s6, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: s_cselect_b32 s5, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX8-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX8-NEXT: s_and_b32 s5, 1, s5 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX8-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX8-NEXT: 
v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX8-NEXT: s_and_b32 s6, 1, s5 +; GFX8-NEXT: s_and_b32 s6, 1, s4 ; GFX8-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX8-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s6, 1, s9 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s5, 1, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_and_b32 s4, 1, s4 +; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0, v4 ; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX8-NEXT: v_mov_b32_e32 v12, s4 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6443,15 +6432,14 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: s_brev_b32 s4, 1 -; GFX9-NEXT: v_mov_b32_e32 v8, s4 +; GFX9-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-NEXT: 
v_addc_co_u32_e32 v3, vcc, v3, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6466,43 +6454,42 @@ ; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX9-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-NEXT: s_cselect_b32 s4, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX9-NEXT: s_cmp_eq_u32 s6, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, 0, v[12:13] -; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[14:15] ; GFX9-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX9-NEXT: s_and_b32 s5, 1, s5 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX9-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX9-NEXT: s_and_b32 s6, 1, s5 +; GFX9-NEXT: s_and_b32 s6, 1, s4 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX9-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s6, 1, s9 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s5, 1, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_and_b32 s4, 1, s4 +; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX9-NEXT: v_cndmask_b32_e32 
v7, v15, v13, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, 0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v12, s4 ; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v12, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6561,7 +6548,6 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s8 ; GFX10-NEXT: v_xor_b32_e32 v9, v10, v20 -; GFX10-NEXT: s_brev_b32 s8, 1 ; GFX10-NEXT: s_cmp_lt_u32 s5, 64 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc_lo @@ -6571,7 +6557,7 @@ ; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, s8, v1, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX10-NEXT: v_add_co_u32_e64 v8, s4, v26, v12 ; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v27, v13, s4 @@ -6619,7 +6605,7 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v7 ; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, 0, v3, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v19, v21, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, s8, v4, s4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, 0x80000000, v4, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v5, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v6, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v7, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1057,10 +1057,9 @@ ; CHECK-LABEL: v_sdiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1075,9 +1074,9 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1104,9 +1103,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1114,6 +1113,7 @@ ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_movk_i32 s6, 0x1000 ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -1502,10 +1502,9 @@ ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s6, 0x1000 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 ; CGP-NEXT: 
v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xfffff000 +; CGP-NEXT: s_movk_i32 s6, 0xf000 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -1520,19 +1519,19 @@ ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_movk_i32 s7, 0x1000 ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -1553,9 +1552,9 @@ ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -1563,7 +1562,7 @@ ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; 
CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -1588,6 +1587,7 @@ ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -1606,9 +1606,9 @@ ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 @@ -1617,8 +1617,8 @@ ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v10 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 @@ -1627,7 +1627,7 @@ ; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc @@ -1646,9 +1646,9 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; 
CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 @@ -1677,9 +1677,9 @@ ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -1734,9 +1734,9 @@ ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 @@ -1745,8 +1745,8 @@ ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 @@ -1755,7 +1755,7 @@ ; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: 
v_cmp_le_u32_e32 vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc @@ -1780,10 +1780,9 @@ ; CHECK-LABEL: v_sdiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1798,9 +1797,9 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1827,9 +1826,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1837,6 +1836,7 @@ ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 
v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -2225,10 +2225,9 @@ ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s6, 0x12d8fb -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_mov_b32 s6, 0xffed2705 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -2243,19 +2242,19 @@ ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_mov_b32 s7, 0x12d8fb ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -2276,9 +2275,9 @@ ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 
; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -2286,7 +2285,7 @@ ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -2311,6 +2310,7 @@ ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -2329,9 +2329,9 @@ ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 @@ -2340,8 +2340,8 @@ ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v10 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 +; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 @@ -2350,7 +2350,7 @@ ; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; 
CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc @@ -2369,9 +2369,9 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 @@ -2400,9 +2400,9 @@ ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -2457,9 +2457,9 @@ ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 @@ -2468,8 +2468,8 @@ ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 ; 
CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 @@ -2478,7 +2478,7 @@ ; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1037,10 +1037,9 @@ ; CHECK-LABEL: v_srem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1055,9 +1054,9 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1084,9 +1083,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, 
s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1094,6 +1093,7 @@ ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_movk_i32 s6, 0x1000 ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -1478,10 +1478,9 @@ ; CGP-LABEL: v_srem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s6, 0x1000 -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xfffff000 +; CGP-NEXT: s_movk_i32 s6, 0xf000 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -1496,19 +1495,19 @@ ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_movk_i32 s7, 0x1000 ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; 
CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -1529,9 +1528,9 @@ ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -1539,7 +1538,7 @@ ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -1564,6 +1563,7 @@ ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -1582,9 +1582,9 @@ ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 @@ -1592,20 +1592,20 @@ ; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CGP-NEXT: 
v_cmp_le_u32_e64 s[4:5], s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v0 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v9 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s6, v9 +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 ; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc @@ -1619,9 +1619,9 @@ ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 @@ -1651,9 +1651,9 @@ ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -1708,9 +1708,9 @@ ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; 
CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 @@ -1718,20 +1718,20 @@ ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s6, v2 +; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v7 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v7 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc @@ -1752,10 +1752,9 @@ ; CHECK-LABEL: v_srem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, 
v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 @@ -1770,9 +1769,9 @@ ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s7, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1799,9 +1798,9 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v6 +; CHECK-NEXT: v_mul_hi_u32 v10, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v9, s6, v2 ; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 @@ -1809,6 +1808,7 @@ ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v7 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v9 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9 +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb ; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] ; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 @@ -2193,10 +2193,9 @@ ; CGP-LABEL: v_srem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s6, 0x12d8fb -; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_mov_b32 s6, 0xffed2705 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_mov_b32_e32 v7, v4 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 @@ -2211,19 +2210,19 @@ ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 ; CGP-NEXT: 
v_xor_b32_e32 v1, v1, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; CGP-NEXT: s_mov_b32 s7, 0x12d8fb ; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 @@ -2244,9 +2243,9 @@ ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v8, v9, vcc ; CGP-NEXT: v_mul_lo_u32 v11, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v10 -; CGP-NEXT: v_mul_hi_u32 v14, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v13, s7, v7 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v7 ; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v9 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 @@ -2254,7 +2253,7 @@ ; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v13 ; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 @@ -2279,6 +2278,7 @@ ; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 ; CGP-NEXT: 
v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 @@ -2297,9 +2297,9 @@ ; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 @@ -2307,20 +2307,20 @@ ; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v0 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v9 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s6, v9 +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 ; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc @@ -2334,9 +2334,9 @@ ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc ; CGP-NEXT: v_mul_lo_u32 v8, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v4 +; 
CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 @@ -2366,9 +2366,9 @@ ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_addc_u32_e64 v9, s[4:5], v7, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v10, -1, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v12, s7, v4 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v4 ; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 ; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 @@ -2423,9 +2423,9 @@ ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s6, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 @@ -2433,20 +2433,20 @@ ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 ; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v2 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s6, v2 +; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s6, v7 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, 
v7 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s6, v7 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -4984,24 +4984,22 @@ ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX6-NEXT: s_brev_b32 s8, 1 -; GFX6-NEXT: v_mov_b32_e32 v1, s8 -; GFX6-NEXT: v_add_i32_e64 v4, s[6:7], 0, v0 +; GFX6-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX6-NEXT: v_add_i32_e64 v1, s[6:7], 0, v0 ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX6-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v2, v6 ; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v3, v7, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX6-NEXT: v_mov_b32_e32 v3, s8 -; GFX6-NEXT: v_add_i32_e64 v6, s[6:7], 0, v2 -; GFX6-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX6-NEXT: v_add_i32_e64 v3, s[6:7], 0, v2 +; GFX6-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX6-NEXT: 
s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_ssubsat_v2i64: @@ -5012,24 +5010,22 @@ ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX8-NEXT: s_brev_b32 s8, 1 -; GFX8-NEXT: v_mov_b32_e32 v1, s8 -; GFX8-NEXT: v_add_u32_e64 v4, s[6:7], 0, v0 +; GFX8-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX8-NEXT: v_add_u32_e64 v1, s[6:7], 0, v0 ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX8-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v2, v6 ; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v3, v7, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX8-NEXT: v_mov_b32_e32 v3, s8 -; GFX8-NEXT: v_add_u32_e64 v6, s[6:7], 0, v2 -; GFX8-NEXT: v_addc_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX8-NEXT: v_add_u32_e64 v3, s[6:7], 0, v2 +; GFX8-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_ssubsat_v2i64: @@ -5040,56 +5036,53 @@ ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] ; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 -; GFX9-NEXT: s_brev_b32 s8, 1 -; GFX9-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NEXT: v_add_co_u32_e64 v4, s[6:7], 0, v0 +; GFX9-NEXT: v_bfrev_b32_e32 v10, 1 +; GFX9-NEXT: v_add_co_u32_e64 v1, s[6:7], 0, v0 ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7] -; 
GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v4, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc ; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v2, v6 ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v7, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] ; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v5 -; GFX9-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-NEXT: v_add_co_u32_e64 v6, s[6:7], 0, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v3, s[6:7], v2, v3, s[6:7] +; GFX9-NEXT: v_add_co_u32_e64 v3, s[6:7], 0, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7] ; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc -; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_ssubsat_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v9, v0 -; GFX10-NEXT: v_mov_b32_e32 v10, v1 -; GFX10-NEXT: v_mov_b32_e32 v13, v2 -; GFX10-NEXT: v_mov_b32_e32 v14, v3 +; GFX10-NEXT: v_mov_b32_e32 v14, v0 +; GFX10-NEXT: v_mov_b32_e32 v15, v1 +; GFX10-NEXT: v_mov_b32_e32 v17, v2 +; GFX10-NEXT: v_mov_b32_e32 v18, v3 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[4:5] -; GFX10-NEXT: v_sub_co_u32_e64 v19, vcc_lo, v9, v4 -; GFX10-NEXT: s_brev_b32 s8, 1 -; GFX10-NEXT: v_sub_co_ci_u32_e32 v20, vcc_lo, v10, v5, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v23, vcc_lo, v13, v6 +; GFX10-NEXT: v_sub_co_u32_e64 v8, vcc_lo, v14, v4 ; GFX10-NEXT: v_cmp_lt_i64_e64 s6, 0, v[6:7] -; GFX10-NEXT: v_sub_co_ci_u32_e32 v24, vcc_lo, v14, v7, vcc_lo +; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo +; GFX10-NEXT: v_sub_co_u32_e64 v19, vcc_lo, v17, v6 ; GFX10-NEXT: ; implicit-def: 
$vcc_hi +; GFX10-NEXT: v_sub_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo +; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 +; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15] ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v20 -; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[19:20], v[9:10] -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v24 -; GFX10-NEXT: v_add_co_u32_e64 v4, s5, v0, 0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s5, s8, v0, s5 -; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[23:24], v[13:14] -; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v1, 0 +; GFX10-NEXT: v_add_co_u32_e64 v1, s5, v12, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 +; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[19:20], v[17:18] +; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v0, 0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, s8, v1, s7 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v19, v4, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v20, v5, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc_lo ; GFX10-NEXT: s_xor_b32 vcc_lo, s6, s5 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v23, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v24, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v19, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v3, v20, v3, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) ret <2 x i64> %result @@ -6210,15 +6203,14 @@ ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0, v0 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6-NEXT: s_brev_b32 s4, 1 -; GFX6-NEXT: 
v_mov_b32_e32 v8, s4 +; GFX6-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX6-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6233,43 +6225,42 @@ ; GFX6-NEXT: s_cmp_lt_u32 s6, 64 ; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX6-NEXT: s_cselect_b32 s5, 1, 0 +; GFX6-NEXT: s_cselect_b32 s4, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX6-NEXT: s_cmp_eq_u32 s6, 0 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX6-NEXT: s_cselect_b32 s9, 1, 0 +; GFX6-NEXT: s_cselect_b32 s5, 1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX6-NEXT: v_ashr_i64 v[12:13], v[10:11], s6 ; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX6-NEXT: s_and_b32 s5, 1, s5 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX6-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], s6 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[10:11], s8 -; GFX6-NEXT: s_and_b32 s6, 1, s5 +; GFX6-NEXT: s_and_b32 s6, 1, s4 ; GFX6-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX6-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX6-NEXT: v_ashr_i64 v[4:5], v[10:11], s7 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s6, 1, s9 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX6-NEXT: s_and_b32 s5, 1, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: s_and_b32 s4, 1, s4 +; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; 
GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX6-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0, v4 ; GFX6-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX6-NEXT: v_mov_b32_e32 v12, s4 ; GFX6-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX6-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX6-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6319,15 +6310,14 @@ ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: s_brev_b32 s4, 1 -; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6342,43 +6332,42 @@ ; GFX8-NEXT: s_cmp_lt_u32 s6, 64 ; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX8-NEXT: s_cselect_b32 s5, 1, 0 +; GFX8-NEXT: s_cselect_b32 s4, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX8-NEXT: s_cmp_eq_u32 s6, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX8-NEXT: s_cselect_b32 s9, 1, 0 +; GFX8-NEXT: s_cselect_b32 s5, 1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; 
GFX8-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX8-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX8-NEXT: s_and_b32 s5, 1, s5 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX8-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX8-NEXT: s_and_b32 s6, 1, s5 +; GFX8-NEXT: s_and_b32 s6, 1, s4 ; GFX8-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX8-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX8-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s6, 1, s9 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX8-NEXT: s_and_b32 s5, 1, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: s_and_b32 s4, 1, s4 +; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0, v4 ; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GFX8-NEXT: v_mov_b32_e32 v12, s4 ; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v12, vcc +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, v7, v20, vcc ; GFX8-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6428,15 +6417,14 @@ ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v19 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v2, v11, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v11, v9, vcc +; 
GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: s_brev_b32 s4, 1 -; GFX9-NEXT: v_mov_b32_e32 v8, s4 +; GFX9-NEXT: v_bfrev_b32_e32 v20, 1 ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v8, 1, v10 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc @@ -6451,43 +6439,42 @@ ; GFX9-NEXT: s_cmp_lt_u32 s6, 64 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] -; GFX9-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-NEXT: s_cselect_b32 s4, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] ; GFX9-NEXT: s_cmp_eq_u32 s6, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GFX9-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] ; GFX9-NEXT: v_ashrrev_i64 v[12:13], s6, v[10:11] ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 +; GFX9-NEXT: s_and_b32 s5, 1, s5 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc ; GFX9-NEXT: v_xor_b32_e32 v14, v5, v4 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[8:9] ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s8, v[10:11] -; GFX9-NEXT: s_and_b32 s6, 1, s5 +; GFX9-NEXT: s_and_b32 s6, 1, s4 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v6 ; GFX9-NEXT: v_or_b32_e32 v7, v5, v7 ; GFX9-NEXT: v_ashrrev_i64 v[4:5], s7, v[10:11] ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s6, 1, s9 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 -; GFX9-NEXT: s_and_b32 s5, 1, s5 +; GFX9-NEXT: 
v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: s_and_b32 s4, 1, s4 +; GFX9-NEXT: v_ashrrev_i32_e32 v15, 31, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 +; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v15, v12, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v7, v15, v13, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, 0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v12, s4 ; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v12, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v20, vcc ; GFX9-NEXT: v_and_b32_e32 v12, 1, v14 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc @@ -6546,7 +6533,6 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s8 ; GFX10-NEXT: v_xor_b32_e32 v9, v10, v20 -; GFX10-NEXT: s_brev_b32 s8, 1 ; GFX10-NEXT: s_cmp_lt_u32 s5, 64 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc_lo @@ -6556,7 +6542,7 @@ ; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, s8, v1, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX10-NEXT: v_sub_co_u32_e64 v8, s4, v26, v12 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, s4, v27, v13, s4 @@ -6604,7 +6590,7 @@ ; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v7 ; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, 0, v3, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v19, v21, vcc_lo -; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, s8, v4, s4 +; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s4, 0x80000000, v4, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v5, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v6, s5 ; GFX10-NEXT: 
v_cndmask_b32_e64 v6, v10, v7, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -963,22 +963,22 @@ ; CHECK-LABEL: v_udiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 +; CHECK-NEXT: s_movk_i32 s7, 0x1000 +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1005,10 +1005,10 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 
v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1055,11 +1055,11 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 ; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 @@ -1069,16 +1069,16 @@ ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v4, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc @@ -1364,14 +1364,14 @@ ; CGP-LABEL: v_udiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s8, 0xf000 ; CGP-NEXT: s_movk_i32 s10, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 -; CGP-NEXT: 
s_mov_b32 s8, 0xfffff000 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 @@ -1624,22 +1624,22 @@ ; CHECK-LABEL: v_udiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s7, 0x12d8fb +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1666,10 +1666,10 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: 
v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1716,11 +1716,11 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 ; CHECK-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 @@ -1730,16 +1730,16 @@ ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v4, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc @@ -2025,14 +2025,14 @@ ; CGP-LABEL: v_udiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
CGP-NEXT: s_mov_b32 s10, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 ; CGP-NEXT: s_mov_b32 s8, 0xffed2705 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: s_mov_b32 s10, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -949,22 +949,22 @@ ; CHECK-LABEL: v_urem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_movk_i32 s6, 0xf000 +; CHECK-NEXT: s_movk_i32 s7, 0x1000 +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; 
CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -991,10 +991,10 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1041,30 +1041,30 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v3, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v3, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s7, v0 ; CHECK-NEXT: 
v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v4 +; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s7, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc @@ -1344,14 +1344,14 @@ ; CGP-LABEL: v_urem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s8, 0xf000 ; CGP-NEXT: s_movk_i32 s10, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 -; CGP-NEXT: s_mov_b32 s8, 0xfffff000 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 @@ -1600,22 +1600,22 @@ ; CHECK-LABEL: v_urem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v2, 0 -; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 -; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s6, 0xffed2705 +; CHECK-NEXT: s_mov_b32 s7, 0x12d8fb +; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 
v2 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; CHECK-NEXT: v_trunc_f32_e32 v3, v3 ; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 -; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 @@ -1642,10 +1642,10 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc ; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 ; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 ; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 @@ -1692,30 +1692,30 @@ ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s7, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; CHECK-NEXT: v_mul_lo_u32 v3, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v3, s7, v3 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 
s7, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s7, v0 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v4 +; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s7, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc @@ -1995,14 +1995,14 @@ ; CGP-LABEL: v_urem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s10, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 ; CGP-NEXT: s_mov_b32 s8, 0xffed2705 -; CGP-NEXT: v_mov_b32_e32 v6, v5 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 +; CGP-NEXT: s_mov_b32 s10, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v6, v4 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5