diff --git a/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/llvm/lib/Transforms/Utils/IntegerDivision.cpp --- a/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -53,6 +53,8 @@ // ; %urem = urem i32 %dividend, %divisor // ; %xored = xor i32 %urem, %dividend_sgn // ; %srem = sub i32 %xored, %dividend_sgn + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); Value *DividendSign = Builder.CreateAShr(Dividend, Shift); Value *DivisorSign = Builder.CreateAShr(Divisor, Shift); Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); @@ -84,6 +86,8 @@ // ; %quotient = udiv i32 %dividend, %divisor // ; %product = mul i32 %divisor, %quotient // ; %remainder = sub i32 %dividend, %product + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); Value *Product = Builder.CreateMul(Divisor, Quotient); Value *Remainder = Builder.CreateSub(Dividend, Product); @@ -126,6 +130,8 @@ // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr // ; %tmp4 = xor i32 %q_mag, %q_sgn // ; %q = sub i32 %tmp4, %q_sgn + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); Value *Tmp = Builder.CreateAShr(Dividend, Shift); Value *Tmp1 = Builder.CreateAShr(Divisor, Shift); Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); @@ -241,12 +247,14 @@ // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) // ; %sr = sub nsw i32 %tmp0, %tmp1 // ; %ret0_4 = icmp ugt i32 %sr, 31 - // ; %ret0 = or i1 %ret0_3, %ret0_4 + // ; %ret0 = select i1 %ret0_3, i1 1, i1 %ret0_4 // ; %retDividend = icmp eq i32 %sr, 31 // ; %retVal = select i1 %ret0, i32 0, i32 %dividend - // ; %earlyRet = or i1 %ret0, %retDividend + // ; %earlyRet = select i1 %ret0, i1 1, i1 %retDividend // ; br i1 %earlyRet, label %end, label %bb1 Builder.SetInsertPoint(SpecialCases); + Divisor = Builder.CreateFreeze(Divisor); + Dividend = Builder.CreateFreeze(Dividend); Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); @@ -254,10 +262,10 @@ Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True}); Value *SR = Builder.CreateSub(Tmp0, Tmp1); Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); - Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); + Value *Ret0 = Builder.CreateLogicalOr(Ret0_3, Ret0_4); Value *RetDividend = Builder.CreateICmpEQ(SR, MSB); Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); - Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); + Value *EarlyRet = Builder.CreateLogicalOr(Ret0, RetDividend); Builder.CreateCondBr(EarlyRet, End, BB1); // ; bb1: ; preds = %special-cases diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -168,12 +168,11 @@ ; GCN-IR-NEXT: s_sub_u32 s10, s14, s18 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63 -; GCN-IR-NEXT: s_mov_b32 s15, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63 -; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1 -; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_mov_b32 s15, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s16, s10, 1 @@ -524,7 +523,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -694,7 +693,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 31 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -761,7 +760,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 23 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -828,7 +827,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 25 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -910,7 +909,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[10:11], 40 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v2, s10 ; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 40 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v3, s6 @@ -926,7 +925,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| ; GCN-IR-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 -; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-IR-NEXT: v_bfe_i32 v2, v2, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v3, 31, v2 @@ -977,20 +976,20 @@ ; GCN-IR-LABEL: s_test_sdiv24_48: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_sext_i32_i16 s7, s7 +; GCN-IR-NEXT: s_sext_i32_i16 s1, s1 +; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 24 +; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 24 ; GCN-IR-NEXT: s_ashr_i32 s0, s7, 31 -; GCN-IR-NEXT: s_sext_i32_i16 s3, s3 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[6:7], 24 ; GCN-IR-NEXT: s_mov_b32 s1, s0 -; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[2:3], 24 -; GCN-IR-NEXT: s_ashr_i32 s2, s3, 31 -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[0:1], s[8:9] +; GCN-IR-NEXT: s_ashr_i32 s2, s9, 31 +; GCN-IR-NEXT: s_xor_b64 s[6:7], s[0:1], s[6:7] ; GCN-IR-NEXT: s_mov_b32 s3, s2 ; GCN-IR-NEXT: s_sub_u32 s12, s6, s0 ; GCN-IR-NEXT: s_subb_u32 s13, s7, s0 -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[2:3], s[10:11] +; GCN-IR-NEXT: s_xor_b64 s[6:7], s[2:3], s[8:9] ; GCN-IR-NEXT: s_sub_u32 s6, s6, s2 ; GCN-IR-NEXT: s_subb_u32 s7, s7, s2 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[6:7], 0 @@ -1008,12 +1007,11 @@ ; GCN-IR-NEXT: s_sub_u32 s10, s14, s18 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63 -; GCN-IR-NEXT: s_mov_b32 s15, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63 -; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1 -; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_mov_b32 s15, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s16, s10, 1 @@ -1208,20 +1206,19 @@ ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3] ; GCN-IR-NEXT: s_sub_u32 s2, s2, s4 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s4 -; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 -; GCN-IR-NEXT: s_add_i32 s6, s6, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 -; GCN-IR-NEXT: s_min_u32 s10, s6, s7 +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2 +; GCN-IR-NEXT: s_add_i32 s8, s8, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s9, s3 +; GCN-IR-NEXT: s_min_u32 s10, s8, s9 ; GCN-IR-NEXT: s_add_u32 s8, s10, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[14:15], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 63 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[14:15] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[14:15], s[8:9], 63 -; GCN-IR-NEXT: s_xor_b64 s[16:17], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[14:15], s[16:17], s[14:15] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[14:15] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1 @@ -1823,7 +1820,7 @@ ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -1880,7 +1877,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v2, s0 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s8 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -140,12 +140,11 @@ ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1 @@ -202,8 +201,8 @@ ; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0 ; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0 ; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-IR-NEXT: s_mov_b32 s10, -1 @@ -505,7 +504,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc ; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 @@ -576,7 +575,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc ; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 @@ -701,7 +700,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc ; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 @@ -839,7 +838,7 @@ ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s3, v0 @@ -995,17 +994,17 @@ ; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[6:7], 31 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 31 -; GCN-IR-NEXT: s_ashr_i32 s0, s7, 31 -; GCN-IR-NEXT: s_ashr_i32 s6, s1, 31 +; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[0:1], 31 +; GCN-IR-NEXT: s_ashr_i32 s0, s3, 31 +; GCN-IR-NEXT: s_ashr_i32 s10, s7, 31 ; GCN-IR-NEXT: s_mov_b32 s1, s0 -; GCN-IR-NEXT: s_mov_b32 s7, s6 +; GCN-IR-NEXT: s_mov_b32 s11, s10 ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1] -; GCN-IR-NEXT: s_xor_b64 s[8:9], s[8:9], s[6:7] +; GCN-IR-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GCN-IR-NEXT: s_sub_u32 s2, s2, s0 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s0 -; GCN-IR-NEXT: s_sub_u32 s8, s8, s6 -; GCN-IR-NEXT: s_subb_u32 s9, s9, s6 +; GCN-IR-NEXT: s_sub_u32 s8, s6, s10 +; GCN-IR-NEXT: s_subb_u32 s9, s7, s10 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 @@ -1021,12 +1020,11 @@ ; GCN-IR-NEXT: s_sub_u32 s10, s12, s16 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63 -; GCN-IR-NEXT: s_mov_b32 s13, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63 -; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1 -; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_mov_b32 s13, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s14, s10, 1 @@ -1143,22 +1141,22 @@ ; GCN-IR-LABEL: s_test_srem24_48: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_sext_i32_i16 s7, s7 -; GCN-IR-NEXT: s_sext_i32_i16 s3, s3 -; GCN-IR-NEXT: s_ashr_i32 s0, s7, 31 -; GCN-IR-NEXT: s_ashr_i32 s12, s3, 31 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[6:7], 24 -; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[2:3], 24 +; GCN-IR-NEXT: s_sext_i32_i16 s1, s1 +; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[6:7], 24 +; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[0:1], 24 +; GCN-IR-NEXT: s_ashr_i32 s0, s3, 31 +; GCN-IR-NEXT: s_ashr_i32 s8, s7, 31 ; GCN-IR-NEXT: s_mov_b32 s1, s0 -; GCN-IR-NEXT: s_mov_b32 s13, s12 -; GCN-IR-NEXT: s_xor_b64 s[2:3], s[8:9], s[0:1] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[10:11], s[12:13] +; GCN-IR-NEXT: s_mov_b32 s9, s8 +; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1] +; GCN-IR-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9] ; GCN-IR-NEXT: s_sub_u32 s2, s2, s0 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s0 -; GCN-IR-NEXT: s_sub_u32 s6, s6, s12 -; GCN-IR-NEXT: s_subb_u32 s7, s7, s12 +; GCN-IR-NEXT: s_sub_u32 s6, s6, s8 +; GCN-IR-NEXT: s_subb_u32 s7, s7, s8 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[6:7], 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 @@ -1174,12 +1172,11 @@ ; GCN-IR-NEXT: s_sub_u32 s10, s12, s16 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63 -; GCN-IR-NEXT: s_mov_b32 s13, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63 -; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1 -; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_mov_b32 s13, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s14, s10, 1 @@ -1376,20 +1373,19 @@ ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7] ; GCN-IR-NEXT: s_sub_u32 s4, s2, s6 ; GCN-IR-NEXT: s_subb_u32 s5, s3, s6 -; GCN-IR-NEXT: s_flbit_i32_b32 s2, s4 -; GCN-IR-NEXT: s_add_i32 s2, s2, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s3, s5 -; GCN-IR-NEXT: s_min_u32 s8, s2, s3 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s4 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s5 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[4:5], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[4:5], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[2:3], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[2:3] ; GCN-IR-NEXT: s_mov_b64 s[2:3], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1 @@ -1993,7 +1989,7 @@ ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 @@ -2055,7 +2051,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GCN-IR-NEXT: s_movk_i32 s3, 0x5b7f -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s3 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: s_mov_b32 s5, s1 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -141,12 +141,11 @@ ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1 @@ -795,14 +794,14 @@ ; GCN-IR-NEXT: s_and_b32 s1, s1, 0xffff ; GCN-IR-NEXT: s_and_b32 s0, s0, 0xff000000 ; GCN-IR-NEXT: s_lshr_b64 s[8:9], s[2:3], 24 -; GCN-IR-NEXT: s_lshr_b64 s[2:3], s[0:1], 24 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 +; GCN-IR-NEXT: s_lshr_b64 s[0:1], s[0:1], 24 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[0:1], 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 0 -; GCN-IR-NEXT: s_mov_b64 s[0:1], 0 +; GCN-IR-NEXT: s_mov_b64 s[2:3], 0 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[10:11] -; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s0 ; GCN-IR-NEXT: s_add_i32 s6, s6, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s1 ; GCN-IR-NEXT: s_min_u32 s10, s6, s7 ; GCN-IR-NEXT: s_flbit_i32_b32 s6, s8 ; GCN-IR-NEXT: s_add_i32 s6, s6, 32 @@ -811,12 +810,11 @@ ; GCN-IR-NEXT: s_sub_u32 s6, s10, s14 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[6:7], 63 -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[6:7], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s6, 1 @@ -828,37 +826,37 @@ ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[8:9], s12 -; GCN-IR-NEXT: s_add_u32 s15, s2, -1 -; GCN-IR-NEXT: s_addc_u32 s16, s3, -1 -; GCN-IR-NEXT: s_not_b64 s[0:1], s[10:11] -; GCN-IR-NEXT: s_add_u32 s8, s0, s14 -; GCN-IR-NEXT: s_addc_u32 s9, s1, 0 +; GCN-IR-NEXT: s_add_u32 s15, s0, -1 +; GCN-IR-NEXT: s_addc_u32 s16, s1, -1 +; GCN-IR-NEXT: s_not_b64 s[2:3], s[10:11] +; GCN-IR-NEXT: s_add_u32 s8, s2, s14 +; GCN-IR-NEXT: s_addc_u32 s9, s3, 0 ; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: s_mov_b32 s1, 0 +; GCN-IR-NEXT: s_mov_b32 s3, 0 ; GCN-IR-NEXT: .LBB7_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: s_lshl_b64 s[12:13], s[12:13], 1 -; GCN-IR-NEXT: s_lshr_b32 s0, s7, 31 +; GCN-IR-NEXT: s_lshr_b32 s2, s7, 31 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[0:1] +; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[2:3] ; GCN-IR-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] -; GCN-IR-NEXT: s_sub_u32 s0, s15, s12 -; GCN-IR-NEXT: s_subb_u32 s0, s16, s13 -; GCN-IR-NEXT: s_ashr_i32 s10, s0, 31 +; GCN-IR-NEXT: s_sub_u32 s2, s15, s12 +; GCN-IR-NEXT: s_subb_u32 s2, s16, s13 +; GCN-IR-NEXT: s_ashr_i32 s10, s2, 31 ; GCN-IR-NEXT: s_mov_b32 s11, s10 -; GCN-IR-NEXT: s_and_b32 s0, s10, 1 -; GCN-IR-NEXT: s_and_b64 s[10:11], s[10:11], s[2:3] +; GCN-IR-NEXT: s_and_b32 s2, s10, 1 +; GCN-IR-NEXT: s_and_b64 s[10:11], s[10:11], s[0:1] ; GCN-IR-NEXT: s_sub_u32 s12, s12, s10 ; GCN-IR-NEXT: s_subb_u32 s13, s13, s11 ; GCN-IR-NEXT: s_add_u32 s8, s8, 1 ; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 0 -; GCN-IR-NEXT: s_mov_b64 s[10:11], s[0:1] +; GCN-IR-NEXT: s_mov_b64 s[10:11], s[2:3] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19] ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_3 ; GCN-IR-NEXT: .LBB7_4: ; %Flow3 -; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GCN-IR-NEXT: s_lshl_b64 s[0:1], s[6:7], 1 +; GCN-IR-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] ; GCN-IR-NEXT: v_mov_b32_e32 v0, s0 ; GCN-IR-NEXT: v_mov_b32_e32 v1, s1 ; GCN-IR-NEXT: s_branch .LBB7_6 @@ -990,20 +988,19 @@ ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_min_u32 s8, s4, s5 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1 @@ -1439,20 +1436,19 @@ ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_min_u32 s10, s4, s5 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s10, s6, s7 ; GCN-IR-NEXT: s_sub_u32 s6, 59, s10 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[8:9], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[8:9], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[8:9], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s8, s6, 1 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -140,12 +140,11 @@ ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1 @@ -202,8 +201,8 @@ ; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0 ; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0 ; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-IR-NEXT: s_mov_b32 s10, -1 @@ -817,20 +816,19 @@ ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_min_u32 s8, s4, s5 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1 @@ -1005,20 +1003,19 @@ ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_min_u32 s8, s4, s5 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 ; GCN-IR-NEXT: s_sub_u32 s6, 59, s8 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1 @@ -1071,7 +1068,7 @@ ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, 24 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-IR-NEXT: s_mov_b32 s4, s0 diff --git a/llvm/unittests/Transforms/Utils/IntegerDivisionTest.cpp b/llvm/unittests/Transforms/Utils/IntegerDivisionTest.cpp --- a/llvm/unittests/Transforms/Utils/IntegerDivisionTest.cpp +++ b/llvm/unittests/Transforms/Utils/IntegerDivisionTest.cpp @@ -43,7 +43,7 @@ Value *Ret = Builder.CreateRet(Div); expandDivision(cast(Div)); - EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::Freeze); Instruction* Quotient = dyn_cast(cast(Ret)->getOperand(0)); EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::Sub); @@ -73,7 +73,7 @@ Value *Ret = Builder.CreateRet(Div); expandDivision(cast(Div)); - EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::Freeze); Instruction* Quotient = dyn_cast(cast(Ret)->getOperand(0)); EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::PHI); @@ -103,7 +103,7 @@ Value *Ret = Builder.CreateRet(Rem); expandRemainder(cast(Rem)); - EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::Freeze); Instruction* Remainder = dyn_cast(cast(Ret)->getOperand(0)); EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub); @@ -133,7 +133,7 @@ Value *Ret = Builder.CreateRet(Rem); expandRemainder(cast(Rem)); - EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::Freeze); Instruction* Remainder = dyn_cast(cast(Ret)->getOperand(0)); EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub); @@ -164,7 +164,7 @@ Value *Ret = Builder.CreateRet(Div); expandDivision(cast(Div)); - EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::Freeze); Instruction* Quotient = dyn_cast(cast(Ret)->getOperand(0)); EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::Sub); @@ -194,7 +194,7 @@ Value *Ret = Builder.CreateRet(Div); expandDivision(cast(Div)); - EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::Freeze); Instruction* Quotient = dyn_cast(cast(Ret)->getOperand(0)); EXPECT_TRUE(Quotient && Quotient->getOpcode() == Instruction::PHI); @@ -224,7 +224,7 @@ Value *Ret = Builder.CreateRet(Rem); expandRemainder(cast(Rem)); - EXPECT_TRUE(BB->front().getOpcode() == Instruction::AShr); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::Freeze); Instruction* Remainder = dyn_cast(cast(Ret)->getOperand(0)); EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub); @@ -254,7 +254,7 @@ Value *Ret = Builder.CreateRet(Rem); expandRemainder(cast(Rem)); - EXPECT_TRUE(BB->front().getOpcode() == Instruction::ICmp); + EXPECT_TRUE(BB->front().getOpcode() == Instruction::Freeze); Instruction* Remainder = dyn_cast(cast(Ret)->getOperand(0)); EXPECT_TRUE(Remainder && Remainder->getOpcode() == Instruction::Sub);