diff --git a/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/llvm/lib/Transforms/Utils/IntegerDivision.cpp --- a/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -53,6 +53,8 @@ // ; %urem = urem i32 %dividend, %divisor // ; %xored = xor i32 %urem, %dividend_sgn // ; %srem = sub i32 %xored, %dividend_sgn + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); Value *DividendSign = Builder.CreateAShr(Dividend, Shift); Value *DivisorSign = Builder.CreateAShr(Divisor, Shift); Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); @@ -84,6 +86,8 @@ // ; %quotient = udiv i32 %dividend, %divisor // ; %product = mul i32 %divisor, %quotient // ; %remainder = sub i32 %dividend, %product + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); Value *Product = Builder.CreateMul(Divisor, Quotient); Value *Remainder = Builder.CreateSub(Dividend, Product); @@ -126,6 +130,8 @@ // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr // ; %tmp4 = xor i32 %q_mag, %q_sgn // ; %q = sub i32 %tmp4, %q_sgn + Dividend = Builder.CreateFreeze(Dividend); + Divisor = Builder.CreateFreeze(Divisor); Value *Tmp = Builder.CreateAShr(Dividend, Shift); Value *Tmp1 = Builder.CreateAShr(Divisor, Shift); Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); @@ -236,28 +242,30 @@ // ; special-cases: // ; %ret0_1 = icmp eq i32 %divisor, 0 // ; %ret0_2 = icmp eq i32 %dividend, 0 - // ; %ret0_3 = or i1 %ret0_1, %ret0_2 + // ; %ret0_3 = select i1 %ret0_1, i1 true, i1 %ret0_2 // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true) // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) // ; %sr = sub nsw i32 %tmp0, %tmp1 // ; %ret0_4 = icmp ugt i32 %sr, 31 - // ; %ret0 = or i1 %ret0_3, %ret0_4 + // ; %ret0 = select i1 %ret0_3, i1 true, i1 %ret0_4 // ; %retDividend = icmp eq i32 %sr, 31 // ; %retVal = select i1 %ret0, i32 0, i32 %dividend - // 
; %earlyRet = or i1 %ret0, %retDividend + // ; %earlyRet = select i1 %ret0, i1 true, i1 %retDividend // ; br i1 %earlyRet, label %end, label %bb1 Builder.SetInsertPoint(SpecialCases); + Divisor = Builder.CreateFreeze(Divisor); + Dividend = Builder.CreateFreeze(Dividend); Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); - Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); + Value *Ret0_3 = Builder.CreateLogicalOr(Ret0_1, Ret0_2); Value *Tmp0 = Builder.CreateCall(CTLZ, {Divisor, True}); Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True}); Value *SR = Builder.CreateSub(Tmp0, Tmp1); Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); - Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); + Value *Ret0 = Builder.CreateLogicalOr(Ret0_3, Ret0_4); Value *RetDividend = Builder.CreateICmpEQ(SR, MSB); Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); - Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); + Value *EarlyRet = Builder.CreateLogicalOr(Ret0, RetDividend); Builder.CreateCondBr(EarlyRet, End, BB1); // ; bb1: ; preds = %special-cases diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -168,12 +168,11 @@ ; GCN-IR-NEXT: s_sub_u32 s10, s14, s18 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63 -; GCN-IR-NEXT: s_mov_b32 s15, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63 -; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1 -; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_mov_b32 s15, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; 
GCN-IR-NEXT: s_add_u32 s16, s10, 1 @@ -524,7 +523,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -694,7 +693,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 31 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -761,7 +760,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 23 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -828,7 +827,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v3, s4 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 25 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -910,7 +909,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: s_ashr_i64 s[10:11], s[10:11], 40 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v2, s10 ; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[6:7], 40 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v3, s6 @@ -926,7 +925,7 @@ ; GCN-IR-NEXT: 
v_cmp_ge_f32_e64 vcc, |v3|, |v2| ; GCN-IR-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 -; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; GCN-IR-NEXT: v_bfe_i32 v2, v2, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v3, 31, v2 @@ -986,10 +985,10 @@ ; GCN-IR-NEXT: s_sext_i32_i16 s3, s3 ; GCN-IR-NEXT: s_sext_i32_i16 s5, s5 ; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[2:3], 24 -; GCN-IR-NEXT: s_ashr_i32 s2, s3, 31 -; GCN-IR-NEXT: s_mov_b32 s3, s2 +; GCN-IR-NEXT: s_ashr_i32 s2, s7, 31 ; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[4:5], 24 -; GCN-IR-NEXT: s_ashr_i32 s4, s5, 31 +; GCN-IR-NEXT: s_mov_b32 s3, s2 +; GCN-IR-NEXT: s_ashr_i32 s4, s9, 31 ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[2:3], s[6:7] ; GCN-IR-NEXT: s_mov_b32 s5, s4 ; GCN-IR-NEXT: s_sub_u32 s12, s6, s2 @@ -1012,12 +1011,11 @@ ; GCN-IR-NEXT: s_sub_u32 s10, s14, s18 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[20:21], s[10:11], 63 -; GCN-IR-NEXT: s_mov_b32 s15, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[22:23], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[20:21], s[10:11], 63 -; GCN-IR-NEXT: s_xor_b64 s[22:23], s[16:17], -1 -; GCN-IR-NEXT: s_and_b64 s[20:21], s[22:23], s[20:21] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_or_b64 s[20:21], s[16:17], s[22:23] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; GCN-IR-NEXT: s_mov_b32 s15, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s16, s10, 1 @@ -1212,20 +1210,19 @@ ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3] ; GCN-IR-NEXT: s_sub_u32 s2, s2, s4 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s4 -; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 -; GCN-IR-NEXT: s_add_i32 s6, s6, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 -; GCN-IR-NEXT: s_min_u32 s10, s6, s7 +; GCN-IR-NEXT: s_flbit_i32_b32 s8, s2 +; GCN-IR-NEXT: s_add_i32 s8, s8, 32 +; GCN-IR-NEXT: 
s_flbit_i32_b32 s9, s3 +; GCN-IR-NEXT: s_min_u32 s10, s8, s9 ; GCN-IR-NEXT: s_add_u32 s8, s10, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[14:15], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 63 +; GCN-IR-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[14:15] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[14:15], s[8:9], 63 -; GCN-IR-NEXT: s_xor_b64 s[16:17], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[14:15], s[16:17], s[14:15] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[14:15] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1 @@ -1827,7 +1824,7 @@ ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GCN-IR-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -1884,7 +1881,7 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v2, s0 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s8 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 ; GCN-IR-NEXT: s_mov_b32 s5, s1 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v1, 31, v0 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -140,12 +140,11 @@ ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: 
v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1 @@ -202,8 +201,8 @@ ; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0 ; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0 ; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-IR-NEXT: s_mov_b32 s10, -1 @@ -505,7 +504,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc ; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 @@ -576,7 +575,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc ; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 @@ -701,7 +700,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc ; GCN-IR-NEXT: s_mov_b32 s5, s1 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GCN-IR-NEXT: v_add_i32_e32 v0, 
vcc, v2, v0 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 @@ -839,7 +838,7 @@ ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s3, v0 @@ -995,17 +994,17 @@ ; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_ashr_i64 s[2:3], s[6:7], 31 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 31 -; GCN-IR-NEXT: s_ashr_i32 s0, s7, 31 -; GCN-IR-NEXT: s_ashr_i32 s6, s1, 31 +; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[0:1], 31 +; GCN-IR-NEXT: s_ashr_i32 s0, s3, 31 +; GCN-IR-NEXT: s_ashr_i32 s10, s7, 31 ; GCN-IR-NEXT: s_mov_b32 s1, s0 -; GCN-IR-NEXT: s_mov_b32 s7, s6 +; GCN-IR-NEXT: s_mov_b32 s11, s10 ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[0:1] -; GCN-IR-NEXT: s_xor_b64 s[8:9], s[8:9], s[6:7] +; GCN-IR-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11] ; GCN-IR-NEXT: s_sub_u32 s2, s2, s0 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s0 -; GCN-IR-NEXT: s_sub_u32 s8, s8, s6 -; GCN-IR-NEXT: s_subb_u32 s9, s9, s6 +; GCN-IR-NEXT: s_sub_u32 s8, s6, s10 +; GCN-IR-NEXT: s_subb_u32 s9, s7, s10 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 ; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 @@ -1021,12 +1020,11 @@ ; GCN-IR-NEXT: s_sub_u32 s10, s12, s16 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63 -; GCN-IR-NEXT: s_mov_b32 s13, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63 -; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1 -; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19] -; GCN-IR-NEXT: s_and_b64 vcc, exec, 
s[18:19] +; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_mov_b32 s13, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s14, s10, 1 @@ -1152,17 +1150,17 @@ ; GCN-IR-NEXT: s_sext_i32_i16 s3, s3 ; GCN-IR-NEXT: s_sext_i32_i16 s5, s5 ; GCN-IR-NEXT: s_ashr_i64 s[6:7], s[2:3], 24 -; GCN-IR-NEXT: s_ashr_i32 s2, s3, 31 -; GCN-IR-NEXT: s_ashr_i32 s10, s5, 31 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[4:5], 24 +; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[4:5], 24 +; GCN-IR-NEXT: s_ashr_i32 s2, s7, 31 +; GCN-IR-NEXT: s_ashr_i32 s8, s5, 31 ; GCN-IR-NEXT: s_mov_b32 s3, s2 -; GCN-IR-NEXT: s_mov_b32 s11, s10 -; GCN-IR-NEXT: s_xor_b64 s[4:5], s[6:7], s[2:3] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[8:9], s[10:11] -; GCN-IR-NEXT: s_sub_u32 s4, s4, s2 -; GCN-IR-NEXT: s_subb_u32 s5, s5, s2 -; GCN-IR-NEXT: s_sub_u32 s6, s6, s10 -; GCN-IR-NEXT: s_subb_u32 s7, s7, s10 +; GCN-IR-NEXT: s_mov_b32 s9, s8 +; GCN-IR-NEXT: s_xor_b64 s[6:7], s[6:7], s[2:3] +; GCN-IR-NEXT: s_xor_b64 s[10:11], s[4:5], s[8:9] +; GCN-IR-NEXT: s_sub_u32 s4, s6, s2 +; GCN-IR-NEXT: s_subb_u32 s5, s7, s2 +; GCN-IR-NEXT: s_sub_u32 s6, s10, s8 +; GCN-IR-NEXT: s_subb_u32 s7, s11, s8 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[6:7], 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[4:5], 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 @@ -1178,12 +1176,11 @@ ; GCN-IR-NEXT: s_sub_u32 s10, s12, s16 ; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[18:19], s[10:11], 63 -; GCN-IR-NEXT: s_mov_b32 s13, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[18:19], s[10:11], 63 -; GCN-IR-NEXT: s_xor_b64 s[20:21], s[14:15], -1 -; GCN-IR-NEXT: s_and_b64 s[18:19], s[20:21], s[18:19] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19] +; GCN-IR-NEXT: s_or_b64 s[18:19], s[14:15], s[20:21] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[18:19] +; 
GCN-IR-NEXT: s_mov_b32 s13, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB9_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s14, s10, 1 @@ -1380,20 +1377,19 @@ ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7] ; GCN-IR-NEXT: s_sub_u32 s4, s2, s6 ; GCN-IR-NEXT: s_subb_u32 s5, s3, s6 -; GCN-IR-NEXT: s_flbit_i32_b32 s2, s4 -; GCN-IR-NEXT: s_add_i32 s2, s2, 32 -; GCN-IR-NEXT: s_flbit_i32_b32 s3, s5 -; GCN-IR-NEXT: s_min_u32 s8, s2, s3 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s4 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s5 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[4:5], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[4:5], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[2:3], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[2:3] ; GCN-IR-NEXT: s_mov_b64 s[2:3], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1 @@ -1997,7 +1993,7 @@ ; GCN-IR-NEXT: v_cvt_i32_f32_e32 v1, v1 ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, |v0| ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s4 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, 24, v0 ; GCN-IR-NEXT: v_bfe_i32 v0, v0, 0, 24 @@ -2059,7 +2055,7 @@ ; GCN-IR-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, s4 ; GCN-IR-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GCN-IR-NEXT: 
s_movk_i32 s3, 0x5b7f -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v2, v0 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, s3 ; GCN-IR-NEXT: s_mov_b32 s4, s0 ; GCN-IR-NEXT: s_mov_b32 s5, s1 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -141,12 +141,11 @@ ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1 @@ -816,12 +815,11 @@ ; GCN-IR-NEXT: s_sub_u32 s6, s10, s14 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[6:7], 63 -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[6:7], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s6, 1 @@ -995,20 +993,19 @@ ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_waitcnt 
lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_min_u32 s8, s4, s5 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1 @@ -1444,20 +1441,19 @@ ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_min_u32 s10, s4, s5 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s10, s6, s7 ; GCN-IR-NEXT: s_sub_u32 s6, 59, s10 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[8:9], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], 
s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[8:9], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[8:9], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s8, s6, 1 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -140,12 +140,11 @@ ; GCN-IR-NEXT: s_sub_u32 s8, s10, s14 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_mov_b32 s11, 0 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[16:17], s[8:9], 63 -; GCN-IR-NEXT: s_xor_b64 s[18:19], s[12:13], -1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[12:13], s[18:19] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] +; GCN-IR-NEXT: s_mov_b32 s11, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s8, 1 @@ -202,8 +201,8 @@ ; GCN-IR-NEXT: v_mul_lo_u32 v3, s5, v0 ; GCN-IR-NEXT: v_mul_lo_u32 v0, s4, v0 ; GCN-IR-NEXT: s_mov_b32 s11, 0xf000 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v3, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-IR-NEXT: s_mov_b32 s10, -1 @@ -817,20 +816,19 @@ ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 
0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_min_u32 s8, s4, s5 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 ; GCN-IR-NEXT: s_add_u32 s6, s8, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s7, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1 @@ -1005,20 +1003,19 @@ ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b32 s4, s2 -; GCN-IR-NEXT: s_flbit_i32_b32 s5, s3 -; GCN-IR-NEXT: s_add_i32 s4, s4, 32 -; GCN-IR-NEXT: s_min_u32 s8, s4, s5 +; GCN-IR-NEXT: s_flbit_i32_b32 s6, s2 +; GCN-IR-NEXT: s_flbit_i32_b32 s7, s3 +; GCN-IR-NEXT: s_add_i32 s6, s6, 32 +; GCN-IR-NEXT: s_min_u32 s8, s6, s7 ; GCN-IR-NEXT: s_sub_u32 s6, 59, s8 ; GCN-IR-NEXT: s_subb_u32 s7, 0, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[2:3], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[6:7], 63 +; GCN-IR-NEXT: 
v_cmp_eq_u64_e64 s[12:13], s[6:7], 63 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[10:11], s[12:13] +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[12:13], s[6:7], 63 -; GCN-IR-NEXT: s_xor_b64 s[14:15], s[10:11], -1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[14:15], s[12:13] -; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s6, 1 @@ -1071,7 +1068,7 @@ ; GCN-IR-NEXT: v_mul_lo_u32 v0, v0, 24 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1 -; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, v2, v1 ; GCN-IR-NEXT: v_mov_b32_e32 v2, s3 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 ; GCN-IR-NEXT: s_mov_b32 s4, s0