Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -588,6 +588,17 @@ break; } + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_BUILD_VECTOR_TRUNC: { + // TODO: Probably should have a recursion depth guard since you could have + // bitcasted vector elements. + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB)) + return false; + } + + return true; + } default: break; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir @@ -145,7 +145,10 @@ ; GCN: %var:_(<2 x s16>) = COPY $vgpr0 ; GCN: %four:_(s32) = G_CONSTANT i32 4 ; GCN: %four_vec:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %four(s32), %four(s32) - ; GCN: %rem:_(<2 x s16>) = G_UREM %var, %four_vec + ; GCN: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GCN: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD %four_vec, [[BUILD_VECTOR]] + ; GCN: %rem:_(<2 x s16>) = G_AND %var, [[ADD]] ; GCN: $vgpr0 = COPY %rem(<2 x s16>) %var:_(<2 x s16>) = COPY $vgpr0 %shift_amt:_(<2 x s16>) = COPY $vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll @@ -215,45 +215,13 @@ } define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) { -; GISEL-LABEL: v_urem_v2i32_pow2k_denom: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_movk_i32 s4, 0x1000 -; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4 -; GISEL-NEXT: s_sub_i32 s5, 0, s4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 -; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2 -; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2 -; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2 -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 12, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 12, v2 -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 -; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1 -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1 -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GISEL-NEXT: s_setpc_b64 s[30:31] -; -; CGP-LABEL: v_urem_v2i32_pow2k_denom: -; CGP: ; %bb.0: -; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_add_i32 s4, 0x1000, -1 -; CGP-NEXT: v_and_b32_e32 v0, s4, v0 -; CGP-NEXT: v_and_b32_e32 v1, s4, v1 -; CGP-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: v_urem_v2i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_add_i32 s4, 0x1000, -1 +; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 +; CHECK-NEXT: v_and_b32_e32 v1, s4, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i32> %num, ret <2 x i32> %result } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -962,286 +962,25 @@ } define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) { -; GISEL-LABEL: v_urem_v2i64_pow2k_denom: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_movk_i32 s10, 0x1000 -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10 -; GISEL-NEXT: s_sub_u32 s8, 0, s10 -; GISEL-NEXT: s_cselect_b32 s4, 1, 0 -; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 -; GISEL-NEXT: v_mov_b32_e32 v6, v4 -; GISEL-NEXT: s_and_b32 s4, s4, 1 -; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 -; GISEL-NEXT: s_cmp_lg_u32 s4, 0 -; GISEL-NEXT: s_subb_u32 s9, 0, 0 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 -; GISEL-NEXT: s_sub_u32 s11, 0, s10 -; GISEL-NEXT: s_cselect_b32 s4, 1, 0 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v6, v6 -; GISEL-NEXT: s_and_b32 s4, s4, 1 -; GISEL-NEXT: v_trunc_f32_e32 v7, v7 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: s_cmp_lg_u32 s4, 0 -; GISEL-NEXT: s_subb_u32 s6, 0, 0 -; GISEL-NEXT: v_mul_lo_u32 v8, s11, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 -; GISEL-NEXT: v_mul_lo_u32 v10, s11, v4 -; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4 -; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 -; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 -; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 -; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 -; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 -; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 -; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 -; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 -; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 -; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 -; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 -; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 -; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 -; GISEL-NEXT: v_mul_lo_u32 v8, s11, v4 -; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 -; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 -; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5 -; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 -; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 -; GISEL-NEXT: v_mul_lo_u32 v16, s11, v10 -; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8 -; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 -; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13 -; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9 -; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 -; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 -; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 -; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 -; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 -; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11 -; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11 -; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12 -; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12 -; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 -; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 -; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 -; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc -; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 -; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 -; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 -; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6 -; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 -; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4 -; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, s10, v4 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; GISEL-NEXT: v_mul_lo_u32 v11, s10, v5 -; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, s10, v5 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v6, s10, v6 -; GISEL-NEXT: v_mul_lo_u32 v7, s10, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 -; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 -; GISEL-NEXT: v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5 -; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] -; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] -; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc -; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s10, v2 -; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc -; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, s10, v0 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s10, v8 -; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc -; GISEL-NEXT: v_subrev_i32_e32 v16, vcc, s10, v9 -; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v11, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc -; GISEL-NEXT: s_setpc_b64 s[30:31] -; -; CGP-LABEL: v_urem_v2i64_pow2k_denom: -; CGP: ; %bb.0: -; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s4, 0x1000 -; CGP-NEXT: s_add_u32 s5, s4, -1 -; CGP-NEXT: s_cselect_b32 s6, 1, 0 -; CGP-NEXT: s_and_b32 s6, s6, 1 -; CGP-NEXT: s_cmp_lg_u32 s6, 0 -; CGP-NEXT: s_addc_u32 s6, 0, -1 -; CGP-NEXT: s_add_u32 s4, s4, -1 -; CGP-NEXT: s_cselect_b32 s7, 1, 0 -; CGP-NEXT: v_and_b32_e32 v0, s5, v0 -; CGP-NEXT: s_and_b32 s5, s7, 1 -; CGP-NEXT: v_and_b32_e32 v1, s6, v1 -; CGP-NEXT: s_cmp_lg_u32 s5, 0 -; CGP-NEXT: s_addc_u32 s5, 0, -1 -; CGP-NEXT: v_and_b32_e32 v2, s4, v2 -; CGP-NEXT: v_and_b32_e32 v3, s5, v3 -; CGP-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: v_urem_v2i64_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: s_add_u32 s5, s4, -1 +; CHECK-NEXT: s_cselect_b32 s6, 1, 0 +; CHECK-NEXT: s_and_b32 s6, s6, 1 +; CHECK-NEXT: s_cmp_lg_u32 s6, 0 +; CHECK-NEXT: s_addc_u32 s6, 0, -1 +; CHECK-NEXT: s_add_u32 s4, s4, -1 +; CHECK-NEXT: s_cselect_b32 s7, 1, 0 +; CHECK-NEXT: v_and_b32_e32 v0, s5, v0 +; CHECK-NEXT: s_and_b32 s5, s7, 1 +; CHECK-NEXT: v_and_b32_e32 v1, s6, v1 +; CHECK-NEXT: s_cmp_lg_u32 s5, 0 +; CHECK-NEXT: s_addc_u32 s5, 0, -1 +; CHECK-NEXT: v_and_b32_e32 v2, s4, v2 +; CHECK-NEXT: v_and_b32_e32 v3, s5, v3 +; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i64> %num, ret <2 x i64> %result }