Index: llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -129,7 +129,7 @@
     return;
   }
 
-  unsigned BitWidth = DstTy.getSizeInBits();
+  unsigned BitWidth = DstTy.getScalarSizeInBits();
   auto CacheEntry = ComputeKnownBitsCache.find(R);
   if (CacheEntry != ComputeKnownBitsCache.end()) {
     Known = CacheEntry->second;
@@ -140,9 +140,6 @@
   }
   Known = KnownBits(BitWidth); // Don't know anything
 
-  if (DstTy.isVector())
-    return; // TODO: Handle vectors.
-
   // Depth may get bigger than max depth if it gets passed to a different
   // GISelKnownBits object.
   // This may happen when say a generic part uses a GISelKnownBits object
@@ -164,6 +161,25 @@
     TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                       Depth);
     break;
+  case TargetOpcode::G_BUILD_VECTOR: {
+    // Collect the known bits that are shared by every demanded vector element.
+    Known.Zero.setAllBits(); Known.One.setAllBits();
+    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
+      if (!DemandedElts[i])
+        continue;
+
+      computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
+                           Depth + 1);
+
+      // Known bits are the values that are shared by every demanded element.
+      Known = KnownBits::commonBits(Known, Known2);
+
+      // If we don't know any bits, early out.
+      if (Known.isUnknown())
+        break;
+    }
+    break;
+  }
   case TargetOpcode::COPY:
   case TargetOpcode::G_PHI:
   case TargetOpcode::PHI: {
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
@@ -156,3 +156,74 @@
     %rem:_(<2 x s16>) = G_UREM %var, %four_vec
     $vgpr0 = COPY %rem
 ...
+
+---
+name: v_urem_v2i32_pow2k_denom
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GCN-LABEL: name: v_urem_v2i32_pow2k_denom
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GCN: %pow2:_(s32) = G_CONSTANT i32 4096
+    ; GCN: %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2(s32), %pow2(s32)
+    ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; GCN: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD %pow2_vec, [[BUILD_VECTOR]]
+    ; GCN: %rem:_(<2 x s32>) = G_AND %var, [[ADD]]
+    ; GCN: $vgpr0_vgpr1 = COPY %rem(<2 x s32>)
+    %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %pow2:_(s32) = G_CONSTANT i32 4096
+    %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2(s32), %pow2(s32)
+    %rem:_(<2 x s32>) = G_UREM %var, %pow2_vec
+    $vgpr0_vgpr1 = COPY %rem
+...
+
+---
+name: v_urem_v2i32_pow2k_not_splat_denom
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; GCN-LABEL: name: v_urem_v2i32_pow2k_not_splat_denom
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GCN: %pow2_1:_(s32) = G_CONSTANT i32 4096
+    ; GCN: %pow2_2:_(s32) = G_CONSTANT i32 2048
+    ; GCN: %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2_1(s32), %pow2_2(s32)
+    ; GCN: %rem:_(<2 x s32>) = G_UREM %var, %pow2_vec
+    ; GCN: $vgpr0_vgpr1 = COPY %rem(<2 x s32>)
+    %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %pow2_1:_(s32) = G_CONSTANT i32 4096
+    %pow2_2:_(s32) = G_CONSTANT i32 2048
+    %pow2_vec:_(<2 x s32>) = G_BUILD_VECTOR %pow2_1(s32), %pow2_2(s32)
+    %rem:_(<2 x s32>) = G_UREM %var, %pow2_vec
+    $vgpr0_vgpr1 = COPY %rem
+...
+
+---
+name: v_urem_v2i64_pow2k_denom
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; GCN-LABEL: name: v_urem_v2i64_pow2k_denom
+    ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GCN: %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GCN: %pow2:_(s64) = G_CONSTANT i64 4096
+    ; GCN: %pow2_vec:_(<2 x s64>) = G_BUILD_VECTOR %pow2(s64), %pow2(s64)
+    ; GCN: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; GCN: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD %pow2_vec, [[BUILD_VECTOR]]
+    ; GCN: %rem:_(<2 x s64>) = G_AND %var, [[ADD]]
+    ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %rem(<2 x s64>)
+    %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %pow2:_(s64) = G_CONSTANT i64 4096
+    %pow2_vec:_(<2 x s64>) = G_BUILD_VECTOR %pow2(s64), %pow2(s64)
+    %rem:_(<2 x s64>) = G_UREM %var, %pow2_vec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %rem
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -215,45 +215,13 @@
 }
 
 define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
-; GISEL-LABEL: v_urem_v2i32_pow2k_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_movk_i32 s4, 0x1000
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v2, s4
-; GISEL-NEXT:    s_sub_i32 s5, 0, s4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; GISEL-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GISEL-NEXT:    v_mul_lo_u32 v3, s5, v2
-; GISEL-NEXT:    v_mul_hi_u32 v3, v2, v3
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v2
-; GISEL-NEXT:    v_mul_hi_u32 v2, v1, v2
-; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
-; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
-; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
-; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v1
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
-; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, s4, v1
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_urem_v2i32_pow2k_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_add_i32 s4, 0x1000, -1
-; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
-; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_urem_v2i32_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_add_i32 s4, 0x1000, -1
+; CHECK-NEXT:    v_and_b32_e32 v0, s4, v0
+; CHECK-NEXT:    v_and_b32_e32 v1, s4, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
 }
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -962,286 +962,25 @@
 }
 
 define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
-; GISEL-LABEL: v_urem_v2i64_pow2k_denom:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_movk_i32 s10, 0x1000
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
-; GISEL-NEXT:    s_sub_u32 s8, 0, s10
-; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
-; GISEL-NEXT:    v_cvt_f32_ubyte0_e32 v5, 0
-; GISEL-NEXT:    v_mov_b32_e32 v6, v4
-; GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GISEL-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v5
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v6
-; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
-; GISEL-NEXT:    s_subb_u32 s9, 0, 0
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
-; GISEL-NEXT:    s_sub_u32 s11, 0, s10
-; GISEL-NEXT:    s_cselect_b32 s4, 1, 0
-; GISEL-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v5
-; GISEL-NEXT:    v_trunc_f32_e32 v6, v6
-; GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GISEL-NEXT:    v_trunc_f32_e32 v7, v7
-; GISEL-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; GISEL-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
-; GISEL-NEXT:    s_subb_u32 s6, 0, 0
-; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v6
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v7
-; GISEL-NEXT:    v_mul_lo_u32 v10, s11, v4
-; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
-; GISEL-NEXT:    v_mul_lo_u32 v13, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
-; GISEL-NEXT:    v_mul_lo_u32 v11, v6, v10
-; GISEL-NEXT:    v_mul_hi_u32 v16, v4, v10
-; GISEL-NEXT:    v_mul_hi_u32 v10, v6, v10
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v14, v9
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v13
-; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v13
-; GISEL-NEXT:    v_mul_hi_u32 v13, v7, v13
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_mul_lo_u32 v12, v4, v8
-; GISEL-NEXT:    v_mul_lo_u32 v15, v6, v8
-; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v19, v5, v9
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT:    v_mul_lo_u32 v14, v7, v9
-; GISEL-NEXT:    v_mul_hi_u32 v17, v5, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v7, v9
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v15, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v13, s[4:5], v14, v13
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v11, s[4:5], v11, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v10, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v15, v16
-; GISEL-NEXT:    v_add_i32_e32 v15, vcc, v19, v18
-; GISEL-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, v14, v15
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v10
-; GISEL-NEXT:    v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
-; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v6, v8
-; GISEL-NEXT:    v_mul_lo_u32 v8, s11, v4
-; GISEL-NEXT:    v_mul_lo_u32 v11, s6, v4
-; GISEL-NEXT:    v_mul_hi_u32 v12, s11, v4
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v5, v13
-; GISEL-NEXT:    v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
-; GISEL-NEXT:    v_add_i32_e64 v7, s[6:7], v7, v9
-; GISEL-NEXT:    v_mul_lo_u32 v9, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v14, s9, v5
-; GISEL-NEXT:    v_mul_hi_u32 v15, s8, v5
-; GISEL-NEXT:    v_mul_lo_u32 v16, s11, v10
-; GISEL-NEXT:    v_mul_lo_u32 v17, v10, v8
-; GISEL-NEXT:    v_mul_hi_u32 v18, v4, v8
-; GISEL-NEXT:    v_mul_hi_u32 v8, v10, v8
-; GISEL-NEXT:    v_mul_lo_u32 v19, s8, v13
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v16
-; GISEL-NEXT:    v_mul_lo_u32 v16, v13, v9
-; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v14, v19
-; GISEL-NEXT:    v_mul_hi_u32 v19, v5, v9
-; GISEL-NEXT:    v_mul_hi_u32 v9, v13, v9
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v14, v15
-; GISEL-NEXT:    v_mul_lo_u32 v14, v4, v11
-; GISEL-NEXT:    v_mul_lo_u32 v15, v5, v12
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v16, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v19
-; GISEL-NEXT:    v_mul_lo_u32 v15, v10, v11
-; GISEL-NEXT:    v_mul_hi_u32 v19, v4, v11
-; GISEL-NEXT:    v_mul_hi_u32 v10, v10, v11
-; GISEL-NEXT:    v_mul_lo_u32 v11, v13, v12
-; GISEL-NEXT:    v_mul_hi_u32 v13, v13, v12
-; GISEL-NEXT:    v_mul_hi_u32 v12, v5, v12
-; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v17, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v17, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v15, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[8:9], v11, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v14, s[8:9], v14, v18
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[8:9]
-; GISEL-NEXT:    v_add_i32_e64 v8, s[8:9], v8, v19
-; GISEL-NEXT:    v_cndmask_b32_e64 v18, 0, 1, s[8:9]
-; GISEL-NEXT:    v_cndmask_b32_e64 v19, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v14, s[6:7], v17, v14
-; GISEL-NEXT:    v_add_i32_e64 v15, s[6:7], v15, v18
-; GISEL-NEXT:    v_add_i32_e64 v16, s[6:7], v16, v19
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v12
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v8, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v9, s[6:7], v9, v16
-; GISEL-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s[6:7]
-; GISEL-NEXT:    v_add_i32_e64 v12, s[6:7], v15, v12
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v11, v14
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v10, v12
-; GISEL-NEXT:    v_add_i32_e64 v11, s[6:7], v13, v11
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
-; GISEL-NEXT:    v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v6, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v8, v3, v4
-; GISEL-NEXT:    v_mul_hi_u32 v10, v2, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v7, vcc
-; GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
-; GISEL-NEXT:    v_mul_hi_u32 v11, v0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
-; GISEL-NEXT:    v_mul_lo_u32 v13, v3, v6
-; GISEL-NEXT:    v_mul_hi_u32 v14, v2, v6
-; GISEL-NEXT:    v_mul_hi_u32 v6, v3, v6
-; GISEL-NEXT:    v_mul_lo_u32 v15, v0, v7
-; GISEL-NEXT:    v_mul_lo_u32 v16, v1, v7
-; GISEL-NEXT:    v_mul_hi_u32 v17, v0, v7
-; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v12
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v13, v4
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v15
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v16, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v16, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v17
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v12, v8
-; GISEL-NEXT:    v_add_i32_e32 v10, vcc, v13, v10
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v15, v9
-; GISEL-NEXT:    v_add_i32_e32 v11, vcc, v16, v11
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
-; GISEL-NEXT:    v_add_i32_e32 v8, vcc, v10, v8
-; GISEL-NEXT:    v_mul_lo_u32 v10, s10, v4
-; GISEL-NEXT:    v_mul_lo_u32 v12, 0, v4
-; GISEL-NEXT:    v_mul_hi_u32 v4, s10, v4
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
-; GISEL-NEXT:    v_mul_lo_u32 v11, s10, v5
-; GISEL-NEXT:    v_mul_lo_u32 v13, 0, v5
-; GISEL-NEXT:    v_mul_hi_u32 v5, s10, v5
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
-; GISEL-NEXT:    v_mul_lo_u32 v6, s10, v6
-; GISEL-NEXT:    v_mul_lo_u32 v7, s10, v7
-; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v12, v6
-; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v13, v7
-; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GISEL-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
-; GISEL-NEXT:    v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[4:5], s10, v2
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v0, v11
-; GISEL-NEXT:    v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
-; GISEL-NEXT:    v_sub_i32_e64 v1, s[6:7], v1, v5
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v0
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[6:7]
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
-; GISEL-NEXT:    v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, s10, v2
-; GISEL-NEXT:    v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v8
-; GISEL-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v9, vcc, s10, v0
-; GISEL-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v9
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, s10, v8
-; GISEL-NEXT:    v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 0, v1
-; GISEL-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
-; GISEL-NEXT:    v_subrev_i32_e32 v16, vcc, s10, v9
-; GISEL-NEXT:    v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, v12, v10, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, v15, v11, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
-; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v13, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v14, vcc
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[4:5]
-; GISEL-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v5
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v9, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v6, v3, vcc
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; CGP-LABEL: v_urem_v2i64_pow2k_denom:
-; CGP:       ; %bb.0:
-; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CGP-NEXT:    s_movk_i32 s4, 0x1000
-; CGP-NEXT:    s_add_u32 s5, s4, -1
-; CGP-NEXT:    s_cselect_b32 s6, 1, 0
-; CGP-NEXT:    s_and_b32 s6, s6, 1
-; CGP-NEXT:    s_cmp_lg_u32 s6, 0
-; CGP-NEXT:    s_addc_u32 s6, 0, -1
-; CGP-NEXT:    s_add_u32 s4, s4, -1
-; CGP-NEXT:    s_cselect_b32 s7, 1, 0
-; CGP-NEXT:    v_and_b32_e32 v0, s5, v0
-; CGP-NEXT:    s_and_b32 s5, s7, 1
-; CGP-NEXT:    v_and_b32_e32 v1, s6, v1
-; CGP-NEXT:    s_cmp_lg_u32 s5, 0
-; CGP-NEXT:    s_addc_u32 s5, 0, -1
-; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
-; CGP-NEXT:    v_and_b32_e32 v3, s5, v3
-; CGP-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: v_urem_v2i64_pow2k_denom:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s4, 0x1000
+; CHECK-NEXT:    s_add_u32 s5, s4, -1
+; CHECK-NEXT:    s_cselect_b32 s6, 1, 0
+; CHECK-NEXT:    s_and_b32 s6, s6, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s6, 0
+; CHECK-NEXT:    s_addc_u32 s6, 0, -1
+; CHECK-NEXT:    s_add_u32 s4, s4, -1
+; CHECK-NEXT:    s_cselect_b32 s7, 1, 0
+; CHECK-NEXT:    v_and_b32_e32 v0, s5, v0
+; CHECK-NEXT:    s_and_b32 s5, s7, 1
+; CHECK-NEXT:    v_and_b32_e32 v1, s6, v1
+; CHECK-NEXT:    s_cmp_lg_u32 s5, 0
+; CHECK-NEXT:    s_addc_u32 s5, 0, -1
+; CHECK-NEXT:    v_and_b32_e32 v2, s4, v2
+; CHECK-NEXT:    v_and_b32_e32 v3, s5, v3
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = urem <2 x i64> %num, <i64 4096, i64 4096>
   ret <2 x i64> %result
 }
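
The new G_BUILD_VECTOR case keeps only the known bits that every demanded element agrees on: it starts from the all-bits-known "conflict" state and intersects each element's known bits with KnownBits::commonBits. That per-element intersection appears to be what lets the existing urem-by-power-of-two folding in the tests above fire for the splat 4096 divisor while leaving the non-splat {4096, 2048} divisor alone. Below is a minimal standalone sketch of just that merge step, under the assumption that a plain uint64_t pair is a faithful enough model; SimpleKnownBits, the local commonBits helper, and the driver in main are illustrative stand-ins, not LLVM API.

// Standalone sketch (not LLVM code) of the per-element known-bits merge used
// for G_BUILD_VECTOR above. SimpleKnownBits models llvm::KnownBits with plain
// 64-bit masks; commonBits models KnownBits::commonBits.
#include <cstdint>
#include <cstdio>
#include <vector>

struct SimpleKnownBits {
  // A bit set in Zero means "known to be 0"; a bit set in One means "known 1".
  // Start in the all-known "conflict" state, as the patch does before merging.
  uint64_t Zero = ~0ULL;
  uint64_t One = ~0ULL;
};

// Keep only the bits both inputs agree on (the commonBits-style intersection).
static SimpleKnownBits commonBits(const SimpleKnownBits &A,
                                  const SimpleKnownBits &B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  // Mirrors the two MIR tests: a splat <4096, 4096> and a non-splat <4096, 2048>.
  const std::vector<std::vector<uint64_t>> Cases = {{4096, 4096}, {4096, 2048}};
  for (const auto &Elts : Cases) {
    SimpleKnownBits Known; // everything "known" until the first element merges in
    for (uint64_t V : Elts) {
      SimpleKnownBits ElemKnown;
      ElemKnown.Zero = ~V; // a constant element has every bit known
      ElemKnown.One = V;
      Known = commonBits(Known, ElemKnown);
    }
    // Splat case: One == 0x1000 and Zero == ~0x1000, so the divisor is a known
    // power of two. Non-splat case: bits 11 and 12 drop out as unknown.
    std::printf("Zero=0x%016llx One=0x%016llx\n",
                (unsigned long long)Known.Zero, (unsigned long long)Known.One);
  }
  return 0;
}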