diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4602,9 +4602,16 @@
     case Intrinsic::amdgcn_mbcnt_hi: {
       const GCNSubtarget &ST =
           DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
-      // These return at most the wavefront size - 1.
+      // These return at most (wavefront size - 1) + src1, so the number of
+      // significant result bits follows from the known bits of src1.
+      KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
+      unsigned Src1ValBits = Src1Known.countMaxActiveBits();
+      unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2());
+      // A possibly nonzero src1 can carry the sum into one additional bit.
+      MaxActiveBits += Src1ValBits ? 1 : 0;
       unsigned Size = Op.getValueType().getSizeInBits();
-      Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
+      if (MaxActiveBits < Size)
+        Known.Zero.setHighBits(Size - MaxActiveBits);
       break;
     }
     case Intrinsic::amdgcn_workitem_id_x:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
@@ -14,24 +14,79 @@
   ret void
 }
 
-; GCN-LABEL: {{^}}mbcnt_lo_known_bits:
+; GCN-LABEL: {{^}}mbcnt_lo_known_bits_1:
 ; GCN: v_mbcnt_lo_u32_b32
-; GCN-NOT: and
-define i32 @mbcnt_lo_known_bits(i32 %x, i32 %y) #0 {
+; GCN: v_and_b32_e32
+define i32 @mbcnt_lo_known_bits_1(i32 %x, i32 %y) #0 {
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 %y)
   %mask = and i32 %lo, 63
   ret i32 %mask
 }
 
-; GCN-LABEL: {{^}}mbcnt_hi_known_bits:
-; GCN: v_mbcnt_hi_u32_b32
+; GCN-LABEL: {{^}}mbcnt_lo_known_bits_2:
+; GCN: v_mbcnt_lo_u32_b32
+; GCN-NOT: and
+define i32 @mbcnt_lo_known_bits_2(i32 %x) #0 {
+  %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 0)
+  %mask = and i32 %lo, 63
+  ret i32 %mask
+}
+
+; GCN-LABEL: {{^}}mbcnt_lo_known_bits_3:
+; GCN: v_mbcnt_lo_u32_b32
 ; GCN-NOT: and
-define i32 @mbcnt_hi_known_bits(i32 %x, i32 %y) #0 {
+define i32 @mbcnt_lo_known_bits_3(i32 %x) #0 {
+  %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15)
+  %mask = and i32 %lo, 127
+  ret i32 %mask
+}
+
+; GCN-LABEL: {{^}}mbcnt_lo_known_bits_4:
+; GCN: v_mbcnt_lo_u32_b32
+; GCN: v_and_b32_e32
+define i32 @mbcnt_lo_known_bits_4(i32 %x) #0 {
+  %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15)
+  %mask = and i32 %lo, 63
+  ret i32 %mask
+}
+
+
+; GCN-LABEL: {{^}}mbcnt_hi_known_bits_1:
+; GCN: v_mbcnt_hi_u32_b32
+; GCN: v_and_b32_e32
+define i32 @mbcnt_hi_known_bits_1(i32 %x, i32 %y) #0 {
   %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 %y)
   %mask = and i32 %hi, 63
   ret i32 %mask
 }
 
+; GCN-LABEL: {{^}}mbcnt_hi_known_bits_2:
+; GCN: v_mbcnt_hi_u32_b32
+; GCN-NOT: and
+define i32 @mbcnt_hi_known_bits_2(i32 %x) #0 {
+  %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 0)
+  %mask = and i32 %hi, 63
+  ret i32 %mask
+}
+
+; GCN-LABEL: {{^}}mbcnt_hi_known_bits_3:
+; GCN: v_mbcnt_hi_u32_b32
+; GCN-NOT: and
+define i32 @mbcnt_hi_known_bits_3(i32 %x) #0 {
+  %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15)
+  %mask = and i32 %hi, 127
+  ret i32 %mask
+}
+
+; GCN-LABEL: {{^}}mbcnt_hi_known_bits_4:
+; GCN: v_mbcnt_hi_u32_b32
+; GCN: v_and_b32_e32
+define i32 @mbcnt_hi_known_bits_4(i32 %x) #0 {
+  %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15)
+  %mask = and i32 %hi, 63
+  ret i32 %mask
+}
+
 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1