diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -16,7 +16,6 @@ #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUMachineFunction.h" -#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -4905,22 +4904,6 @@ case ISD::INTRINSIC_WO_CHAIN: { unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (IID) { - case Intrinsic::amdgcn_mbcnt_lo: - case Intrinsic::amdgcn_mbcnt_hi: { - const GCNSubtarget &ST = - DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); - // These return at most the (wavefront size - 1) + src1 - // As long as src1 is an immediate we can calc known bits - KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1); - unsigned Src1ValBits = Src1Known.countMaxActiveBits(); - unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2()); - // Cater for potential carry - MaxActiveBits += Src1ValBits ? 
1 : 0; - unsigned Size = Op.getValueType().getSizeInBits(); - if (MaxActiveBits < Size) - Known.Zero.setHighBits(Size - MaxActiveBits); - break; - } case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::amdgcn_workitem_id_y: case Intrinsic::amdgcn_workitem_id_z: { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -452,6 +452,10 @@ void finalizeLowering(MachineFunction &MF) const override; + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; void computeKnownBitsForFrameIndex(int FrameIdx, KnownBits &Known, const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -12706,6 +12706,41 @@ TargetLoweringBase::finalizeLowering(MF); } +void SITargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + Known.resetAll(); + unsigned Opc = Op.getOpcode(); + switch (Opc) { + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (IID) { + case Intrinsic::amdgcn_mbcnt_lo: + case Intrinsic::amdgcn_mbcnt_hi: { + const GCNSubtarget &ST = + DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); + // These return at most the (wavefront size - 1) + src1 + // As long as src1 is an immediate we can calc known bits + KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1); + unsigned Src1ValBits = Src1Known.countMaxActiveBits(); + unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2()); + // Cater for potential carry + MaxActiveBits += Src1ValBits ? 
1 : 0; + unsigned Size = Op.getValueType().getSizeInBits(); + if (MaxActiveBits < Size) + Known.Zero.setHighBits(Size - MaxActiveBits); + return; + } + } + break; + } + } + return AMDGPUTargetLowering::computeKnownBitsForTargetNode( + Op, Known, DemandedElts, DAG, Depth); +} + void SITargetLowering::computeKnownBitsForFrameIndex( const int FI, KnownBits &Known, const MachineFunction &MF) const { TargetLowering::computeKnownBitsForFrameIndex(FI, Known, MF);