Index: llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -902,6 +902,12 @@
 
     break;
   }
+  case Intrinsic::amdgcn_mbcnt_hi: {
+    // exec_hi is all 0 on wave32, so this just returns the second operand.
+    if (ST->isWave32())
+      return IC.replaceInstUsesWith(II, II.getArgOperand(1));
+    break;
+  }
   case Intrinsic::amdgcn_ballot: {
     if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
       if (Src->isZero()) {
Index: llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
===================================================================
--- llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
+++ llvm/test/Transforms/InstCombine/AMDGPU/mbcnt.ll
@@ -36,8 +36,7 @@
 ;
 ; WAVE32-LABEL: define i32 @mbcnt_hi
 ; WAVE32-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR1]] {
-; WAVE32-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[X]], i32 [[Y]])
-; WAVE32-NEXT:    ret i32 [[HI]]
+; WAVE32-NEXT:    ret i32 [[Y]]
 ;
 ; WAVE64-LABEL: define i32 @mbcnt_hi
 ; WAVE64-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR1]] {
@@ -58,8 +57,7 @@
 ; WAVE32-LABEL: define i32 @mbcnt_lo_hi
 ; WAVE32-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) #[[ATTR1]] {
 ; WAVE32-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[X]], i32 [[Y]])
-; WAVE32-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[Z]], i32 [[LO]])
-; WAVE32-NEXT:    ret i32 [[HI]]
+; WAVE32-NEXT:    ret i32 [[LO]]
 ;
 ; WAVE64-LABEL: define i32 @mbcnt_lo_hi
 ; WAVE64-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) #[[ATTR1]] {
@@ -81,8 +79,7 @@
 ; WAVE32-LABEL: define i32 @ockl_lane_u32
 ; WAVE32-SAME: () #[[ATTR1]] {
 ; WAVE32-NEXT:    [[LO:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; WAVE32-NEXT:    [[HI:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[LO]])
-; WAVE32-NEXT:    ret i32 [[HI]]
+; WAVE32-NEXT:    ret i32 [[LO]]
 ;
 ; WAVE64-LABEL: define i32 @ockl_lane_u32
 ; WAVE64-SAME: () #[[ATTR1]] {