diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2419,6 +2419,11 @@
 }
 
+def : GCNPat<
+  (i64 (DivergentUnaryFrag<bitreverse> i64:$a)),
+  (REG_SEQUENCE VReg_64,
+   (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub1))), sub0,
+   (V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0))), sub1)>;
 
 // Prefer selecting to max when legal, but using mul is always valid.
 let AddedComplexity = -5 in {
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -235,10 +235,10 @@
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def S_BREV_B32 : SOP1_32 <"s_brev_b32",
-  [(set i32:$sdst, (bitreverse i32:$src0))]
+  [(set i32:$sdst, (UniformUnaryFrag<bitreverse> i32:$src0))]
 >;
 def S_BREV_B64 : SOP1_64 <"s_brev_b64",
-  [(set i64:$sdst, (bitreverse i64:$src0))]
+  [(set i64:$sdst, (UniformUnaryFrag<bitreverse> i64:$src0))]
 >;
 } // End isReMaterializable = 1, isAsCheapAsAMove = 1
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -282,7 +282,7 @@
 } // End TRANS = 1, SchedRW = [WriteTrans32]
 
 defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
-defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
+defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>;
 defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
 defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
 defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;
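
Note for reviewers: UniformUnaryFrag and DivergentUnaryFrag are the existing parameterized PatFrags that gate a pattern on whether the selected node is divergent, so the SALU patterns above only match uniform values and the VALU patterns only match divergent ones. Roughly, they have the following shape (a paraphrase of the definitions already present in the AMDGPU backend, not part of this diff; see AMDGPUInstructions.td and SIInstrInfo.td for the authoritative versions):

class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
  (ops node:$src0),
  (Op $src0),
  // Match only when the selected node is known uniform.
  [{ return !N->isDivergent(); }]>;

class DivergentUnaryFrag<SDPatternOperator Op> : PatFrag <
  (ops node:$src),
  (Op $src),
  // Match only when the selected node is known divergent.
  [{ return N->isDivergent(); }]>;

With that split, the new GCNPat covers the divergent i64 case: each 32-bit half is reversed with V_BFREV_B32_e64 and the halves are swapped in the REG_SEQUENCE (source sub1 becomes result sub0 and vice versa), which together amounts to a full 64-bit bit reversal. A uniform i64 bitreverse continues to select to the single s_brev_b64.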