AMDGPU: expand the BFE patterns per target instead of via a shared multiclass.

This patch deletes the BFEPattern multiclass (together with the
IMMZeroBasedBitfieldMask and IMMPopCount helpers) from AMDGPUInstructions.td
and writes the six bitfield-extract patterns out directly in
EvergreenInstructions.td and SIInstructions.td. Each of those two files gets
its own copy of the two helper definitions.

The Evergreen copies keep wrapping constant operands in MOV_IMM_I32. The SI
copies pass the constants to V_BFE_U32 / V_BFE_I32 directly, with no move
instruction around them.

NOTE(review): this text was recovered from a copy in which all hunks had been
collapsed onto single lines and every "<identifier ...>" span had been removed.
The template argument lists and the ImmLeaf / SDNodeXForm bodies below were
restored so that the line counts agree with the hunk headers; confirm them
against the tree before applying.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -597,53 +597,6 @@
   (vt rc:$addr)
 >;
 
-// Bitfield extract patterns
-
-def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
-  return isMask_32(Imm);
-}]>;
-
-def IMMPopCount : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
-                                   MVT::i32);
-}]>;
-
-multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
-  def : AMDGPUPat <
-    (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
-    (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
-  >;
-
-  // x & ((1 << y) - 1)
-  def : AMDGPUPat <
-    (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x & ~(-1 << y)
-  def : AMDGPUPat <
-    (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x & (-1 >> (bitwidth - y))
-  def : AMDGPUPat <
-    (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  // x << (bitwidth - y) >> (bitwidth - y)
-  def : AMDGPUPat <
-    (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
-    (UBFE $src, (MOV (i32 0)), $width)
-  >;
-
-  def : AMDGPUPat <
-    (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
-    (SBFE $src, (MOV (i32 0)), $width)
-  >;
-}
-
 // fshr pattern
 class FSHRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
   (fshr i32:$src0, i32:$src1, i32:$src2),
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -30,6 +30,15 @@
   let SubtargetPredicate = isEGorCayman;
 }
 
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+  return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Evergreen / Cayman store instructions
 //===----------------------------------------------------------------------===//
@@ -394,7 +403,41 @@
   VecALU
 >;
 
-defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
+// Bitfield extract patterns
+
+def : AMDGPUPat <
+  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (BFE_UINT_eg $src, $rshift, (MOV_IMM_I32 (i32 (IMMPopCount $mask))))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+def : AMDGPUPat <
+  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (BFE_INT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
 
 def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
   [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2315,7 +2315,50 @@
 defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
 // FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
 
-defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
+// Bitfield extract patterns
+
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+  return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
+def : AMDGPUPat <
+  (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+  (V_BFE_U32 $src, $rshift, (i32 (IMMPopCount $mask)))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+  (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+  (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+  (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+  (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+def : AMDGPUPat <
+  (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+  (V_BFE_I32 $src, (i32 0), $width)
+>;
 
 // SHA-256 Ma patterns
 