diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1903,6 +1903,18 @@ }] >; +// Create two BFI instructions at once, if possible. +// This handles nested bitfieldInserts (NOTE(review): the identity below holds only when C0 & ~C1 == 0; confirm the pattern guarantees that): +// ((a ^ y) & C0) ^ ((y & C1) | (z & ~C1)) <=> +// (a & C0) | (~C0 & ((y & C1) | (z & ~C1))) +def : AMDGPUPat < + (xor (and (xor i32:$a, i32:$y), (i32 imm:$imm0)), + (BFIImm32 i32:$x, i32:$y, i32:$z)), + + (V_BFI_B32_e64 VSrc_b32:$imm0, VSrc_b32:$a, + (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)) +>; + // Definition from ISA doc: // (y & x) | (z & ~x) def : AMDGPUPat < diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll --- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll +++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll @@ -1912,31 +1912,27 @@ ; GFX7-LABEL: v_bfi_seq_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b32 s4, 0xffc00 -; GFX7-NEXT: v_bfi_b32 v0, s4, v0, v1 -; GFX7-NEXT: v_xor_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_and_b32_e32 v1, 0x3ff00000, v1 -; GFX7-NEXT: v_xor_b32_e32 v0, v1, v0 +; GFX7-NEXT: s_mov_b32 s4, 0xfff003ff +; GFX7-NEXT: v_bfi_b32 v0, s4, v1, v0 +; GFX7-NEXT: s_mov_b32 s4, 0x3ff00000 +; GFX7-NEXT: v_bfi_b32 v0, s4, v2, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_bfi_seq_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, 0xffc00 -; GFX8-NEXT: v_bfi_b32 v0, s4, v0, v1 -; GFX8-NEXT: v_xor_b32_e32 v1, v1, v2 -; GFX8-NEXT: v_and_b32_e32 v1, 0x3ff00000, v1 -; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_mov_b32 s4, 0xfff003ff +; GFX8-NEXT: v_bfi_b32 v0, s4, v1, v0 +; GFX8-NEXT: s_mov_b32 s4, 0x3ff00000 +; GFX8-NEXT: v_bfi_b32 v0, s4, v2, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_bfi_seq_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_xor_b32_e32 v2, v1, v2 -; GFX10-NEXT: v_bfi_b32 v0, 0xffc00, v0, v1 -; GFX10-NEXT: v_and_b32_e32 v1, 0x3ff00000, v2 -; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 +; GFX10-NEXT: v_bfi_b32 v0, 0xfff003ff, v1, v0 +; GFX10-NEXT: v_bfi_b32 v0, 0x3ff00000, v2, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-GISEL-LABEL: v_bfi_seq_i32: @@ -1965,3 +1961,4 @@ %6 = xor i32 %5, %3 ret i32 %6 } +