Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -126,6 +126,7 @@ def xor_oneuse : HasOneUseBinOp; } // Properties = [SDNPCommutative, SDNPAssociative] +def add_oneuse : HasOneUseBinOp; def sub_oneuse : HasOneUseBinOp; def srl_oneuse : HasOneUseBinOp; @@ -682,6 +683,12 @@ (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask)))) >; + // x & ((1 << y) - 1) + def : AMDGPUPat < + (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)), + (UBFE $src, (i32 0), $width) + >; + // x & (-1 >> (bitwidth - y)) def : AMDGPUPat < (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))), Index: llvm/trunk/test/CodeGen/AMDGPU/extract-lowbits.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/extract-lowbits.ll +++ llvm/trunk/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -17,19 +17,11 @@ ; ---------------------------------------------------------------------------- ; define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { -; SI-LABEL: bzhi32_a0: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_bfm_b32_e64 v1, v1, 0 -; SI-NEXT: v_and_b32_e32 v0, v1, v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: bzhi32_a0: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_bfm_b32 v1, v1, 0 -; VI-NEXT: v_and_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: bzhi32_a0: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %masked = and i32 %mask, %val @@ -37,19 +29,11 @@ } define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { -; SI-LABEL: bzhi32_a1_indexzext: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_bfm_b32_e64 v1, v1, 0 -; SI-NEXT: v_and_b32_e32 v0, v1, v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: bzhi32_a1_indexzext: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_bfm_b32 v1, v1, 0 -; VI-NEXT: v_and_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: bzhi32_a1_indexzext: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] %conv = zext i8 %numlowbits to i32 %onebit = shl i32 1, %conv %mask = add nsw i32 %onebit, -1 @@ -58,19 +42,11 @@ } define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { -; SI-LABEL: bzhi32_a4_commutative: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_bfm_b32_e64 v1, v1, 0 -; SI-NEXT: v_and_b32_e32 v0, v0, v1 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: bzhi32_a4_commutative: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_bfm_b32 v1, v1, 0 -; VI-NEXT: v_and_b32_e32 v0, v0, v1 -; VI-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: bzhi32_a4_commutative: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %masked = and i32 %val, %mask ; swapped order