diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4214,8 +4214,9 @@
 
   const Register Dst = MI.getOperand(0).getReg();
   LLT Ty = MRI.getType(Dst);
+  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
   if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
-          TargetOpcode::G_UBFX, Ty, Ty))
+          TargetOpcode::G_UBFX, Ty, ExtractTy))
     return false;
 
   // Try to match shr (and x, c1), c2
@@ -4249,8 +4250,8 @@
     return false;
 
   MatchInfo = [=](MachineIRBuilder &B) {
-    auto WidthCst = B.buildConstant(Ty, Width);
-    auto PosCst = B.buildConstant(Ty, Pos);
+    auto WidthCst = B.buildConstant(ExtractTy, Width);
+    auto PosCst = B.buildConstant(ExtractTy, Pos);
     B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
   };
   return true;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
@@ -145,11 +145,10 @@
     ; GCN: liveins: $vgpr0_vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 261888
-    ; GCN-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
     ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C1]](s32)
-    ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+    ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+    ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[UBFX]](s64)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_CONSTANT i64 261888 ; 1023 << 8
     %2:_(s64) = G_AND %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
@@ -121,22 +121,24 @@
 }
 
 ; Test vector bitfield extract for 64-bits.
+; TODO: No need for a 64-bit shift instruction when the extracted value is
+; entirely contained within the upper or lower half.
 define i64 @v_mask_srl_i64(i64 %value) {
 ; GFX89-LABEL: v_mask_srl_i64:
 ; GFX89:       ; %bb.0:
 ; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX89-NEXT:    v_and_b32_e32 v0, 0xfe000000, v0
-; GFX89-NEXT:    v_and_b32_e32 v1, 7, v1
 ; GFX89-NEXT:    v_lshrrev_b64 v[0:1], 25, v[0:1]
+; GFX89-NEXT:    v_mov_b32_e32 v1, 0
+; GFX89-NEXT:    v_bfe_u32 v0, v0, 0, 10
 ; GFX89-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_mask_srl_i64:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_and_b32_e32 v0, 0xfe000000, v0
-; GFX10-NEXT:    v_and_b32_e32 v1, 7, v1
 ; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 25, v[0:1]
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-NEXT:    v_bfe_u32 v0, v0, 0, 10
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %1 = and i64 %value, 34326183936 ; 1023 << 25
   %2 = lshr i64 %1, 25
@@ -147,10 +149,7 @@
 define amdgpu_ps i64 @s_mask_srl_i64(i64 inreg %value) {
 ; GCN-LABEL: s_mask_srl_i64:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_mov_b32 s2, 0xfe000000
-; GCN-NEXT:    s_mov_b32 s3, 7
-; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
-; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], 25
+; GCN-NEXT:    s_bfe_u64 s[0:1], s[0:1], 0xa0019
 ; GCN-NEXT:    ; return to shader part epilog
   %1 = and i64 %value, 34326183936 ; 1023 << 25
   %2 = lshr i64 %1, 25
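Reviewer note: the combine now queries the target for its preferred
shift-amount type via getPreferredShiftAmountTy() and passes that as the
extract type to isConstantUnsignedBitfieldExtractLegal(), so the G_UBFX
position/width constants may be narrower than the value being extracted
(s32 pos/width on an s64 extract in the MIR test above). A minimal sketch
of the two hooks a target would override to opt into that shape, using a
hypothetical MyTargetLowering class; this is illustrative only, not the
AMDGPU implementation touched by this patch:

  // Sketch only: a hypothetical target that always prefers 32-bit shift
  // amounts, which also become the G_UBFX pos/width constant type.
  LLT MyTargetLowering::getPreferredShiftAmountTy(LLT ShiftValueTy) const {
    return LLT::scalar(32);
  }

  // Sketch only: accept constant unsigned bitfield extracts of 32- and
  // 64-bit scalars whose pos/width constants are s32.
  bool MyTargetLowering::isConstantUnsignedBitfieldExtractLegal(
      unsigned Opc, LLT Ty, LLT ExtractTy) const {
    return ExtractTy == LLT::scalar(32) &&
           (Ty == LLT::scalar(32) || Ty == LLT::scalar(64));
  }

With the s64 extract reported legal, s_mask_srl_i64 above folds to a single
s_bfe_u64 whose 0xa0019 source operand packs width 10 (0xa, bits [22:16])
and offset 25 (0x19, bits [5:0]), matching the ten set bits of the
1023 << 25 mask.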