diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4127,8 +4127,9 @@
   assert(MI.getOpcode() == TargetOpcode::G_AND);
   Register Dst = MI.getOperand(0).getReg();
   LLT Ty = MRI.getType(Dst);
+  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
   if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
-          TargetOpcode::G_UBFX, Ty, Ty))
+          TargetOpcode::G_UBFX, Ty, ExtractTy))
     return false;
 
   int64_t AndImm, LSBImm;
@@ -4148,7 +4149,6 @@
   if (static_cast<uint64_t>(LSBImm) >= Size)
     return false;
 
-  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
   uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
   MatchInfo = [=](MachineIRBuilder &B) {
     auto WidthCst = B.buildConstant(ExtractTy, Width);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4906,5 +4906,6 @@
 
 bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal(
     unsigned Opc, LLT Ty1, LLT Ty2) const {
-  return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
+  return (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)) &&
+         Ty2 == LLT::scalar(32);
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir
@@ -105,3 +105,56 @@
     $vgpr0 = COPY %4(s32)
 
 ...
+
+---
+name: bfe_lshr_and_s32
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: bfe_lshr_and_s32
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GCN-NEXT: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]]
+    ; GCN-NEXT: $vgpr0 = COPY [[UBFX]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_CONSTANT i32 7936 ; 31 << 8
+    %2:_(s32) = G_AND %0, %1
+    %3:_(s32) = G_CONSTANT i32 8
+    %4:_(s32) = G_LSHR %2, %3(s32)
+    $vgpr0 = COPY %4(s32)
+
+...
+
+---
+name: bfe_lshr_and_s64
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; GCN-LABEL: name: bfe_lshr_and_s64
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 261888
+    ; GCN-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
+    ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C1]](s32)
+    ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[LSHR]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_CONSTANT i64 261888 ; 1023 << 8
+    %2:_(s64) = G_AND %0, %1
+    %3:_(s32) = G_CONSTANT i32 8
+    %4:_(s64) = G_LSHR %2, %3(s32)
+    $vgpr0_vgpr1 = COPY %4(s64)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll
@@ -44,6 +44,36 @@
   ret i32 %2
 }
 
+; Test vector bitfield extract.
+define i32 @v_mask_srl_i32(i32 %value) {
+; GFX89-LABEL: v_mask_srl_i32:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_bfe_u32 v0, v0, 8, 5
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_mask_srl_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_bfe_u32 v0, v0, 8, 5
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %1 = and i32 %value, 7936 ; 31 << 8
+  %2 = lshr i32 %1, 8
+  ret i32 %2
+}
+
+; Test scalar bitfield extract.
+define amdgpu_ps i32 @s_mask_srl_i32(i32 inreg %value) {
+; GCN-LABEL: s_mask_srl_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_bfe_u32 s0, s0, 0x50008
+; GCN-NEXT:    ; return to shader part epilog
+  %1 = and i32 %value, 7936 ; 31 << 8
+  %2 = lshr i32 %1, 8
+  ret i32 %2
+}
+
 ; Test vector bitfield extract for 64-bits.
 define i64 @v_srl_mask_i64(i64 %value) {
 ; GFX89-LABEL: v_srl_mask_i64:
@@ -89,3 +119,40 @@
   %2 = and i64 %1, 63
   ret i64 %2
 }
+
+; Test vector bitfield extract for 64-bits.
+define i64 @v_mask_srl_i64(i64 %value) {
+; GFX89-LABEL: v_mask_srl_i64:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_and_b32_e32 v0, 0xfe000000, v0
+; GFX89-NEXT:    v_and_b32_e32 v1, 7, v1
+; GFX89-NEXT:    v_lshrrev_b64 v[0:1], 25, v[0:1]
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_mask_srl_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xfe000000, v0
+; GFX10-NEXT:    v_and_b32_e32 v1, 7, v1
+; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 25, v[0:1]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %1 = and i64 %value, 34326183936 ; 1023 << 25
+  %2 = lshr i64 %1, 25
+  ret i64 %2
+}
+
+; Test scalar bitfield extract for 64-bits.
+define amdgpu_ps i64 @s_mask_srl_i64(i64 inreg %value) {
+; GCN-LABEL: s_mask_srl_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_mov_b32 s2, 0xfe000000
+; GCN-NEXT:    s_mov_b32 s3, 7
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], 25
+; GCN-NEXT:    ; return to shader part epilog
+  %1 = and i64 %value, 34326183936 ; 1023 << 25
+  %2 = lshr i64 %1, 25
+  ret i64 %2
+}
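
Note on the check lines above, for reviewers reading the immediates: (x & 7936) >> 8 extracts 5 contiguous bits starting at bit 8, which is why the s32 case becomes G_UBFX with offset 8 and width 5, and the scalar selection uses s_bfe_u32 with the immediate 0x50008 (width packed into bits [22:16], offset in the low bits). The following standalone C++ sketch is not part of the patch; it only reproduces that offset/width arithmetic for the v_mask_srl_i32 / s_mask_srl_i32 case, assuming a 32-bit value.

// Illustrative sketch only (not part of the patch). Requires C++20 for
// std::popcount.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mask = 7936; // 31 << 8, the AND immediate from the tests
  const uint32_t Lsb = 8;     // the LSHR amount

  // The shifted-down mask must be a run of low bits: M & (M + 1) == 0.
  const uint32_t LowMask = Mask >> Lsb;
  assert((LowMask & (LowMask + 1)) == 0);

  const uint32_t Width = std::popcount(LowMask); // 5 bits

  // S_BFE_U32 packs the width into bits [22:16] of its immediate and the
  // offset into the low bits, giving the 0x50008 seen in s_mask_srl_i32.
  const uint32_t SBfeImm = (Width << 16) | Lsb;
  assert(SBfeImm == 0x50008);
  return 0;
}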