diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -564,6 +564,7 @@
   /// This variant does not erase \p MI after calling the build function.
   void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo);
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
   bool matchRotateOutOfRange(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -645,6 +645,13 @@
                                              extract_vec_elt_build_vec,
                                              extract_all_elts_from_build_vector]>;
 
+def funnel_shift_from_or_shift : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_OR):$root,
+         [{ return Helper.matchOrShiftToFunnelShift(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
 def funnel_shift_to_rotate : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_FSHL, G_FSHR):$root,
@@ -683,7 +690,8 @@
          [{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
-def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
+def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
+                                            funnel_shift_to_rotate]>;
 
 def bitfield_extract_from_sext_inreg : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3867,6 +3867,51 @@
   MatchInfo(Builder);
 }
 
+bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+                                               BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_OR);
+
+  Register Dst = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(Dst);
+  unsigned BitWidth = Ty.getScalarSizeInBits();
+
+  Register ShlSrc, ShlAmt, LShrSrc, LShrAmt;
+  unsigned FshOpc = 0;
+
+  // TODO: Handle vector types.
+  // Match (or (shl x, amt), (lshr y, sub(bw, amt))).
+  if (mi_match(Dst, MRI,
+               // m_GOr() handles the commuted version as well.
+               m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+                     m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICst(BitWidth),
+                                                    m_Reg(LShrAmt)))))) {
+    FshOpc = TargetOpcode::G_FSHL;
+
+    // Match (or (shl x, sub(bw, amt)), (lshr y, amt)).
+  } else if (mi_match(
+                 Dst, MRI,
+                 m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)),
+                       m_GShl(m_Reg(ShlSrc), m_GSub(m_SpecificICst(BitWidth),
+                                                    m_Reg(ShlAmt)))))) {
+    FshOpc = TargetOpcode::G_FSHR;
+
+  } else {
+    return false;
+  }
+
+  if (ShlAmt != LShrAmt)
+    return false;
+
+  LLT AmtTy = MRI.getType(ShlAmt);
+  if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt});
+  };
+  return true;
+}
+
 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
   unsigned Opc = MI.getOpcode();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: fshl_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: fshl_i32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: %or:_(s32) = G_FSHL %a, %b, %amt(s32)
+    ; CHECK-NEXT: $vgpr3 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %b:_(s32) = COPY $vgpr1
+    %amt:_(s32) = COPY $vgpr2
+    %bw:_(s32) = G_CONSTANT i32 32
+    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt:_
+    %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32)
+    %or:_(s32) = G_OR %shl:_, %lshr:_
+    $vgpr3 = COPY %or
+...
+
+---
+name: fshl_commute_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: fshl_commute_i32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: %or:_(s32) = G_FSHL %a, %b, %amt(s32)
+    ; CHECK-NEXT: $vgpr3 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %b:_(s32) = COPY $vgpr1
+    %amt:_(s32) = COPY $vgpr2
+    %bw:_(s32) = G_CONSTANT i32 32
+    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt:_
+    %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32)
+    %or:_(s32) = G_OR %lshr:_, %shl:_
+    $vgpr3 = COPY %or
+...
+
+---
+name: fshr_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: fshr_i32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: %or:_(s32) = G_FSHR %a, %b, %amt(s32)
+    ; CHECK-NEXT: $vgpr3 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %b:_(s32) = COPY $vgpr1
+    %amt:_(s32) = COPY $vgpr2
+    %bw:_(s32) = G_CONSTANT i32 32
+    %lshr:_(s32) = G_LSHR %b:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt:_
+    %shl:_(s32) = G_SHL %a:_, %sub:_(s32)
+    %or:_(s32) = G_OR %shl:_, %lshr:_
+    $vgpr3 = COPY %or
+...
+
+---
+name: fshl_i32_bad_const
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: fshl_i32_bad_const
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: %bw:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt(s32)
+    ; CHECK-NEXT: %sub:_(s32) = G_SUB %bw, %amt
+    ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %b, %sub(s32)
+    ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr
+    ; CHECK-NEXT: $vgpr3 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %b:_(s32) = COPY $vgpr1
+    %amt:_(s32) = COPY $vgpr2
+    %bw:_(s32) = G_CONSTANT i32 31
+    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt:_
+    %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32)
+    %or:_(s32) = G_OR %shl:_, %lshr:_
+    $vgpr3 = COPY %or
+...
+
+---
+name: fshl_i32_bad_amt_reg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+
+    ; CHECK-LABEL: name: fshl_i32_bad_amt_reg
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: %amt1:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: %bw:_(s32) = G_CONSTANT i32 32
+    ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt(s32)
+    ; CHECK-NEXT: %sub:_(s32) = G_SUB %bw, %amt1
+    ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %b, %sub(s32)
+    ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr
+    ; CHECK-NEXT: $vgpr4 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %b:_(s32) = COPY $vgpr1
+    %amt:_(s32) = COPY $vgpr2
+    %amt1:_(s32) = COPY $vgpr3
+    %bw:_(s32) = G_CONSTANT i32 32
+    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt1:_
+    %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32)
+    %or:_(s32) = G_OR %shl:_, %lshr:_
+    $vgpr4 = COPY %or
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: rotl_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: rotl_i32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %or:_(s32) = G_ROTL %a, %amt(s32)
+    ; CHECK-NEXT: $vgpr2 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %amt:_(s32) = COPY $vgpr1
+    %bw:_(s32) = G_CONSTANT i32 32
+    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt:_
+    %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32)
+    %or:_(s32) = G_OR %shl:_, %lshr:_
+    $vgpr2 = COPY %or
+...
+
+---
+name: rotl_commute_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: rotl_commute_i32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %or:_(s32) = G_ROTL %a, %amt(s32)
+    ; CHECK-NEXT: $vgpr2 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %amt:_(s32) = COPY $vgpr1
+    %bw:_(s32) = G_CONSTANT i32 32
+    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt:_
+    %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32)
+    %or:_(s32) = G_OR %lshr:_, %shl:_
+    $vgpr2 = COPY %or
+...
+
+---
+name: rotr_i32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-LABEL: name: rotr_i32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %or:_(s32) = G_ROTR %a, %amt(s32)
+    ; CHECK-NEXT: $vgpr2 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %amt:_(s32) = COPY $vgpr1
+    %bw:_(s32) = G_CONSTANT i32 32
+    %lshr:_(s32) = G_LSHR %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt:_
+    %shl:_(s32) = G_SHL %a:_, %sub:_(s32)
+    %or:_(s32) = G_OR %shl:_, %lshr:_
+    $vgpr2 = COPY %or
+...
+
+---
+name: rotl_i32_bad_const
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: rotl_i32_bad_const
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %bw:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt(s32)
+    ; CHECK-NEXT: %sub:_(s32) = G_SUB %bw, %amt
+    ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %a, %sub(s32)
+    ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr
+    ; CHECK-NEXT: $vgpr2 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %amt:_(s32) = COPY $vgpr1
+    %bw:_(s32) = G_CONSTANT i32 31
+    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt:_
+    %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32)
+    %or:_(s32) = G_OR %shl:_, %lshr:_
+    $vgpr2 = COPY %or
+...
+
+---
+name: rotl_i32_bad_amt_reg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: rotl_i32_bad_amt_reg
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %amt:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %amt1:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: %bw:_(s32) = G_CONSTANT i32 32
+    ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt(s32)
+    ; CHECK-NEXT: %sub:_(s32) = G_SUB %bw, %amt1
+    ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %a, %sub(s32)
+    ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr
+    ; CHECK-NEXT: $vgpr3 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %amt:_(s32) = COPY $vgpr1
+    %amt1:_(s32) = COPY $vgpr2
+    %bw:_(s32) = G_CONSTANT i32 32
+    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
+    %sub:_(s32) = G_SUB %bw:_, %amt1:_
+    %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32)
+    %or:_(s32) = G_OR %shl:_, %lshr:_
+    $vgpr3 = COPY %or
+...
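
For context, the G_OR pattern that matchOrShiftToFunnelShift recognizes is the usual open-coded funnel-shift idiom. A minimal sketch of source code that lowers to the matched G-MIR; the function name and the hard-coded 32-bit width are illustrative, not part of this patch:

#include <cstdint>

// Lowers to (or (shl a, amt), (lshr b, (sub 32, amt))), which the new
// combine turns into G_FSHL a, b, amt (or G_ROTL when a == b).
// In C++ this is only well defined for amt in [1, 31]: amt == 0 makes the
// right-shift count 32, which is UB at the source level. The combine runs
// on G-MIR, where a shift count equal to the bit width produces undef, so
// rewriting to a funnel shift only refines that undef.
uint32_t funnel_shift_left(uint32_t a, uint32_t b, uint32_t amt) {
  return (a << amt) | (b >> (32u - amt));
}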