diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -508,6 +508,8 @@ /// Use a function which takes in a MachineIRBuilder to perform a combine. bool applyBuildFn(MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo); + bool matchFunnelShiftToRotate(MachineInstr &MI); // True for a G_FSHL/G_FSHR whose two shifted inputs are the same register. + void applyFunnelShiftToRotate(MachineInstr &MI); // Rewrites the matched G_FSHL/G_FSHR in place as G_ROTL/G_ROTR. /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -598,6 +598,15 @@ extract_vec_elt_build_vec, extract_all_elts_from_build_vector]>; +def funnel_shift_to_rotate : GICombineRule< // G_FSHL/G_FSHR of a value with itself -> G_ROTL/G_ROTR. + (defs root:$root), + (match (wip_match_opcode G_FSHL, G_FSHR):$root, + [{ return Helper.matchFunnelShiftToRotate(*${root}); }]), + (apply [{ Helper.applyFunnelShiftToRotate(*${root}); }]) +>; + +def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>; + // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -640,4 +649,4 @@ unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, - div_rem_to_divrem]>; + div_rem_to_divrem, funnel_shift_combines]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -3870,6 +3870,30 @@ return true; } +/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate. 
+bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR); + Register X = MI.getOperand(1).getReg(); + Register Y = MI.getOperand(2).getReg(); + if (X != Y) + return false; + bool RotateOpc = + Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR; + return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}}); +} + +void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR); + bool IsFSHL = Opc == TargetOpcode::G_FSHL; + Observer.changingInstr(MI); + MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL + : TargetOpcode::G_ROTR)); + MI.RemoveOperand(2); + Observer.changedInstr(MI); +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-funnel-shifts-to-rotates.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-funnel-shifts-to-rotates.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-funnel-shifts-to-rotates.mir @@ -0,0 +1,115 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - -verify-machineinstrs | FileCheck %s + +--- +name: test_rotr +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$w1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: test_rotr + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[COPY1]](s32) + ; CHECK: $w0 = COPY [[ROTR]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + 
%0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_FSHR %0, %0, %1(s32) + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_rotl # Scalar G_FSHL with both shifted inputs equal becomes G_ROTL. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$w1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: test_rotl + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[COPY]], [[COPY1]](s32) + ; CHECK: $w0 = COPY [[ROTL]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_FSHL %0, %0, %1(s32) + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_vector_rotr # <4 x s32> G_FSHR with both shifted inputs equal becomes G_ROTR. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: test_vector_rotr + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[ROTR:%[0-9]+]]:_(<4 x s32>) = G_ROTR [[COPY]], [[COPY1]](<4 x s32>) + ; CHECK: $q0 = COPY [[ROTR]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_FSHR %0, %0, %1(<4 x s32>) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: test_vector_rotl # <4 x s32> G_FSHL with both shifted inputs equal becomes G_ROTL. +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: test_vector_rotl + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[ROTL:%[0-9]+]]:_(<4 x s32>) = G_ROTL [[COPY]], [[COPY1]](<4 x s32>) + ; CHECK: $q0 = COPY [[ROTL]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_FSHL %0, %0, %1(<4 x s32>) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +...