diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -507,6 +507,9 @@
       MachineInstr &MI, SmallVectorImpl> &MatchInfo);
 
+  bool matchFunnelShiftToRotate(MachineInstr &MI);
+  void applyFunnelShiftToRotate(MachineInstr &MI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
 
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -33,11 +33,20 @@
   (apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
 >;
 
+def funnel_shift_to_rotate : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_FSHL, G_FSHR):$root,
+    [{ return matchFunnelShiftToRotate(*${root}, MRI); }]),
+  (apply [{ applyFunnelShiftToRotate(*${root}, MRI, B, Observer); }])
+>;
+
+
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
                                            fconstant_to_constant,
                                            icmp_redundant_trunc,
-                                           fold_global_offset]> {
+                                           fold_global_offset,
+                                           funnel_shift_to_rotate]> {
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
   let StateClass = "AArch64PreLegalizerCombinerHelperState";
   let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -156,6 +156,13 @@
   let InOperandList = (ins type0:$src);
 }
 
+// Represents rotate right.
+def G_ROR : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+}
+
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
@@ -175,6 +182,7 @@
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
+def : GINodeEquiv;
 def : GINodeEquiv;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -217,6 +217,50 @@
   return true;
 }
 
+/// Match a scalar G_FSHL or G_FSHR that can be combined into an AArch64::G_ROR.
+static bool matchFunnelShiftToRotate(MachineInstr &MI,
+                                     MachineRegisterInfo &MRI) {
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
+  Register X = MI.getOperand(1).getReg();
+  Register Y = MI.getOperand(2).getReg();
+  LLT Ty = MRI.getType(X);
+  if (!Ty.isScalar())
+    return false;
+  unsigned Size = Ty.getSizeInBits();
+  if (Size == 32 || Size == 64)
+    return X == Y;
+  return false; // Illegal size for ROR.
+}
+
+static void applyFunnelShiftToRotate(MachineInstr &MI,
+                                     MachineRegisterInfo &MRI,
+                                     MachineIRBuilder &B,
+                                     GISelChangeObserver &Observer) {
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
+  bool IsFSHL = Opc == TargetOpcode::G_FSHL;
+  Register ShiftReg = MI.getOperand(3).getReg();
+  LLT ShiftTy = MRI.getType(ShiftReg);
+  B.setInstrAndDebugLoc(MI);
+  // For a rotate-left (G_FSHL), we can negate the shift amount and use ROR.
+  if (IsFSHL) {
+    auto Neg = B.buildSub(ShiftTy, B.buildConstant(ShiftTy, 0), ShiftReg);
+    ShiftReg = Neg.getReg(0);
+  }
+  Observer.changingInstr(MI);
+  MI.setDesc(B.getTII().get(AArch64::G_ROR));
+  MI.RemoveOperand(1);
+  // If we have a 32 bit shift, then extend the amount to 64b for selection.
+  if (ShiftTy.getSizeInBits() == 32) {
+    auto Ext = B.buildSExt(LLT::scalar(64), ShiftReg);
+    MI.getOperand(2).setReg(Ext.getReg(0));
+  } else {
+    MI.getOperand(2).setReg(ShiftReg);
+  }
+  Observer.changedInstr(MI);
+}
+
 class AArch64PreLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-funnel-shifts-to-rotates.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-funnel-shifts-to-rotates.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-funnel-shifts-to-rotates.mir
@@ -0,0 +1,151 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - -verify-machineinstrs | FileCheck %s
+
+# Tests that we combine funnel shifts to AArch64-specific rotate opcodes.
+---
+name: test_ror
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$w1' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: test_ror
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+    ; CHECK: [[ROR:%[0-9]+]]:_(s32) = G_ROR [[COPY]], [[SEXT]]
+    ; CHECK: $w0 = COPY [[ROR]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_FSHR %0, %0, %1(s32)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test_ror64
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_ror64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[ROR:%[0-9]+]]:_(s64) = G_ROR [[COPY]], [[COPY1]]
+    ; CHECK: $x0 = COPY [[ROR]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = G_FSHR %0, %0, %1(s64)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: test_rotl
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$w1' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: test_rotl
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SUB]](s32)
+    ; CHECK: [[ROR:%[0-9]+]]:_(s32) = G_ROR [[COPY]], [[SEXT]]
+    ; CHECK: $w0 = COPY [[ROR]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_FSHL %0, %0, %1(s32)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test_rotl64
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_rotl64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C]], [[COPY1]]
+    ; CHECK: [[ROR:%[0-9]+]]:_(s64) = G_ROR [[COPY]], [[SUB]]
+    ; CHECK: $x0 = COPY [[ROR]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = G_FSHL %0, %0, %1(s64)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+# Just do this for scalars for now.
+---
+name: test_no_vector
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+  - { reg: '$q1' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: test_no_vector
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[FSHR:%[0-9]+]]:_(<4 x s32>) = G_FSHR [[COPY]], [[COPY]], [[COPY1]](<4 x s32>)
+    ; CHECK: $q0 = COPY [[FSHR]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(<4 x s32>) = G_FSHR %0, %0, %1(<4 x s32>)
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-rotates.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-rotates.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-rotates.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=arm64-unknown-unknown -global-isel -run-pass=instruction-select -global-isel-abort=1 %s -o - | FileCheck %s
+
+---
+name: test_ror
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+liveins:
+  - { reg: '$w0' }
+  - { reg: '$w1' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: test_ror
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[RORVWr:%[0-9]+]]:gpr32 = RORVWr [[COPY]], [[COPY1]]
+    ; CHECK: $w0 = COPY [[RORVWr]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %3:gpr(s64) = G_SEXT %1(s32)
+    %2:gpr(s32) = G_ROR %0, %3
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: test_ror64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_ror64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[RORVXr:%[0-9]+]]:gpr64 = RORVXr [[COPY]], [[COPY1]]
+    ; CHECK: $x0 = COPY [[RORVXr]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = G_ROR %0, %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
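For reference, a minimal standalone C++ sketch (not part of the patch) of the identity the combine relies on: a funnel shift whose two value operands are the same register is a rotate, and a rotate-left equals a rotate-right by the negated shift amount taken modulo the bit width, which is what applyFunnelShiftToRotate materializes with its G_SUB from zero in the G_FSHL case. The helper names fshr32, fshl32, and ror32 below are illustrative only and are not LLVM APIs.

// rotate_identity_check.cpp -- illustrative sketch; helpers are hypothetical
// reference implementations, not part of the patch or of any LLVM API.
#include <cassert>
#include <cstdint>

// Reference semantics of a 32-bit funnel shift right (llvm.fshr.i32):
// the shift amount is taken modulo the bit width.
static uint32_t fshr32(uint32_t Hi, uint32_t Lo, uint32_t Sh) {
  Sh %= 32;
  return Sh == 0 ? Lo : (Hi << (32 - Sh)) | (Lo >> Sh);
}

// Reference semantics of a 32-bit funnel shift left (llvm.fshl.i32).
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Sh) {
  Sh %= 32;
  return Sh == 0 ? Hi : (Hi << Sh) | (Lo >> (32 - Sh));
}

// Rotate right, the operation G_ROR is meant to represent; the RORV
// instructions likewise take the amount modulo the register width.
static uint32_t ror32(uint32_t X, uint32_t Sh) {
  Sh %= 32;
  return Sh == 0 ? X : (X >> Sh) | (X << (32 - Sh));
}

int main() {
  const uint32_t X = 0x12345678u;
  for (uint32_t Sh = 0; Sh < 64; ++Sh) {
    // G_FSHR x, x, sh  ==>  G_ROR x, sh
    assert(fshr32(X, X, Sh) == ror32(X, Sh));
    // G_FSHL x, x, sh  ==>  G_ROR x, (0 - sh), the negation built by
    // applyFunnelShiftToRotate for the rotate-left case.
    assert(fshl32(X, X, Sh) == ror32(X, 0u - Sh));
  }
  return 0;
}

Compiling and running this with any C++11 compiler exercises shift amounts 0 through 63, covering the modulo-width wraparound that the negation trick depends on.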