diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -510,6 +510,8 @@
       std::function<void(MachineIRBuilder &)> &MatchInfo);
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
+  bool matchRotateOutOfRange(MachineInstr &MI);
+  void applyRotateOutOfRange(MachineInstr &MI);
 
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -605,7 +605,14 @@
   (apply [{ Helper.applyFunnelShiftToRotate(*${root}); }])
 >;
 
-def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
+def rotate_out_of_range : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_ROTR, G_ROTL):$root,
+    [{ return Helper.matchRotateOutOfRange(*${root}); }]),
+  (apply [{ Helper.applyRotateOutOfRange(*${root}); }])
+>;
+
+def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate, rotate_out_of_range]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -9,6 +9,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -25,6 +26,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -3894,6 +3896,38 @@
   Observer.changedInstr(MI);
 }
 
+// Fold (rot x, c) -> (rot x, c % BitSize).
+bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+         MI.getOpcode() == TargetOpcode::G_ROTR);
+  unsigned Bitsize =
+      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+  bool OutOfRange = false;
+  auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
+    if (auto *CI = dyn_cast<ConstantInt>(C))
+      OutOfRange |= CI->getValue().uge(Bitsize);
+    return true;
+  };
+  return matchUnaryPredicate(MRI, MI.getOperand(2).getReg(), MatchOutOfRange) &&
+         OutOfRange;
+}
+
+void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+         MI.getOpcode() == TargetOpcode::G_ROTR);
+  unsigned Bitsize =
+      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+  Builder.setInstrAndDebugLoc(MI);
+  Register Amt = MI.getOperand(2).getReg();
+  LLT AmtTy = MRI.getType(Amt);
+  auto Bits = Builder.buildConstant(AmtTy, Bitsize);
+  ConstantFoldingMIRBuilder CFMIB(MI);
+  Amt = CFMIB.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
+  Observer.changingInstr(MI);
+  MI.getOperand(2).setReg(Amt);
+  Observer.changedInstr(MI);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
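For intuition, a minimal standalone C++ sketch (not part of the patch; rotl32 is an illustrative helper, not an LLVM API) of why replacing the amount with amount urem bitsize is sound: a rotate amount is only meaningful modulo the bit width, including for the "negative" amounts used in the tests below.

#include <cassert>
#include <cstdint>

// Rotate-left of a 32-bit value; the amount only matters modulo 32.
static uint32_t rotl32(uint32_t X, uint64_t Amt) {
  unsigned R = Amt % 32; // mirrors the G_UREM the combine inserts
  return R == 0 ? X : (X << R) | (X >> (32 - R));
}

int main() {
  // The MIR tests rotate by the s64 constant -16.  As an unsigned 64-bit
  // value that is 0xFFFFFFFFFFFFFFF0, and 0xFFFFFFFFFFFFFFF0 % 32 == 16,
  // which is why the CHECK lines expect G_CONSTANT i64 16.
  assert(uint64_t(-16) % 32 == 16);
  assert(rotl32(0x12345678u, uint64_t(-16)) == rotl32(0x12345678u, 16));
}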
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -188,6 +188,6 @@
                         redundant_and, xor_of_and_with_same_reg,
                         extractvecelt_pairwise_add, redundant_or,
                         mul_const, redundant_sext_inreg,
-                        form_bitfield_extract]> {
+                        form_bitfield_extract, rotate_out_of_range]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir
@@ -0,0 +1,54 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we simplify the constant rotate amount to be in range.
+---
+name:            rotl
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body:             |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotl
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTL]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 -16
+    %2:_(s32) = G_ROTL %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            rotr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body:             |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotr
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTR]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 -16
+    %2:_(s32) = G_ROTR %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
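A note on the expected output above: because applyRotateOutOfRange builds the G_UREM through ConstantFoldingMIRBuilder, a constant amount folds directly to a fresh G_CONSTANT, so the tests check for G_CONSTANT i64 16 with no residual G_UREM. A rough model of the match-side decision, as a sketch of the accumulated-flag idiom rather than the LLVM API (shouldReduce is a hypothetical helper, and it assumes every rotate-amount element was already verified to be constant):

#include <cstdint>
#include <vector>

// The lambda in matchRotateOutOfRange always returns true, so
// matchUnaryPredicate only fails when some element is not a constant;
// whether any element is actually out of range is tracked on the side
// with |=, letting vector splats fire when even one lane needs reducing.
static bool shouldReduce(const std::vector<uint64_t> &AmtElts,
                         unsigned Bitsize) {
  bool OutOfRange = false;
  for (uint64_t Elt : AmtElts)
    OutOfRange |= Elt >= Bitsize; // CI->getValue().uge(Bitsize) in the patch
  return OutOfRange; // rewrite only if at least one lane is out of range
}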