diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -510,6 +510,8 @@
                      std::function<void(MachineIRBuilder &)> &MatchInfo);
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
+  bool matchRotateOutOfRange(MachineInstr &MI);
+  void applyRotateOutOfRange(MachineInstr &MI);
 
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -605,6 +605,13 @@
   (apply [{ Helper.applyFunnelShiftToRotate(*${root}); }])
 >;
 
+def rotate_out_of_range : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_ROTR, G_ROTL):$root,
+    [{ return Helper.matchRotateOutOfRange(*${root}); }]),
+  (apply [{ Helper.applyRotateOutOfRange(*${root}); }])
+>;
+
 def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3894,6 +3894,37 @@
   Observer.changedInstr(MI);
 }
 
+// Fold (rot x, c) -> (rot x, c % BitSize)
+bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+         MI.getOpcode() == TargetOpcode::G_ROTR);
+  unsigned Bitsize =
+      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+  Register AmtReg = MI.getOperand(2).getReg();
+  bool OutOfRange = false;
+  auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
+    if (auto *CI = dyn_cast<ConstantInt>(C))
+      OutOfRange |= CI->getValue().uge(Bitsize);
+    return true;
+  };
+  return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
+}
+
+void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+         MI.getOpcode() == TargetOpcode::G_ROTR);
+  unsigned Bitsize =
+      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+  Builder.setInstrAndDebugLoc(MI);
+  Register Amt = MI.getOperand(2).getReg();
+  LLT AmtTy = MRI.getType(Amt);
+  auto Bits = Builder.buildConstant(AmtTy, Bitsize);
+  Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
+  Observer.changingInstr(MI);
+  MI.getOperand(2).setReg(Amt);
+  Observer.changedInstr(MI);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -188,6 +188,6 @@
                         redundant_and, xor_of_and_with_same_reg,
                         extractvecelt_pairwise_add, redundant_or,
                         mul_const, redundant_sext_inreg,
-                        form_bitfield_extract]> {
+                        form_bitfield_extract, rotate_out_of_range]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -339,6 +339,8 @@
   if (!IsOptNone) {
     AU.addRequired<MachineDominatorTree>();
     AU.addPreserved<MachineDominatorTree>();
+    AU.addRequired<GISelCSEAnalysisWrapperPass>();
+    AU.addPreserved<GISelCSEAnalysisWrapperPass>();
   }
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -364,8 +366,11 @@
       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
   AArch64PostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                           F.hasMinSize(), KB, MDT);
+  GISelCSEAnalysisWrapper &Wrapper =
+      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
   Combiner C(PCInfo, TPC);
-  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+  return C.combineMachineInstrs(MF, CSEInfo);
 }
 
 char AArch64PostLegalizerCombiner::ID = 0;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir
@@ -14,9 +14,9 @@
     ; CHECK-LABEL: name: sextinreg_ashr_to_sbfx
     ; CHECK: liveins: $w0
     ; CHECK: %x:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, [[C]](s32), [[C1]]
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, %lsb(s32), [[C]]
     ; CHECK: $w0 = COPY %sext_inreg(s32)
     ; CHECK: RET_ReallyLR implicit $w0
     %x:_(s32) = COPY $w0
@@ -37,9 +37,9 @@
     ; CHECK-LABEL: name: sextinreg_lshr_to_sbfx
     ; CHECK: liveins: $w0
     ; CHECK: %x:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, [[C]](s32), [[C1]]
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, %lsb(s32), [[C]]
     ; CHECK: $w0 = COPY %sext_inreg(s32)
     ; CHECK: RET_ReallyLR implicit $w0
     %x:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we simplify the constant rotate amount to be in range.
+---
+name: rotl
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body: |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotl
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTL]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 -16
+    %2:_(s32) = G_ROTL %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: rotr
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body: |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotr
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTR]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 -16
+    %2:_(s32) = G_ROTR %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: rotl_bitwidth_cst
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body: |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotl_bitwidth_cst
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTL]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 32
+    %2:_(s32) = G_ROTL %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: rotl_bitwidth_minus_one_cst
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body: |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotl_bitwidth_minus_one_cst
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
+    ; CHECK: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTL]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 31
+    %2:_(s32) = G_ROTL %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
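Note on the fold itself, not part of the patch: applyRotateOutOfRange inserts a G_UREM of the rotate amount by the scalar bit width, so a constant amount c becomes c % BitSize with c read as unsigned. This explains every constant the new MIR test expects. The standalone C++ sketch below reproduces that arithmetic; the helper name canonicalizeRotateAmount is made up for illustration and is not an LLVM API.

  #include <cassert>
  #include <cstdint>

  // Illustration only: mirrors the G_UREM that applyRotateOutOfRange
  // emits, with the amount interpreted as an unsigned value.
  static uint64_t canonicalizeRotateAmount(uint64_t Amt, unsigned BitSize) {
    return Amt % BitSize;
  }

  int main() {
    // rotl/rotr: the s64 amount -16 is 0xFFFFFFFFFFFFFFF0 unsigned,
    // and 0xFFFFFFFFFFFFFFF0 % 32 == 16, hence "G_CONSTANT i64 16".
    assert(canonicalizeRotateAmount(uint64_t(-16), 32) == 16);
    // rotl_bitwidth_cst: rotating an s32 by exactly 32 folds to 0.
    assert(canonicalizeRotateAmount(32, 32) == 0);
    // rotl_bitwidth_minus_one_cst: 31 is already in range. The matcher
    // requires some constant amount >= BitSize before firing, which is
    // also what keeps the combine from re-firing forever on amounts
    // that are already canonical.
    assert(canonicalizeRotateAmount(31, 32) == 31);
  }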