diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -510,6 +510,8 @@
       std::function<void(MachineIRBuilder &)> &MatchInfo);
   bool matchFunnelShiftToRotate(MachineInstr &MI);
   void applyFunnelShiftToRotate(MachineInstr &MI);
+  bool matchRotateOutOfRange(MachineInstr &MI);
+  void applyRotateOutOfRange(MachineInstr &MI);
 
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -234,6 +234,8 @@
 /// fallback.
 void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU);
 
+Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const APInt &Op1,
+                                  const APInt &Op2);
 Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const Register Op1,
                                   const Register Op2,
                                   const MachineRegisterInfo &MRI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -605,6 +605,13 @@
   (apply [{ Helper.applyFunnelShiftToRotate(*${root}); }])
 >;
 
+def rotate_out_of_range : GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_ROTR, G_ROTL):$root,
+    [{ return Helper.matchRotateOutOfRange(*${root}); }]),
+  (apply [{ Helper.applyRotateOutOfRange(*${root}); }])
+>;
+
 def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -9,6 +9,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -25,6 +26,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -3894,6 +3896,44 @@
   Observer.changedInstr(MI);
 }
 
+// Fold (rot x, c) -> (rot x, c % BitSize)
+bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+         MI.getOpcode() == TargetOpcode::G_ROTR);
+  unsigned Bitsize =
+      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+  Register AmtReg = MI.getOperand(2).getReg();
+  bool OutOfRange = false;
+  auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
+    if (auto *CI = dyn_cast<ConstantInt>(C))
+      OutOfRange |= CI->getValue().uge(Bitsize);
+    return true;
+  };
+  if (!(matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) &&
+        OutOfRange))
+    return false;
+  const APInt &OrigAmtCst = *getConstantVRegVal(AmtReg, MRI);
+  const APInt BitSizeCst(OrigAmtCst.getBitWidth(), Bitsize);
+
+  return ConstantFoldBinOp(TargetOpcode::G_UREM, OrigAmtCst, BitSizeCst)
+      .hasValue();
+}
+
+void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
+         MI.getOpcode() == TargetOpcode::G_ROTR);
+  unsigned Bitsize =
+      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
+  Builder.setInstrAndDebugLoc(MI);
+  Register Amt = MI.getOperand(2).getReg();
+  LLT AmtTy = MRI.getType(Amt);
+  auto Bits = Builder.buildConstant(AmtTy, Bitsize);
+  Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
+  Observer.changingInstr(MI);
+  MI.getOperand(2).setReg(Amt);
+  Observer.changedInstr(MI);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -426,19 +426,8 @@
   return APF;
 }
 
-Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
-                                        const Register Op2,
-                                        const MachineRegisterInfo &MRI) {
-  auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
-  if (!MaybeOp2Cst)
-    return None;
-
-  auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
-  if (!MaybeOp1Cst)
-    return None;
-
-  const APInt &C1 = *MaybeOp1Cst;
-  const APInt &C2 = *MaybeOp2Cst;
+Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const APInt &C1,
+                                        const APInt &C2) {
   switch (Opcode) {
   default:
     break;
@@ -477,10 +466,25 @@
       break;
     return C1.srem(C2);
   }
-
   return None;
 }
 
+Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
+                                        const Register Op2,
+                                        const MachineRegisterInfo &MRI) {
+  auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
+  if (!MaybeOp2Cst)
+    return None;
+
+  auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+  if (!MaybeOp1Cst)
+    return None;
+
+  const APInt &C1 = *MaybeOp1Cst;
+  const APInt &C2 = *MaybeOp2Cst;
+  return ConstantFoldBinOp(Opcode, C1, C2);
+}
+
 Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
                                             const Register Op2,
                                             const MachineRegisterInfo &MRI) {
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -188,6 +188,6 @@
                         redundant_and, xor_of_and_with_same_reg,
                         extractvecelt_pairwise_add, redundant_or,
                         mul_const, redundant_sext_inreg,
-                        form_bitfield_extract]> {
+                        form_bitfield_extract, rotate_out_of_range]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -339,6 +339,8 @@
   if (!IsOptNone) {
     AU.addRequired<MachineDominatorTree>();
     AU.addPreserved<MachineDominatorTree>();
+    AU.addRequired<GISelCSEAnalysisWrapperPass>();
+    AU.addPreserved<GISelCSEAnalysisWrapperPass>();
   }
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -364,8 +366,11 @@
       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
   AArch64PostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                           F.hasMinSize(), KB, MDT);
+  GISelCSEAnalysisWrapper &Wrapper =
+      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
   Combiner C(PCInfo, TPC);
-  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+  return C.combineMachineInstrs(MF, CSEInfo);
 }
 
 char AArch64PostLegalizerCombiner::ID = 0;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/form-bitfield-extract-from-sextinreg.mir
@@ -14,9 +14,9 @@
     ; CHECK-LABEL: name: sextinreg_ashr_to_sbfx
     ; CHECK: liveins: $w0
     ; CHECK: %x:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, [[C]](s32), [[C1]]
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, %lsb(s32), [[C]]
     ; CHECK: $w0 = COPY %sext_inreg(s32)
     ; CHECK: RET_ReallyLR implicit $w0
     %x:_(s32) = COPY $w0
@@ -37,9 +37,9 @@
     ; CHECK-LABEL: name: sextinreg_lshr_to_sbfx
     ; CHECK: liveins: $w0
     ; CHECK: %x:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
-    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, [[C]](s32), [[C1]]
+    ; CHECK: %lsb:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; CHECK: %sext_inreg:_(s32) = G_SBFX %x, %lsb(s32), [[C]]
     ; CHECK: $w0 = COPY %sext_inreg(s32)
     ; CHECK: RET_ReallyLR implicit $w0
     %x:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-rotate.mir
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we simplify the constant rotate amount to be in range.
+---
+name:            rotl
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body:             |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotl
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTL]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 -16
+    %2:_(s32) = G_ROTL %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            rotr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body:             |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotr
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[ROTR:%[0-9]+]]:_(s32) = G_ROTR [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTR]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 -16
+    %2:_(s32) = G_ROTR %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            rotl_bitwidth_cst
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body:             |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotl_bitwidth_cst
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTL]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 32
+    %2:_(s32) = G_ROTL %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            rotl_bitwidth_minus_one_cst
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+body:             |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: rotl_bitwidth_minus_one_cst
+    ; CHECK: liveins: $w0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
+    ; CHECK: [[ROTL:%[0-9]+]]:_(s32) = G_ROTL [[COPY]], [[C]](s64)
+    ; CHECK: $w0 = COPY [[ROTL]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s32) = COPY $w0
+    %5:_(s64) = G_CONSTANT i64 31
+    %2:_(s32) = G_ROTL %0, %5(s64)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $w0
+
+...