diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -17,9 +17,19 @@
          [{ return matchFConstantToConstant(*${root}, MRI); }]),
   (apply [{ applyFConstantToConstant(*${root}); }])>;
 
+// Fold a G_ICMP eq/ne of a G_TRUNC against zero into a compare of the wide
+// value when the truncated bits are known to be sign bits.
+def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">;
+def icmp_redundant_trunc : GICombineRule<
+  (defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo),
+  (match (wip_match_opcode G_ICMP):$root,
+         [{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
+  (apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
+
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
-                                           fconstant_to_constant]> {
+                                           fconstant_to_constant,
+                                           icmp_redundant_trunc]> {
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
   let StateClass = "AArch64PreLegalizerCombinerHelperState";
   let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -17,8 +17,11 @@
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
 
@@ -53,6 +56,67 @@
   MI.eraseFromParent();
 }
 
+/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
+/// are sign bits. In this case, we can transform the G_ICMP to directly compare
+/// the wide value with a zero.
+///
+/// Only equality predicates (eq/ne) with a scalar LHS are handled. On success,
+/// \p MatchInfo is set to the source register of the G_TRUNC.
+static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                    GISelKnownBits *KB, Register &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);
+
+  const auto Pred =
+      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+  if (!ICmpInst::isEquality(Pred))
+    return false;
+
+  Register LHS = MI.getOperand(2).getReg();
+  LLT LHSTy = MRI.getType(LHS);
+  if (!LHSTy.isScalar())
+    return false;
+
+  Register RHS = MI.getOperand(3).getReg();
+  Register WideReg;
+
+  if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))))
+    return false;
+  if (!mi_match(RHS, MRI, m_SpecificICst(0)))
+    return false;
+
+  // The G_TRUNC is redundant only if the wide value has strictly more known
+  // sign bits than the number of bits truncated away: every dropped bit then
+  // equals the sign bit of the narrow value.
+  LLT WideTy = MRI.getType(WideReg);
+  if (KB->computeNumSignBits(WideReg) <=
+      WideTy.getSizeInBits() - LHSTy.getSizeInBits())
+    return false;
+
+  MatchInfo = WideReg;
+  return true;
+}
+
+/// Rewrite the G_ICMP matched by matchICmpRedundantTrunc in place so that it
+/// compares \p WideReg against a newly built zero constant of the same type,
+/// instead of comparing the G_TRUNC result against the narrow zero.
+static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                    MachineIRBuilder &Builder,
+                                    GISelChangeObserver &Observer,
+                                    Register &WideReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+
+  LLT WideTy = MRI.getType(WideReg);
+  // We're going to directly use the wide register as the LHS, and then use an
+  // equivalent size zero for RHS.
+  Builder.setInstrAndDebugLoc(MI);
+  auto WideZero = Builder.buildConstant(WideTy, 0);
+  Observer.changingInstr(MI);
+  MI.getOperand(2).setReg(WideReg);
+  MI.getOperand(3).setReg(WideZero.getReg(0));
+  Observer.changedInstr(MI);
+  return true;
+}
+
 class AArch64PreLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir
@@ -0,0 +1,110 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - -verify-machineinstrs | FileCheck %s
+
+# This test checks the optimization to remove the G_TRUNC if we can determine it's redundant.
+---
+name: icmp_trunc_sextload
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: icmp_trunc_sextload
+    ; CHECK: liveins: $x0
+    ; CHECK: %v:_(p0) = COPY $x0
+    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %load(s64), [[C]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %v:_(p0) = COPY $x0
+    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+    %trunc:_(s32) = G_TRUNC %load(s64)
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s32), %zero
+    %5:_(s32) = G_ANYEXT %cmp
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+...
+# Same as above, but with an eq predicate.
+---
+name: icmp_trunc_sextload_eq
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: icmp_trunc_sextload_eq
+    ; CHECK: liveins: $x0
+    ; CHECK: %v:_(p0) = COPY $x0
+    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %load(s64), [[C]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %v:_(p0) = COPY $x0
+    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+    %trunc:_(s32) = G_TRUNC %load(s64)
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(eq), %trunc(s32), %zero
+    %5:_(s32) = G_ANYEXT %cmp
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+...
+# Negative test: only eq/ne predicates are combined, so slt is left untouched.
+---
+name: icmp_trunc_sextload_wrongpred
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: icmp_trunc_sextload_wrongpred
+    ; CHECK: liveins: $x0
+    ; CHECK: %v:_(p0) = COPY $x0
+    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+    ; CHECK: %trunc:_(s32) = G_TRUNC %load(s64)
+    ; CHECK: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %v:_(p0) = COPY $x0
+    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+    %trunc:_(s32) = G_TRUNC %load(s64)
+    %zero:_(s32) = G_CONSTANT i32 0
+    %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
+    %5:_(s32) = G_ANYEXT %cmp
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+...
+# Negative test: truncating to s16 drops bits that are not sign bits of the 4-byte sextload.
+---
+name: icmp_trunc_sextload_extend_mismatch
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: icmp_trunc_sextload_extend_mismatch
+    ; CHECK: liveins: $x0
+    ; CHECK: %v:_(p0) = COPY $x0
+    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+    ; CHECK: %trunc:_(s16) = G_TRUNC %load(s64)
+    ; CHECK: %zero:_(s16) = G_CONSTANT i16 0
+    ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %v:_(p0) = COPY $x0
+    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+    %trunc:_(s16) = G_TRUNC %load(s64)
+    %zero:_(s16) = G_CONSTANT i16 0
+    %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
+    %5:_(s32) = G_ANYEXT %cmp
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+...
+