diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -189,6 +189,13 @@
   (apply [{ applyFoldMergeToZext(*${d}, MRI, B, Observer); }])
 >;
 
+def mutate_anyext_to_zext : GICombineRule<
+  (defs root:$d),
+  (match (wip_match_opcode G_ANYEXT):$d,
+         [{ return matchMutateAnyExtToZExt(*${d}, MRI); }]),
+  (apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -204,7 +211,7 @@
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
                        [copy_prop, erase_undef_store, combines_for_extload,
-                        sext_trunc_sextload,
+                        sext_trunc_sextload, mutate_anyext_to_zext,
                         hoist_logic_op_with_same_opcode_hands,
                         redundant_and, xor_of_and_with_same_reg,
                         extractvecelt_pairwise_add, redundant_or,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -262,6 +262,33 @@
   Observer.changedInstr(MI);
 }
 
+/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
+/// instruction.
+static bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  // If this is coming from a scalar compare then we can use a G_ZEXT instead of
+  // a G_ANYEXT:
+  //
+  // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
+  // %ext:_(s64) = G_ANYEXT %cmp(s32)
+  //
+  // By doing this, we can leverage more KnownBits combines.
+  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  return MRI.getType(Dst).isScalar() &&
+         mi_match(Src, MRI,
+                  m_any_of(m_GICmp(m_Pred(), m_Reg(), m_Reg()),
+                           m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
+}
+
+static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                    MachineIRBuilder &B,
+                                    GISelChangeObserver &Observer) {
+  Observer.changingInstr(MI);
+  MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
+  Observer.changedInstr(MI);
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-anyext-to-zext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-anyext-to-zext.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-anyext-to-zext.mir
@@ -0,0 +1,84 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner --aarch64postlegalizercombinerhelper-only-enable-rule="mutate_anyext_to_zext" -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+
+...
+---
+name: scalar_icmp
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; Scalars have 0 or 1, so we want a ZExt.
+
+    ; CHECK-LABEL: name: scalar_icmp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: %cst_1:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: %icmp:_(s32) = G_ICMP intpred(eq), %copy(s64), %cst_1
+    ; CHECK-NEXT: %ext:_(s64) = G_ZEXT %icmp(s32)
+    ; CHECK-NEXT: $x0 = COPY %ext(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %copy:_(s64) = COPY $x0
+    %cst_1:_(s64) = G_CONSTANT i64 1
+    %icmp:_(s32) = G_ICMP intpred(eq), %copy(s64), %cst_1
+    %ext:_(s64) = G_ANYEXT %icmp(s32)
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+
+
+...
+---
+name: vector_icmp
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $d0
+    ; Vectors have 0 or negative 1, so we don't produce a zext.
+
+    ; CHECK-LABEL: name: vector_icmp
+    ; CHECK: liveins: $x0, $d0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %copy:_(<2 x s32>) = COPY $d0
+    ; CHECK-NEXT: %cst_1:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: %vec_cst_1:_(<2 x s32>) = G_BUILD_VECTOR %cst_1(s32), %cst_1(s32)
+    ; CHECK-NEXT: %icmp:_(<2 x s32>) = G_ICMP intpred(eq), %copy(<2 x s32>), %vec_cst_1
+    ; CHECK-NEXT: %ext:_(<2 x s64>) = G_ANYEXT %icmp(<2 x s32>)
+    ; CHECK-NEXT: $q0 = COPY %ext(<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %copy:_(<2 x s32>) = COPY $d0
+    %cst_1:_(s32) = G_CONSTANT i32 1
+    %vec_cst_1:_(<2 x s32>) = G_BUILD_VECTOR %cst_1, %cst_1
+    %icmp:_(<2 x s32>) = G_ICMP intpred(eq), %copy(<2 x s32>), %vec_cst_1
+    %ext:_(<2 x s64>) = G_ANYEXT %icmp(<2 x s32>)
+    $q0 = COPY %ext(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: scalar_fcmp
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $d0
+    ; Scalars have 0 or 1, so we want a ZExt.
+
+    ; CHECK-LABEL: name: scalar_fcmp
+    ; CHECK: liveins: $x0, $d0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $d0
+    ; CHECK-NEXT: %cst_1:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; CHECK-NEXT: %fcmp:_(s32) = G_FCMP floatpred(oeq), %copy(s64), %cst_1
+    ; CHECK-NEXT: %ext:_(s64) = G_ZEXT %fcmp(s32)
+    ; CHECK-NEXT: $x0 = COPY %ext(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %copy:_(s64) = COPY $d0
+    %cst_1:_(s64) = G_FCONSTANT double 1.0
+    %fcmp:_(s32) = G_FCMP floatpred(oeq), %copy(s64), %cst_1
+    %ext:_(s64) = G_ANYEXT %fcmp(s32)
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
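
Note (illustrative, not part of the patch): the payoff of mutating to G_ZEXT is that GISelKnownBits can then prove the high bits of the extended value are zero, which feeds combines already in this helper's rule list, such as redundant_and. A hypothetical MIR fragment, with the registers %a, %b, %one, and %and invented for illustration:

    %icmp:_(s32) = G_ICMP intpred(eq), %a(s64), %b
    %ext:_(s64) = G_ANYEXT %icmp(s32)   ; rewritten to G_ZEXT by this combine
    %one:_(s64) = G_CONSTANT i64 1
    %and:_(s64) = G_AND %ext, %one

Once %ext is a G_ZEXT of a scalar compare (which produces 0 or 1 on AArch64), KnownBits shows bits 63..1 of %ext are zero, so the G_AND is a no-op and redundant_and can replace %and with %ext. With G_ANYEXT the high bits are undefined, so no such fold is possible.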