diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -182,6 +182,13 @@
   (apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
 >;
 
+def fold_merge_to_zext : GICombineRule<
+  (defs root:$d),
+  (match (wip_match_opcode G_MERGE_VALUES):$d,
+          [{ return matchFoldMergeToZext(*${d}, MRI); }]),
+  (apply [{ applyFoldMergeToZext(*${d}, MRI, B, Observer); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -204,6 +211,6 @@
                         mul_const, redundant_sext_inreg,
                         form_bitfield_extract, rotate_out_of_range,
                         icmp_to_true_false_known_bits, merge_unmerge,
-                        select_combines]> {
+                        select_combines, fold_merge_to_zext]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -240,6 +241,33 @@
   return true;
 }
 
+/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
+/// is a zero, into a G_ZEXT of the first.
+bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  // The combine rule roots only on G_MERGE_VALUES, so this cast is expected to
+  // always succeed.
+  auto &Merge = cast<GMerge>(MI);
+  LLT SrcTy = MRI.getType(Merge.getSourceReg(0));
+  if (SrcTy != LLT::scalar(32) || Merge.getNumSources() != 2)
+    return false;
+  return mi_match(Merge.getSourceReg(1), MRI, m_SpecificICst(0));
+}
+
+/// Rewrite \p MI, a G_MERGE_VALUES accepted by matchFoldMergeToZext, in place
+/// into a G_ZEXT of its first source.
+void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
+                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
+  // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
+  // ->
+  // %d(s64) = G_ZEXT %a(s32)
+  Observer.changingInstr(MI);
+  MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
+  // Operand 0 is %d and operand 1 is %a; operand 2 is the zero source, which
+  // G_ZEXT does not take.
+  MI.RemoveOperand(2);
+  Observer.changedInstr(MI);
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-merge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-merge.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-merge.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-merge.mir
@@ -68,3 +68,25 @@
     RET_ReallyLR implicit $x0
 
 ...
+---
+name: merge_to_zext
+alignment: 4
+legalized: true
+liveins:
+  - { reg: '$w0' }
+body: |
+  bb.1.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: merge_to_zext
+    ; CHECK: %v:_(s32) = COPY $w0
+    ; CHECK: %merge:_(s64) = G_ZEXT %v(s32)
+    ; CHECK: $x0 = COPY %merge(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %v:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %merge:_(s64) = G_MERGE_VALUES %v, %zero
+    $x0 = COPY %merge(s64)
+    RET_ReallyLR implicit $x0
+
+...