diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -609,6 +609,11 @@
   /// and rename: s/bool tryEmit/void emit/
   bool tryEmitMemcpyInline(MachineInstr &MI);
 
+  /// Match:
+  /// (G_UMULO x, 2) -> (G_UADDO x, x)
+  /// (G_SMULO x, 2) -> (G_SADDO x, x)
+  bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -702,6 +702,12 @@
    [{ return Helper.matchConstantFold(*${d}, ${matchinfo}); }]),
   (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
 
+def mulo_by_2: GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_UMULO, G_SMULO):$root,
+         [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -718,7 +724,7 @@
                                         fneg_fneg_fold, right_identity_one]>;
 
 def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
-                                     overlapping_and]>;
+                                     overlapping_and, mulo_by_2]>;
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4376,6 +4376,35 @@
   return true;
 }
 
+bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
+  // Check for a constant 2 or a splat of 2 on the RHS.
+  auto RHS = MI.getOperand(3).getReg();
+  bool IsVector = MRI.getType(RHS).isVector();
+  if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
+    return false;
+  if (IsVector) {
+    // FIXME: There's no mi_match pattern for this yet.
+    auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
+    if (!RHSDef)
+      return false;
+    auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
+    if (!Splat || *Splat != 2)
+      return false;
+  }
+
+  MatchInfo = [=, &MI](MachineIRBuilder &B) {
+    Observer.changingInstr(MI);
+    unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
+                                                   : TargetOpcode::G_SADDO;
+    MI.setDesc(Builder.getTII().get(NewOpc));
+    MI.getOperand(3).setReg(MI.getOperand(2).getReg());
+    Observer.changedInstr(MI);
+  };
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir
@@ -0,0 +1,117 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="mulo_by_2" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+...
+---
+name: smulo_to_saddo
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: smulo_to_saddo
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_SADDO %copy, %copy
+    ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %copy:_(s64) = COPY $x0
+    %two:_(s64) = G_CONSTANT i64 2
+    %mul:_(s64), %o:_(s1) = G_SMULO %copy, %two
+    %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %overflow_ext(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: umulo_to_uaddo
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: umulo_to_uaddo
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UADDO %copy, %copy
+    ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %copy:_(s64) = COPY $x0
+    %two:_(s64) = G_CONSTANT i64 2
+    %mul:_(s64), %o:_(s1) = G_UMULO %copy, %two
+    %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %overflow_ext(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: vector
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: vector
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %copy:_(<2 x s32>) = COPY $d0
+    ; CHECK-NEXT: %mul:_(<2 x s32>), %o:_(<2 x s1>) = G_SADDO %copy, %copy
+    ; CHECK-NEXT: %overflow_ext:_(<2 x s32>) = G_ANYEXT %o(<2 x s1>)
+    ; CHECK-NEXT: $d0 = COPY %overflow_ext(<2 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %copy:_(<2 x s32>) = COPY $d0
+    %two:_(s32) = G_CONSTANT i32 2
+    %splat_two:_(<2 x s32>) = G_BUILD_VECTOR %two(s32), %two(s32)
+    %mul:_(<2 x s32>), %o:_(<2 x s1>) = G_SMULO %copy, %splat_two
+    %overflow_ext:_(<2 x s32>) = G_ANYEXT %o(<2 x s1>)
+    $d0 = COPY %overflow_ext(<2 x s32>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: dont_combine_wrong_cst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: dont_combine_wrong_cst
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: %three:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UMULO %copy, %three
+    ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %copy:_(s64) = COPY $x0
+    %three:_(s64) = G_CONSTANT i64 3
+    %mul:_(s64), %o:_(s1) = G_UMULO %copy, %three
+    %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %overflow_ext(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: dont_combine_not_cst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: dont_combine_not_cst
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %copy1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %copy2:_(s64) = COPY $x1
+    ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UMULO %copy1, %copy2
+    ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %copy1:_(s64) = COPY $x0
+    %copy2:_(s64) = COPY $x1
+    %mul:_(s64), %o:_(s1) = G_UMULO %copy1, %copy2
+    %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %overflow_ext(s32)
+    RET_ReallyLR implicit $w0
+...