Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -640,6 +640,12 @@
   ///   (G_SMULO x, 2) -> (G_SADDO x, x)
   bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Match:
+  ///   (G_*MULO x, 0) -> 0 + no carry out
+  /// The zero may be a scalar constant or a splat. On a match, \p MatchInfo
+  /// is set to a callback that builds both outputs as zero constants.
+  bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo);
+
   /// Transform (fadd x, fneg(y)) -> (fsub x, y)
   ///           (fadd fneg(x), y) -> (fsub y, x)
   ///           (fsub x, fneg(y)) -> (fadd x, y)
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -745,6 +745,13 @@
          [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
 
+// Fold (G_*MULO x, 0) -> 0 + no carry out
+def mulo_by_0: GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_UMULO, G_SMULO):$root,
+         [{ return Helper.matchMulOBy0(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
 def mulh_to_lshr : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_UMULH):$root,
@@ -853,7 +860,7 @@
                                         fneg_fneg_fold, right_identity_one]>;
 
 def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
-                                     overlapping_and, mulo_by_2]>;
+                                     overlapping_and, mulo_by_2, mulo_by_0]>;
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4559,6 +4559,35 @@
   return true;
 }
 
+bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  // (G_*MULO x, 0) -> 0 + no carry out
+  assert((MI.getOpcode() == TargetOpcode::G_UMULO ||
+          MI.getOpcode() == TargetOpcode::G_SMULO) &&
+         "Expected G_UMULO or G_SMULO");
+  // Operand 3 is the right-hand multiplicand; a scalar zero and a splat of
+  // zeros both qualify.
+  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+    return false;
+  Register Dst = MI.getOperand(0).getReg();
+  Register Carry = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT CarryTy = MRI.getType(Carry);
+  // Both outputs are rebuilt with buildConstant, so bail out unless a
+  // G_CONSTANT of each type passes the legality query.
+  // NOTE(review): for vectors buildConstant appears to emit a G_BUILD_VECTOR of
+  // scalar constants, so querying G_CONSTANT on the vector type may be too
+  // conservative after legalization -- confirm.
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {DstTy}}) ||
+      !isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {CarryTy}}))
+    return false;
+  // Define the multiply result and the carry-out as zero.
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildConstant(Dst, 0);
+    B.buildConstant(Carry, 0);
+  };
+  return true;
+}
+
 MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UDIV);
   auto &UDiv = cast(MI);
Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-mulo-zero.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-mulo-zero.mir
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="mulo_by_0" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+
+# (G_*MULO x, 0) -> 0 + no carry out
+
+...
+---
+name: umulo_zero
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: umulo_zero
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: %mulo:_(s32) = COPY %zero(s32)
+    ; CHECK-NEXT: %carry:_(s1) = G_CONSTANT i1 false
+    ; CHECK-NEXT: %carry_wide:_(s32) = G_ZEXT %carry(s1)
+    ; CHECK-NEXT: $w0 = COPY %mulo(s32)
+    ; CHECK-NEXT: $w1 = COPY %carry_wide(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %lhs:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %mulo:_(s32), %carry:_(s1) = G_UMULO %lhs, %zero
+    %carry_wide:_(s32) = G_ZEXT %carry(s1)
+    $w0 = COPY %mulo(s32)
+    $w1 = COPY %carry_wide
+    RET_ReallyLR implicit $w0
+...
+---
+name: smulo_zero
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: smulo_zero
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: %mulo:_(s32) = COPY %zero(s32)
+    ; CHECK-NEXT: %carry:_(s1) = G_CONSTANT i1 false
+    ; CHECK-NEXT: %carry_wide:_(s32) = G_ZEXT %carry(s1)
+    ; CHECK-NEXT: $w0 = COPY %mulo(s32)
+    ; CHECK-NEXT: $w1 = COPY %carry_wide(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %lhs:_(s32) = COPY $w0
+    %zero:_(s32) = G_CONSTANT i32 0
+    %mulo:_(s32), %carry:_(s1) = G_SMULO %lhs, %zero
+    %carry_wide:_(s32) = G_ZEXT %carry(s1)
+    $w0 = COPY %mulo(s32)
+    $w1 = COPY %carry_wide
+    RET_ReallyLR implicit $w0
+...
+---
+name: wrong_cst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: wrong_cst
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %lhs:_(s32) = COPY $w0
+    ; CHECK-NEXT: %not_zero:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: %mulo:_(s32), %carry:_(s1) = G_UMULO %lhs, %not_zero
+    ; CHECK-NEXT: %carry_wide:_(s32) = G_ZEXT %carry(s1)
+    ; CHECK-NEXT: $w0 = COPY %mulo(s32)
+    ; CHECK-NEXT: $w1 = COPY %carry_wide(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %lhs:_(s32) = COPY $w0
+    %not_zero:_(s32) = G_CONSTANT i32 3
+    %mulo:_(s32), %carry:_(s1) = G_UMULO %lhs, %not_zero
+    %carry_wide:_(s32) = G_ZEXT %carry(s1)
+    $w0 = COPY %mulo(s32)
+    $w1 = COPY %carry_wide
+    RET_ReallyLR implicit $w0
+...
+---
+name: umulo_vec_zero
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $x0
+    ; CHECK-LABEL: name: umulo_vec_zero
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: %mulo:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+    ; CHECK-NEXT: %carry:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1)
+    ; CHECK-NEXT: %carry_elt_0:_(s1) = G_EXTRACT_VECTOR_ELT %carry(<2 x s1>), %zero(s64)
+    ; CHECK-NEXT: %carry_wide:_(s64) = G_ZEXT %carry_elt_0(s1)
+    ; CHECK-NEXT: $q0 = COPY %mulo(<2 x s64>)
+    ; CHECK-NEXT: $x0 = COPY %carry_wide(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %lhs:_(<2 x s64>) = COPY $q0
+    %zero:_(s64) = G_CONSTANT i64 0
+    %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
+    %mulo:_(<2 x s64>), %carry:_(<2 x s1>) = G_UMULO %lhs, %zero_vec
+    %carry_elt_0:_(s1) = G_EXTRACT_VECTOR_ELT %carry:_(<2 x s1>), %zero:_(s64)
+    %carry_wide:_(s64) = G_ZEXT %carry_elt_0
+    $q0 = COPY %mulo(<2 x s64>)
+    $x0 = COPY %carry_wide
+    RET_ReallyLR implicit $q0
+...
+---
+name: smulo_vec_zero
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $x0
+    ; CHECK-LABEL: name: smulo_vec_zero
+    ; CHECK: liveins: $q0, $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: %mulo:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+    ; CHECK-NEXT: %carry:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1)
+    ; CHECK-NEXT: %carry_elt_0:_(s1) = G_EXTRACT_VECTOR_ELT %carry(<2 x s1>), %zero(s64)
+    ; CHECK-NEXT: %carry_wide:_(s64) = G_ZEXT %carry_elt_0(s1)
+    ; CHECK-NEXT: $q0 = COPY %mulo(<2 x s64>)
+    ; CHECK-NEXT: $x0 = COPY %carry_wide(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %lhs:_(<2 x s64>) = COPY $q0
+    %zero:_(s64) = G_CONSTANT i64 0
+    %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
+    %mulo:_(<2 x s64>), %carry:_(<2 x s1>) = G_SMULO %lhs, %zero_vec
+    %carry_elt_0:_(s1) = G_EXTRACT_VECTOR_ELT %carry:_(<2 x s1>), %zero:_(s64)
+    %carry_wide:_(s64) = G_ZEXT %carry_elt_0
+    $q0 = COPY %mulo(<2 x s64>)
+    $x0 = COPY %carry_wide
+    RET_ReallyLR implicit $q0