[GlobalISel] Add G_FMUL identity and negated-operand combines

Adds two combines to the generic GlobalISel combiner:
  * right_identity_fp_one: fold (fmul x, 1.0) -> x
  * fmul_fneg_fneg:        fold (fmul (fneg x), (fneg y)) -> (fmul x, y)

fma_combines and the new fmul_fneg_fneg are collected in a new fp_combines
group, which replaces fma_combines in all_combines. Tests exercise scalar and
vector forms; the vector splat-1.0 case (G_BUILD_VECTOR of G_FCONSTANT) is
not matched by matchFConstantOp yet and is checked as unchanged.

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -461,6 +461,10 @@
   /// \p C.
   bool matchConstantOp(const MachineOperand &MOP, int64_t C);
 
+  /// Return true if \p MOP is defined by a G_FCONSTANT with a value equal to
+  /// \p C.
+  bool matchFConstantOp(const MachineOperand &MOP, double C);
+
   /// Optimize (cond ? x : x) -> x
   bool matchSelectSameVal(MachineInstr &MI);
 
@@ -736,6 +740,12 @@
 
   bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info);
 
+  /// Transform G_FMUL(G_FNEG(x), G_FNEG(y)) to G_FMUL(x, y).
+  bool matchFMulWithNegatedInputs(MachineInstr &MI,
+                                  std::pair<Register, Register> &MatchInfo);
+  bool applyFMulWithNegatedInputs(MachineInstr &MI,
+                                  std::pair<Register, Register> &MatchInfo);
+
 private:
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -314,6 +314,14 @@
   (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
 >;
 
+// Fold x op 1.0 -> x
+def right_identity_fp_one: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchFConstantOp(${root}->getOperand(2), 1.0); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+>;
+
 // Fold (x op x) -> x
 def binop_same_val: GICombineRule<
   (defs root:$root),
@@ -898,6 +906,16 @@
     [{ return Helper.matchCombineFMinMaxNaN(*${root}, ${info}); }]),
   (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${info}); }])>;
 
+// Transform (fmul (fneg x), (fneg y)) -> (fmul x, y)
+def fmul_fneg_fneg_matchinfo : GIDefMatchData<"std::pair<Register, Register>">;
+def fmul_fneg_fneg: GICombineRule <
+  (defs root:$root, fmul_fneg_fneg_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchFMulWithNegatedInputs(*${root},
+                                                ${matchinfo}); }]),
+  (apply [{ return Helper.applyFMulWithNegatedInputs(*${root},
+                                                     ${matchinfo}); }])>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -913,7 +931,8 @@
                                        binop_same_val, binop_left_to_zero,
                                        binop_right_to_zero, p2i_to_i2p,
                                        i2p_to_p2i, anyext_trunc_fold,
-                                       fneg_fneg_fold, right_identity_one]>;
+                                       fneg_fneg_fold, right_identity_one,
+                                       right_identity_fp_one]>;
 
 def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
                                      overlapping_and, mulo_by_2, mulo_by_0,
@@ -940,6 +959,8 @@
   combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma,
   combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>;
 
+def fp_combines : GICombineGroup<[fma_combines, fmul_fneg_fneg]>;
+
 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     extract_vec_elt_combines, combines_for_extload, combine_indexed_load_store,
     undef_combines, identity_combines, phi_combines,
@@ -956,7 +977,7 @@
     truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
     form_bitfield_extract, constant_fold, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
-    and_or_disjoint_mask, fma_combines, fold_binop_into_select]>;
+    and_or_disjoint_mask, fp_combines, fold_binop_into_select]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2430,6 +2430,13 @@
          MaybeCst->getSExtValue() == C;
 }
+
+bool CombinerHelper::matchFConstantOp(const MachineOperand &MOP, double C) {
+  if (!MOP.isReg())
+    return false;
+  const ConstantFP *FCst = getConstantFPVRegVal(MOP.getReg(), MRI);
+  return FCst && FCst->getValueAPF().isExactlyValue(C);
+}
 
 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                      unsigned OpIdx) {
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
@@ -5633,6 +5640,33 @@
   return MatchNaN(1) || MatchNaN(2);
 }
+
+bool CombinerHelper::matchFMulWithNegatedInputs(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL");
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0;
+  Register Src1;
+
+  if (mi_match(Dst, MRI, m_GFMul(m_GFNeg(m_Reg(Src0)), m_GFNeg(m_Reg(Src1))))) {
+    MatchInfo = std::make_pair(Src0, Src1);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyFMulWithNegatedInputs(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL");
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0 = MatchInfo.first;
+  Register Src1 = MatchInfo.second;
+
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildFMul({Dst}, Src0, Src1, MI.getFlags());
+  MI.eraseFromParent();
+  return true;
+}
 
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fmul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fmul.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fmul.mir
@@ -0,0 +1,91 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+
+---
+name:            fmul_by_one
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: fmul_by_one
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: $x0 = COPY [[COPY]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FCONSTANT double 1.0
+    %2:_(s64) = G_FMUL %0, %1(s64)
+    $x0 = COPY %2(s64)
+...
+---
+name:            mul_vector_by_one
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+    ; Currently not implemented.
+    ; CHECK-LABEL: name: mul_vector_by_one
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $q0 = COPY [[FMUL]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_FCONSTANT float 1.0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_FMUL %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+...
+---
+name:            fmul_fneg_fneg
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: fmul_fneg_fneg
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY]]
+    ; CHECK-NEXT: $x0 = COPY [[FMUL]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FNEG %0(s64)
+    %2:_(s64) = G_FMUL %1(s64), %1(s64)
+    $x0 = COPY %2(s64)
+...
+---
+name:            fmul_vector_fneg_fneg
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+    ; The combine handles vector G_FNEG inputs as well.
+    ; CHECK-LABEL: name: fmul_vector_fneg_fneg
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[FMUL]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = G_FNEG %0(<4 x s32>)
+    %2:_(<4 x s32>) = G_FMUL %1(<4 x s32>), %1(<4 x s32>)
+    $q0 = COPY %2(<4 x s32>)
+...