Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/Register.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/Support/Alignment.h"
 
 namespace llvm {
@@ -318,9 +319,45 @@
   bool applyCombineTruncOfShl(MachineInstr &MI,
                               std::pair<Register, Register> &MatchInfo);
 
+  /// Transform G_FMUL(G_FNEG(x), G_FNEG(y)) to G_FMUL(x, y).
+  bool matchFMulWithNegatedInputs(MachineInstr &MI,
+                                  std::pair<Register, Register> &MatchInfo);
+  bool applyFMulWithNegatedInputs(MachineInstr &MI,
+                                  std::pair<Register, Register> &MatchInfo);
+
+  /// Transform G_FMUL(G_FMUL(x, c0), c1) to G_FMUL(x, c0 * c1).
+  /// Unsafe floating point math only.
+  bool matchFMulFMulWithConstantOps(
+      MachineInstr &MI, std::pair<Register, Optional<APFloat>> &MatchInfo);
+  bool applyFMulFMulWithConstantOps(
+      MachineInstr &MI, std::pair<Register, Optional<APFloat>> &MatchInfo);
+
+  /// Transform G_FMUL(G_FADD(x, x), c) to G_FMUL(x, c * 2.0).
+  /// Unsafe floating point math only.
+  bool
+  matchFMulFAddWithSameReg(MachineInstr &MI,
+                           std::pair<Register, Optional<APFloat>> &MatchInfo);
+  bool
+  applyFMulFAddWithSameReg(MachineInstr &MI,
+                           std::pair<Register, Optional<APFloat>> &MatchInfo);
+
+  /// Transform G_FMUL(x, G_SELECT(G_FCMP(x > 0.0), -1.0, 1.0)) to
+  /// G_FNEG(G_FABS(x)).
+  /// Transform G_FMUL(x, G_SELECT(G_FCMP(x > 0.0), 1.0, -1.0)) to
+  /// G_FABS(x).
+  bool matchForceFPNegOrPos(MachineInstr &MI,
+                            std::tuple<CmpInst::Predicate, Optional<APFloat>,
+                                       Optional<APFloat>> &MatchInfo);
+  bool applyForceFPNegOrPos(MachineInstr &MI,
+                            std::tuple<CmpInst::Predicate, Optional<APFloat>,
+                                       Optional<APFloat>> &MatchInfo);
+
   /// Transform G_MUL(x, -1) to G_SUB(0, x)
   bool applyCombineMulByNegativeOne(MachineInstr &MI);
 
+  /// Transform G_FMUL(x, -1.0) to G_FNEG(x)
+  bool applyCombineFMulByNegativeOne(MachineInstr &MI);
+
   /// Return true if any explicit use operand on \p MI is defined by a
   /// G_IMPLICIT_DEF.
   bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
@@ -365,6 +402,10 @@
   /// \p C.
   bool matchConstantOp(const MachineOperand &MOP, int64_t C);
 
+  /// Return true if \p MOP is defined by a G_FCONSTANT with a value equal to
+  /// \p C.
+  bool matchFConstantOp(const MachineOperand &MOP, double C);
+
   /// Optimize (cond ? x : x) -> x
   bool matchSelectSameVal(MachineInstr &MI);
 
@@ -374,6 +415,9 @@
   /// Check if operand \p OpIdx is zero.
   bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx);
 
+  /// Check if operand \p OpIdx is floating point zero.
+  bool matchOperandIsFPZero(MachineInstr &MI, unsigned OpIdx);
+
   /// Check if operand \p OpIdx is undef.
   bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx);
 
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -263,6 +263,14 @@
   (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
 >;
 
+// Fold x op 1.0 -> x
+def right_identity_fp_one: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchFConstantOp(${root}->getOperand(2), 1.0); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+>;
+
 // Fold (x op x) -> x
 def binop_same_val: GICombineRule<
   (defs root:$root),
@@ -287,6 +295,14 @@
   (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
 >;
 
+// Fold (x op 0.0) -> 0.0
+def binop_right_to_fp_zero: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchOperandIsFPZero(*${root}, 2); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
+>;
+
 // Erase stores of undef values.
 def erase_undef_store : GICombineRule<
   (defs root:$root),
@@ -480,6 +496,54 @@
   (apply [{ return Helper.applyCombineMulByNegativeOne(*${root}); }])
 >;
 
+// Transform (fmul x, -1.0) -> (fneg x)
+def fmul_by_neg_one: GICombineRule <
+  (defs root:$root),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchFConstantOp(${root}->getOperand(2), -1.0); }]),
+  (apply [{ return Helper.applyCombineFMulByNegativeOne(*${root}); }])
+>;
+
+// Transform (fmul (fneg x), (fneg y)) -> (fmul x, y)
+def fmul_fneg_fneg_matchinfo : GIDefMatchData<"std::pair<Register, Register>">;
+def fmul_fneg_fneg: GICombineRule <
+  (defs root:$root, fmul_fneg_fneg_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchFMulWithNegatedInputs(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyFMulWithNegatedInputs(*${root}, ${matchinfo}); }])
+>;
+
+// Transform (fmul (fmul x, c0), c1) -> (fmul x, c0 * c1)
+// Unsafe fp math only
+def fmul_fmul_cst_matchinfo : GIDefMatchData<"std::pair<Register, Optional<APFloat>>">;
+def fmul_fmul_cst: GICombineRule<
+  (defs root:$root, fmul_fmul_cst_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchFMulFMulWithConstantOps(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyFMulFMulWithConstantOps(*${root}, ${matchinfo}); }])
+>;
+
+// Transform (fmul (fadd x, x), c) -> (fmul x, c * 2.0)
+// Unsafe fp math only
+def fmul_fadd_cst_matchinfo : GIDefMatchData<"std::pair<Register, Optional<APFloat>>">;
+def fmul_fadd_cst: GICombineRule<
+  (defs root:$root, fmul_fadd_cst_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchFMulFAddWithSameReg(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyFMulFAddWithSameReg(*${root}, ${matchinfo}); }])
+>;
+
+
+// Transform (fmul x, (select (fcmp x > 0.0), -1.0, 1.0)) -> (fneg (fabs x))
+// Transform (fmul x, (select (fcmp x > 0.0), 1.0, -1.0)) -> (fabs x)
+def fp_neg_or_pos_matchinfo : GIDefMatchData<"std::tuple<CmpInst::Predicate, Optional<APFloat>, Optional<APFloat>>">;
+def fp_neg_or_pos: GICombineRule<
+  (defs root:$root, fp_neg_or_pos_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchForceFPNegOrPos(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyForceFPNegOrPos(*${root}, ${matchinfo}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -493,7 +557,8 @@
                                        binop_same_val, binop_left_to_zero,
                                        binop_right_to_zero, p2i_to_i2p,
                                        i2p_to_p2i, anyext_trunc_fold,
-                                       fneg_fneg_fold, right_identity_one]>;
+                                       fneg_fneg_fold, right_identity_one,
+                                       right_identity_fp_one, binop_right_to_fp_zero]>;
 
 def known_bits_simplifications : GICombineGroup<[
   and_trivial_mask, redundant_sext_inreg]>;
@@ -503,7 +568,9 @@
 def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
 
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
-                                       mul_by_neg_one]>;
+                                       mul_by_neg_one, fmul_by_neg_one]>;
+
+def unsafe_fp_math_combines: GICombineGroup<[fmul_fmul_cst, fmul_fadd_cst]>;
 
 def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
     combines_for_extload, combine_indexed_load_store, undef_combines,
@@ -515,4 +582,5 @@
     not_cmp_fold, opt_brcond_by_inverting_cond,
     unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
     unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
-    constant_fp_op]>;
+    constant_fp_op, fmul_fneg_fneg, unsafe_fp_math_combines,
+    fp_neg_or_pos]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2071,6 +2071,202 @@
   return false;
 }
 
+bool CombinerHelper::matchFMulWithNegatedInputs(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL");
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0;
+  Register Src1;
+
+  if (mi_match(Dst, MRI,
+               m_GFMul(m_GFNeg(m_Reg(Src0)), m_GFNeg(m_Reg(Src1))))) {
+    MatchInfo = std::make_pair(Src0, Src1);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyFMulWithNegatedInputs(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL");
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0 = MatchInfo.first;
+  Register Src1 = MatchInfo.second;
+
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildFMul({Dst}, Src0, Src1, MI.getFlags());
+  MI.eraseFromParent();
+  return true;
+}
+
+bool CombinerHelper::matchFMulFMulWithConstantOps(
+    MachineInstr &MI, std::pair<Register, Optional<APFloat>> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL");
+  auto Options = MI.getMF()->getTarget().Options;
+  if (!(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+    return false;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src;
+  const ConstantFP *FP0{};
+  const ConstantFP *FP1{};
+  if (mi_match(Dst, MRI,
+               m_GFMul(m_GFMul(m_Reg(Src), m_GFCst(FP0)), m_GFCst(FP1)))) {
+    const fltSemantics &FPSemantic = getFltSemanticForLLT(MRI.getType(Dst));
+    APFloat RHS = FP0->getValueAPF();
+    APFloat LHS = FP1->getValueAPF();
+    bool Unused;
+    RHS.convert(FPSemantic, APFloat::rmNearestTiesToEven, &Unused);
+    LHS.convert(FPSemantic, APFloat::rmNearestTiesToEven, &Unused);
+    MatchInfo = std::make_pair(Src, RHS * LHS);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyFMulFMulWithConstantOps(
+    MachineInstr &MI, std::pair<Register, Optional<APFloat>> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL &&
+         MatchInfo.second.hasValue() &&
+         "Expected a G_FMUL and a non-empty optional");
+  Register Src = MatchInfo.first;
+  APFloat NewFP = MatchInfo.second.getValue();
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+
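+  // Materialize the folded constant c0 * c1 as a fresh G_FCONSTANT and
+  // rewrite the outer G_FMUL to multiply by it.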
+  Builder.setInstrAndDebugLoc(MI);
+  auto MIBFPNewC = Builder.buildFConstant(
+      DstTy, *ConstantFP::get(MI.getMF()->getFunction().getContext(), NewFP));
+  Builder.buildFMul({Dst}, Src, MIBFPNewC, MI.getFlags());
+  MI.eraseFromParent();
+  return true;
+}
+
+bool CombinerHelper::matchFMulFAddWithSameReg(
+    MachineInstr &MI, std::pair<Register, Optional<APFloat>> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL");
+  auto Options = MI.getMF()->getTarget().Options;
+  if (!(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+    return false;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0;
+  Register Src1;
+  const ConstantFP *FP{};
+  if (mi_match(Dst, MRI,
+               m_GFMul(m_GFAdd(m_Reg(Src0), m_Reg(Src1)), m_GFCst(FP))) &&
+      Src0 == Src1) {
+    APFloat OldFP = FP->getValueAPF();
+    APFloat NewFP = OldFP * APFloat(OldFP.getSemantics(), "2.0");
+    MatchInfo = std::make_pair(Src0, NewFP);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyFMulFAddWithSameReg(
+    MachineInstr &MI, std::pair<Register, Optional<APFloat>> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL &&
+         MatchInfo.second.hasValue() &&
+         "Expected a G_FMUL and a non-empty optional");
+  Register Src = MatchInfo.first;
+  APFloat NewFP = MatchInfo.second.getValue();
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+
+  Builder.setInstrAndDebugLoc(MI);
+  auto MIBFPNewC = Builder.buildFConstant(
+      DstTy, *ConstantFP::get(MI.getMF()->getFunction().getContext(), NewFP));
+  Builder.buildFMul({Dst}, Src, MIBFPNewC, MI.getFlags());
+  MI.eraseFromParent();
+  return true;
+}
+
+bool CombinerHelper::matchForceFPNegOrPos(
+    MachineInstr &MI,
+    std::tuple<CmpInst::Predicate, Optional<APFloat>, Optional<APFloat>>
+        &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL");
+  // G_FMULs that may see NaNs or that must honor signed zeros are not
+  // candidates for this combine.
+  if (!MI.getFlag(MachineInstr::MIFlag::FmNoNans) ||
+      !MI.getFlag(MachineInstr::MIFlag::FmNsz))
+    return false;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src0;
+  Register Src1;
+  if (mi_match(Dst, MRI, m_GFMul(m_Reg(Src0), m_Reg(Src1)))) {
+    MachineInstr *SrcMI = MRI.getVRegDef(Src1);
+    if (SrcMI->getOpcode() != TargetOpcode::G_SELECT)
+      return false;
+
+    // Verify that `x` is compared against `0.0` and that the compared value
+    // is the same `x` that feeds the G_FMUL.
+    MachineInstr *CmpMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
+    if (CmpMI->getOpcode() != TargetOpcode::G_FCMP ||
+        !matchFConstantOp(CmpMI->getOperand(3), 0.0) ||
+        Src0 != CmpMI->getOperand(2).getReg())
+      return false;
+
+    CmpInst::Predicate Pred =
+        static_cast<CmpInst::Predicate>(CmpMI->getOperand(1).getPredicate());
+    auto *TrueOp = getConstantFPVRegVal(SrcMI->getOperand(2).getReg(), MRI);
+    auto *FalseOp = getConstantFPVRegVal(SrcMI->getOperand(3).getReg(), MRI);
+    if (!TrueOp || !FalseOp)
+      return false;
+
+    MatchInfo =
+        std::make_tuple(Pred, TrueOp->getValueAPF(), FalseOp->getValueAPF());
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyForceFPNegOrPos(
+    MachineInstr &MI,
+    std::tuple<CmpInst::Predicate, Optional<APFloat>, Optional<APFloat>>
+        &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL &&
+         std::get<1>(MatchInfo).hasValue() &&
+         std::get<2>(MatchInfo).hasValue() &&
+         "Expected a G_FMUL and optionals with values");
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  auto Pred = std::get<0>(MatchInfo);
+  auto TrueFP = std::get<1>(MatchInfo).getValue();
+  auto FalseFP = std::get<2>(MatchInfo).getValue();
+
+  switch (Pred) {
+  default:
+    break;
+  case CmpInst::FCMP_OLT:
+  case CmpInst::FCMP_ULT:
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_ULE:
+    std::swap(TrueFP, FalseFP);
+    LLVM_FALLTHROUGH;
+  case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_UGT:
+  case CmpInst::FCMP_OGE:
+  case CmpInst::FCMP_UGE: {
+    Builder.setInstrAndDebugLoc(MI);
+    if (TrueFP.isExactlyValue(-1.0) && FalseFP.isExactlyValue(1.0)) {
+      auto FAbsMIB = Builder.buildFAbs({DstTy}, {Src});
+      Builder.buildFNeg({Dst}, {FAbsMIB}, MI.getFlags());
+      MI.eraseFromParent();
+      return true;
+    }
+    if (TrueFP.isExactlyValue(1.0) && FalseFP.isExactlyValue(-1.0)) {
+      Builder.buildFAbs({Dst}, {Src});
+      MI.eraseFromParent();
+      return true;
+    }
+    break;
+  }
+  }
+
+  return false;
+}
+
 bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
   Register DstReg = MI.getOperand(0).getReg();
@@ -2084,6 +2280,17 @@
   return true;
 }
 
+bool CombinerHelper::applyCombineFMulByNegativeOne(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL");
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildFNeg(DstReg, SrcReg, MI.getFlags());
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
   assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
   Register SrcReg = MI.getOperand(1).getReg();
@@ -2310,6 +2517,13 @@
   return ValAndVReg && ValAndVReg->Value == C;
 }
 
+bool CombinerHelper::matchFConstantOp(const MachineOperand &MOP, double C) {
+  if (!MOP.isReg())
+    return false;
+  const ConstantFP *FCst = getConstantFPVRegVal(MOP.getReg(), MRI);
+  return FCst && FCst->getValueAPF().isExactlyValue(C);
+}
+
 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                      unsigned OpIdx) {
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
@@ -2351,6 +2565,12 @@
                        MRI);
 }
 
+bool CombinerHelper::matchOperandIsFPZero(MachineInstr &MI, unsigned OpIdx) {
+  return matchFConstantOp(MI.getOperand(OpIdx), 0.0) &&
+         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
+                       MRI);
+}
+
 bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
   MachineOperand &MO = MI.getOperand(OpIdx);
   return MO.isReg() &&
Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-fmul.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/combine-fmul.mir
@@ -0,0 +1,211 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+
+---
+name: fmul_by_zero
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: fmul_by_zero
+    ; CHECK: liveins: $x0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK: $x0 = COPY [[C]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FCONSTANT double 0.0
+    %2:_(s64) = G_FMUL %0, %1(s64)
+    $x0 = COPY %2(s64)
+...
+---
+name: fmul_vector_by_zero
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $q0
+    ; Currently not implemented.
+    ; CHECK-LABEL: name: fmul_vector_by_zero
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[BUILD_VECTOR]]
+    ; CHECK: $q0 = COPY [[FMUL]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_FMUL %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+...
+---
+name: fmul_by_one
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: fmul_by_one
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: $x0 = COPY [[COPY]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FCONSTANT double 1.0
+    %2:_(s64) = G_FMUL %0, %1(s64)
+    $x0 = COPY %2(s64)
+...
+---
+name: mul_vector_by_one
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $q0
+    ; Currently not implemented.
+    ; CHECK-LABEL: name: mul_vector_by_one
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[BUILD_VECTOR]]
+    ; CHECK: $q0 = COPY [[FMUL]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_FCONSTANT float 1.0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_FMUL %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+...
+---
+name: fmul_by_neg_one
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: fmul_by_neg_one
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]]
+    ; CHECK: $x0 = COPY [[FNEG]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FCONSTANT double -1.0
+    %2:_(s64) = G_FMUL %0, %1(s64)
+    $x0 = COPY %2(s64)
+...
+---
+name: fmul_vector_by_neg_one
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $q0
+    ; Currently not implemented.
+    ; CHECK-LABEL: name: fmul_vector_by_neg_one
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[BUILD_VECTOR]]
+    ; CHECK: $q0 = COPY [[FMUL]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_FCONSTANT float -1.0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_FMUL %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+...
+---
+name: fmul_fneg_fneg
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: fmul_fneg_fneg
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY]]
+    ; CHECK: $x0 = COPY [[FMUL]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FNEG %0(s64)
+    %2:_(s64) = G_FMUL %1(s64), %1(s64)
+    $x0 = COPY %2(s64)
+...
+---
+name: fmul_vector_fneg_fneg
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $q0
+    ; Currently not implemented.
+    ; CHECK-LABEL: name: fmul_vector_fneg_fneg
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[COPY]], [[COPY]]
+    ; CHECK: $q0 = COPY [[FMUL]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = G_FNEG %0(<4 x s32>)
+    %2:_(<4 x s32>) = G_FMUL %1(<4 x s32>), %1(<4 x s32>)
+    $q0 = COPY %2(<4 x s32>)
+...
+---
+name: make_fp_negative
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: make_fp_negative
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK: [[FNEG:%[0-9]+]]:_(s32) = nnan nsz G_FNEG [[FABS]]
+    ; CHECK: $w0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(s1) = G_FCMP floatpred(ogt), %0, %1
+    %3:_(s32) = G_FCONSTANT float -1.0
+    %4:_(s32) = G_FCONSTANT float 1.0
+    %5:_(s32) = G_SELECT %2(s1), %3, %4
+    %6:_(s32) = nnan nsz G_FMUL %0, %5
+    $w0 = COPY %6(s32)
+...
+---
+name: make_fp_positive
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: make_fp_positive
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK: $w0 = COPY [[FABS]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(s1) = G_FCMP floatpred(ogt), %0, %1
+    %3:_(s32) = G_FCONSTANT float 1.0
+    %4:_(s32) = G_FCONSTANT float -1.0
+    %5:_(s32) = G_SELECT %2(s1), %3, %4
+    %6:_(s32) = nnan nsz G_FMUL %0, %5
+    $w0 = COPY %6(s32)
+...
Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-unsafe-fp-math.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/combine-unsafe-fp-math.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=SAFE
+# RUN: llc -enable-unsafe-fp-math -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=UNSAFE
+
+---
+name: fmul_fmul_cst
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0
+    ; SAFE-LABEL: name: fmul_fmul_cst
+    ; SAFE: liveins: $x0
+    ; SAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; SAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
+    ; SAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]]
+    ; SAFE: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FMUL]], [[C]]
+    ; SAFE: $x0 = COPY [[FMUL1]](s64)
+    ; UNSAFE-LABEL: name: fmul_fmul_cst
+    ; UNSAFE: liveins: $x0
+    ; UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
+    ; UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]]
+    ; UNSAFE: $x0 = COPY [[FMUL]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FCONSTANT double 2.0
+    %2:_(s64) = G_FMUL %0, %1(s64)
+    %3:_(s64) = G_FMUL %2, %1(s64)
+    $x0 = COPY %3(s64)
+...
+---
+name: fmul_fadd_same_reg
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0
+    ; SAFE-LABEL: name: fmul_fadd_same_reg
+    ; SAFE: liveins: $x0
+    ; SAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; SAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
+    ; SAFE: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[COPY]]
+    ; SAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[FADD]], [[C]]
+    ; SAFE: $x0 = COPY [[FMUL]](s64)
+    ; UNSAFE-LABEL: name: fmul_fadd_same_reg
+    ; UNSAFE: liveins: $x0
+    ; UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00
+    ; UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[C]]
+    ; UNSAFE: $x0 = COPY [[FMUL]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FCONSTANT double 4.0
+    %2:_(s64) = G_FADD %0, %0
+    %3:_(s64) = G_FMUL %2, %1(s64)
+    $x0 = COPY %3(s64)
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
@@ -70,15 +70,12 @@
 ; GFX9-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
-; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
-; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
 ; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v1
 ; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 16
@@ -203,18 +200,13 @@
 ; GFX9-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
-; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
-; GFX8-NEXT:    v_xor_b32_e32 v2, s4, v2
-; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
-; GFX8-NEXT:    v_xor_b32_e32 v3, s4, v3
 ; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
 ; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mul_f16_e32 v2, v1, v3
@@ -338,21 +330,14 @@
 ; GFX9-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v4 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v5 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v4
+; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v5
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
-; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
-; GFX8-NEXT:    v_xor_b32_e32 v3, s4, v3
-; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
-; GFX8-NEXT:    v_xor_b32_e32 v2, s4, v2
-; GFX8-NEXT:    v_xor_b32_e32 v4, s4, v4
-; GFX8-NEXT:    v_xor_b32_e32 v5, s4, v5
 ; GFX8-NEXT:    v_mul_f16_e32 v6, v0, v3
 ; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mul_f16_e32 v3, v1, v4
@@ -498,24 +483,15 @@
 ; GFX9-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v4 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v5 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v6 neg_lo:[1,1] neg_hi:[1,1]
-; GFX9-NEXT:    v_pk_mul_f16 v3, v3, v7 neg_lo:[1,1] neg_hi:[1,1]
+; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v4
+; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v5
+; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v6
+; GFX9-NEXT:    v_pk_mul_f16 v3, v3, v7
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
-; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
-; GFX8-NEXT:    v_xor_b32_e32 v4, s4, v4
-; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
-; GFX8-NEXT:    v_xor_b32_e32 v2, s4, v2
-; GFX8-NEXT:    v_xor_b32_e32 v3, s4, v3
-; GFX8-NEXT:    v_xor_b32_e32 v5, s4, v5
-; GFX8-NEXT:    v_xor_b32_e32 v6, s4, v6
-; GFX8-NEXT:    v_xor_b32_e32 v7, s4, v7
 ; GFX8-NEXT:    v_mul_f16_e32 v8, v0, v4
 ; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mul_f16_e32 v4, v1, v5