diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/Register.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/Support/LowLevelTypeImpl.h"
 #include <functional>
 
@@ -736,6 +737,23 @@
   bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info);
 
+  /// Transform G_FMUL(x, G_SELECT(G_FCMP(x > 0.0), -1.0, 1.0)) or
+  ///           G_FMUL(G_SELECT(G_FCMP(x > 0.0), -1.0, 1.0), x) or
+  ///           G_FMUL(x, G_SELECT(G_FCMP(x <= 0.0), 1.0, -1.0)) or
+  ///           G_FMUL(G_SELECT(G_FCMP(x <= 0.0), 1.0, -1.0), x) to
+  ///           G_FNEG(G_FABS(x)).
+  /// Transform G_FMUL(x, G_SELECT(G_FCMP(x > 0.0), 1.0, -1.0)) or
+  ///           G_FMUL(G_SELECT(G_FCMP(x > 0.0), 1.0, -1.0), x) or
+  ///           G_FMUL(x, G_SELECT(G_FCMP(x <= 0.0), -1.0, 1.0)) or
+  ///           G_FMUL(G_SELECT(G_FCMP(x <= 0.0), -1.0, 1.0), x) to
+  ///           G_FABS(x).
+  /// Only a G_FMUL carrying both the nnan and nsz flags is matched.
+  /// On a match, \p MatchInfo holds x and whether the result must be negated.
+  bool matchForceFPNegOrPos(MachineInstr &MI,
+                            std::pair<Register, bool> &MatchInfo);
+  bool applyForceFPNegOrPos(MachineInstr &MI,
+                            std::pair<Register, bool> &MatchInfo);
+
 private:
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -898,6 +898,21 @@
   [{ return Helper.matchCombineFMinMaxNaN(*${root}, ${info}); }]),
   (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${info}); }])>;
 
+// Transform (fmul x, (select (fcmp x > 0.0), -1.0, 1.0)) -> (fneg (fabs x))
+//           (fmul (select (fcmp x > 0.0), -1.0, 1.0), x) -> (fneg (fabs x))
+//           (fmul x, (select (fcmp x <= 0.0), 1.0, -1.0)) -> (fneg (fabs x))
+//           (fmul (select (fcmp x <= 0.0), 1.0, -1.0), x) -> (fneg (fabs x))
+// Transform (fmul x, (select (fcmp x > 0.0), 1.0, -1.0)) -> (fabs x)
+//           (fmul (select (fcmp x > 0.0), 1.0, -1.0), x) -> (fabs x)
+//           (fmul x, (select (fcmp x <= 0.0), -1.0, 1.0)) -> (fabs x)
+//           (fmul (select (fcmp x <= 0.0), -1.0, 1.0), x) -> (fabs x)
+def fp_neg_or_pos_matchinfo : GIDefMatchData<"std::pair<Register, bool>">;
+def fp_neg_or_pos : GICombineRule<
+  (defs root:$root, fp_neg_or_pos_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_FMUL):$root,
+    [{ return Helper.matchForceFPNegOrPos(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyForceFPNegOrPos(*${root}, ${matchinfo}); }])>;
+
 // FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -940,6 +955,8 @@ combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma, combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>; +def fp_combines : GICombineGroup<[fma_combines, fp_neg_or_pos]>; + def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload, combine_indexed_load_store, undef_combines, identity_combines, phi_combines, @@ -956,7 +973,7 @@ truncstore_merge, div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract, constant_fold, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, - and_or_disjoint_mask, fma_combines, fold_binop_into_select]>; + and_or_disjoint_mask, fp_combines, fold_binop_into_select]>; // A combine group used to for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5633,6 +5633,84 @@ return MatchNaN(1) || MatchNaN(2); } +bool CombinerHelper::matchForceFPNegOrPos( + MachineInstr &MI, std::pair &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL"); + // `G_FMUL`s which allow NaNs or consider signed-zero aren't candidates for + // this optimization. 
+ if (!MI.getFlag(MachineInstr::MIFlag::FmNoNans) || + !MI.getFlag(MachineInstr::MIFlag::FmNsz)) + return false; + + Register Src; + CmpInst::Predicate Pred; + const ConstantFP *SelecteeLHS{}; + const ConstantFP *SelecteeRHS{}; + auto MatchFPNegOrPos = [&](Register SelectSrc, Register SameSrc) { + Register CompareeLHS; + const ConstantFP *CompareeRHS{}; + if (!mi_match(SelectSrc, MRI, + m_GISelect(m_GFCmp(m_Pred(Pred), m_Reg(CompareeLHS), + m_GFCst(CompareeRHS)), + m_GFCst(SelecteeLHS), m_GFCst(SelecteeRHS)))) + return false; + if (CompareeLHS != SameSrc || + !CompareeRHS->getValueAPF().isExactlyValue(0.0)) + return false; + Src = SameSrc; + return true; + }; + + Register SrcLHS = MI.getOperand(1).getReg(); + Register SrcRHS = MI.getOperand(2).getReg(); + if (!MatchFPNegOrPos(SrcLHS, SrcRHS) && !MatchFPNegOrPos(SrcRHS, SrcLHS)) + return false; + + APFloat TrueValue = SelecteeLHS->getValueAPF(); + APFloat FalseValue = SelecteeRHS->getValueAPF(); + switch (Pred) { + default: + return false; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: + std::swap(TrueValue, FalseValue); + break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + break; + } + + if (TrueValue.isExactlyValue(-1.0) && FalseValue.isExactlyValue(1.0)) { + MatchInfo = std::make_pair(Src, /*ForceNegate*/ true); + return true; + } + if (TrueValue.isExactlyValue(1.0) && FalseValue.isExactlyValue(-1.0)) { + MatchInfo = std::make_pair(Src, /*ForceNegate*/ false); + return true; + } + + return false; +} + +bool CombinerHelper::applyForceFPNegOrPos( + MachineInstr &MI, std::pair &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FMUL && "Expected a G_FMUL"); + Register Dst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst); + Register Src = MatchInfo.first; + bool ForceNegate = MatchInfo.second; + + Builder.setInstrAndDebugLoc(MI); + if (ForceNegate) { + auto FAbsMIB = Builder.buildFAbs({DstTy}, {Src}); + Builder.buildFNeg({Dst}, {FAbsMIB}, MI.getFlags()); + } else { + 
Builder.buildFAbs({Dst}, {Src}); + } + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fmul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fmul.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fmul.mir @@ -0,0 +1,151 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s + +--- +name: make_fp_negative_rhs_select_gt +body: | + bb.1: + liveins: $w0 + ; CHECK-LABEL: name: make_fp_negative_rhs_select_gt + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan nsz G_FNEG [[FABS]] + ; CHECK-NEXT: $w0 = COPY [[FNEG]](s32) + %0:_(s32) = COPY $w0 + %1:_(s32) = G_FCONSTANT float 0.0 + %2:_(s1) = G_FCMP floatpred(ogt), %0, %1 + %3:_(s32) = G_FCONSTANT float -1.0 + %4:_(s32) = G_FCONSTANT float 1.0 + %5:_(s32) = G_SELECT %2(s1), %3, %4 + %6:_(s32) = nnan nsz G_FMUL %0, %5 + $w0 = COPY %6(s32) +... +--- +name: make_fp_positive_rhs_select_gt +body: | + bb.1: + liveins: $w0 + ; CHECK-LABEL: name: make_fp_positive_rhs_select_gt + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]] + ; CHECK-NEXT: $w0 = COPY [[FABS]](s32) + %0:_(s32) = COPY $w0 + %1:_(s32) = G_FCONSTANT float 0.0 + %2:_(s1) = G_FCMP floatpred(ogt), %0, %1 + %3:_(s32) = G_FCONSTANT float 1.0 + %4:_(s32) = G_FCONSTANT float -1.0 + %5:_(s32) = G_SELECT %2(s1), %3, %4 + %6:_(s32) = nnan nsz G_FMUL %0, %5 + $w0 = COPY %6(s32) +... 
+---
+name: make_fp_negative_rhs_select_le # fmul x, (select (x <= 0.0), 1.0, -1.0) -> fneg (fabs x)
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: make_fp_negative_rhs_select_le
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan nsz G_FNEG [[FABS]]
+    ; CHECK-NEXT: $w0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(s1) = G_FCMP floatpred(ole), %0, %1
+    %3:_(s32) = G_FCONSTANT float 1.0
+    %4:_(s32) = G_FCONSTANT float -1.0
+    %5:_(s32) = G_SELECT %2(s1), %3, %4
+    %6:_(s32) = nnan nsz G_FMUL %0, %5
+    $w0 = COPY %6(s32)
+...
+---
+name: make_fp_positive_rhs_select_le # fmul x, (select (x <= 0.0), -1.0, 1.0) -> fabs x
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: make_fp_positive_rhs_select_le
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK-NEXT: $w0 = COPY [[FABS]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(s1) = G_FCMP floatpred(ole), %0, %1
+    %3:_(s32) = G_FCONSTANT float -1.0
+    %4:_(s32) = G_FCONSTANT float 1.0
+    %5:_(s32) = G_SELECT %2(s1), %3, %4
+    %6:_(s32) = nnan nsz G_FMUL %0, %5
+    $w0 = COPY %6(s32)
+...
+---
+name: make_fp_negative_lhs_select_gt # fmul (select (x > 0.0), -1.0, 1.0), x -> fneg (fabs x)
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: make_fp_negative_lhs_select_gt
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan nsz G_FNEG [[FABS]]
+    ; CHECK-NEXT: $w0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(s1) = G_FCMP floatpred(ogt), %0, %1
+    %3:_(s32) = G_FCONSTANT float -1.0
+    %4:_(s32) = G_FCONSTANT float 1.0
+    %5:_(s32) = G_SELECT %2(s1), %3, %4
+    %6:_(s32) = nnan nsz G_FMUL %5, %0
+    $w0 = COPY %6(s32)
+...
+---
+name: make_fp_positive_lhs_select_gt # fmul (select (x > 0.0), 1.0, -1.0), x -> fabs x
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: make_fp_positive_lhs_select_gt
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK-NEXT: $w0 = COPY [[FABS]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(s1) = G_FCMP floatpred(ogt), %0, %1
+    %3:_(s32) = G_FCONSTANT float 1.0
+    %4:_(s32) = G_FCONSTANT float -1.0
+    %5:_(s32) = G_SELECT %2(s1), %3, %4
+    %6:_(s32) = nnan nsz G_FMUL %5, %0
+    $w0 = COPY %6(s32)
+...
+---
+name: make_fp_negative_lhs_select_le # fmul (select (x <= 0.0), 1.0, -1.0), x -> fneg (fabs x)
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: make_fp_negative_lhs_select_le
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan nsz G_FNEG [[FABS]]
+    ; CHECK-NEXT: $w0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(s1) = G_FCMP floatpred(ole), %0, %1
+    %3:_(s32) = G_FCONSTANT float 1.0
+    %4:_(s32) = G_FCONSTANT float -1.0
+    %5:_(s32) = G_SELECT %2(s1), %3, %4
+    %6:_(s32) = nnan nsz G_FMUL %5, %0
+    $w0 = COPY %6(s32)
+...
+---
+name: make_fp_positive_lhs_select_le # fmul (select (x <= 0.0), -1.0, 1.0), x -> fabs x
+body: |
+  bb.1:
+    liveins: $w0
+    ; CHECK-LABEL: name: make_fp_positive_lhs_select_le
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[COPY]]
+    ; CHECK-NEXT: $w0 = COPY [[FABS]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_FCONSTANT float 0.0
+    %2:_(s1) = G_FCMP floatpred(ole), %0, %1
+    %3:_(s32) = G_FCONSTANT float -1.0
+    %4:_(s32) = G_FCONSTANT float 1.0
+    %5:_(s32) = G_SELECT %2(s1), %3, %4
+    %6:_(s32) = nnan nsz G_FMUL %5, %0
+    $w0 = COPY %6(s32)
+...