diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -909,6 +909,12 @@ bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; + /// isProfitableToHoist - Check if it is profitable to hoist instruction + /// \p I to its dominator block. + /// For example, it is not profitable if \p I and its only user can form an + /// FMA instruction, because PowerPC prefers FMADD. + bool isProfitableToHoist(Instruction *I) const override; + const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; // Should we expand the build vector with shuffles? diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15318,6 +15318,33 @@ } } +// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist. +// FIXME: add more patterns which are profitable to hoist. 
+bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
+  if (I->getOpcode() != Instruction::FMul)
+    return true;
+  // Only an fmul with a single user is a candidate for fusing with that user.
+  if (!I->hasOneUse())
+    return true;
+  // The fmul has exactly one use here, so user_back() yields that sole user.
+  Instruction *User = I->user_back();
+  assert(User && "A single use instruction with no uses.");
+  // Anything other than an fadd/fsub user cannot form an FMA with the fmul.
+  if (User->getOpcode() != Instruction::FSub &&
+      User->getOpcode() != Instruction::FAdd)
+    return true;
+  // Collect what the fusion queries below need (options, function, layout).
+  const TargetOptions &Options = getTargetMachine().Options;
+  const Function *F = I->getFunction();
+  const DataLayout &DL = F->getParent()->getDataLayout();
+  Type *Ty = User->getOperand(0)->getType();
+  // Refuse to hoist only if FMA is faster, legal/custom, and fusion is allowed.
+  return !(
+      isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+      isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+}
+
 const MCPhysReg *
 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
   // LR is a callee-save register, but we must treat it as clobbered by any call
diff --git a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll
--- a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll
+++ b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-fma.ll
@@ -11,14 +11,15 @@
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00
 ; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[X:%.*]], align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[A:%.*]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK: if.then:
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
 ; CHECK-NEXT: [[MUL:%.*]] = fadd fast double 1.000000e+00, [[TMP3]]
 ; CHECK-NEXT: store double [[MUL]], double* [[Y]], align 8
 ; CHECK-NEXT: br label [[IF_END:%.*]]
 ; CHECK: if.else:
-; CHECK-NEXT: [[SUB1:%.*]] = fsub fast double [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[MUL1:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[SUB1:%.*]] = fsub fast double
[[MUL1]], [[TMP0]] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr double, double* [[Y]], i32 1 ; CHECK-NEXT: store double [[SUB1]], double* [[GEP1]], align 8 ; CHECK-NEXT: br label [[IF_END]]