diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16381,31 +16381,56 @@
   }
 }
 
-// Currently this is a copy from AArch64TargetLowering::isProfitableToHoist.
-// FIXME: add more patterns which are profitable to hoist.
+// FIXME: add more patterns which are not profitable to hoist.
 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
-  if (I->getOpcode() != Instruction::FMul)
-    return true;
-
   if (!I->hasOneUse())
     return true;
 
   Instruction *User = I->user_back();
   assert(User && "A single use instruction with no uses.");
 
-  if (User->getOpcode() != Instruction::FSub &&
-      User->getOpcode() != Instruction::FAdd)
-    return true;
+  switch (I->getOpcode()) {
+  case Instruction::FMul: {
+    // Don't break FMA; PowerPC prefers FMA.
+    if (User->getOpcode() != Instruction::FSub &&
+        User->getOpcode() != Instruction::FAdd)
+      return true;
 
-  const TargetOptions &Options = getTargetMachine().Options;
-  const Function *F = I->getFunction();
-  const DataLayout &DL = F->getParent()->getDataLayout();
-  Type *Ty = User->getOperand(0)->getType();
-
-  return !(
-      isFMAFasterThanFMulAndFAdd(*F, Ty) &&
-      isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
-      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+    const TargetOptions &Options = getTargetMachine().Options;
+    const Function *F = I->getFunction();
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    Type *Ty = User->getOperand(0)->getType();
+
+    return !(
+        isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+        isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
+        (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
+  }
+  case Instruction::Load: {
+    // Don't break the "store (load float*)" pattern; it is combined into
+    // "store (load i32*)" by a later InstCombine pass (see
+    // combineLoadToOperationType). On PowerPC, loading a floating-point
+    // value takes more cycles than loading a 32-bit integer.
+    LoadInst *LI = cast<LoadInst>(I);
+    // Loads that combineLoadToOperationType leaves alone, such as ordered
+    // loads, are still profitable to hoist.
+    // A swifterror load only ever loads through a pointer-to-pointer, so
+    // the float type check below already rejects it.
+    if (!LI->isUnordered())
+      return true;
+
+    if (User->getOpcode() != Instruction::Store)
+      return true;
+
+    if (I->getType()->getTypeID() != Type::FloatTyID)
+      return true;
+
+    return false;
+  }
+  default:
+    return true;
+  }
+  return true;
 }
 
 const MCPhysReg *
diff --git a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll
--- a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll
+++ b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll
@@ -3,7 +3,7 @@
 define float @foo(float* %src, float* %dest, i32 signext %count, i32 signext %cond) {
 ; CHECK-LABEL: @foo(
 ; CHECK-LABEL: entry:
-; CHECK: %0 = load float, float* %arrayidx, align 4
+; CHECK-NOT: load float
 entry:
   %cmp = icmp sgt i32 %cond, 10
   %idxprom = sext i32 %count to i64
@@ -11,14 +11,15 @@
   br i1 %cmp, label %if.then, label %if.else
 
 ; CHECK-LABEL: if.then:
-; CHECK-NOT: load float
+; CHECK: %0 = load float, float* %arrayidx, align 4
 if.then:                                          ; preds = %entry
   %0 = load float, float* %arrayidx, align 4
   %res = fmul float %0, 3.000000e+00
   br label %if.end
 
 ; CHECK-LABEL: if.else:
-; CHECK-NOT: load float
+; CHECK: %1 = load float, float* %arrayidx, align 4
+; CHECK: store float %1, float* %arrayidx4, align 4
 if.else:                                          ; preds = %entry
   %1 = load float, float* %arrayidx, align 4
   %idxprom3 = sext i32 %count to i64
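
Note: for context, a minimal sketch of the IR shape the new Load case protects. This is illustrative only: @copy is a made-up function, and the rewritten IR in the trailing comments is an assumption about what InstCombine's combineLoadToOperationType produced at the time of this patch, not actual pass output.

; A float loaded only to be stored again (typed-pointer syntax, matching
; the test above).
define void @copy(float* %src, float* %dst) {
entry:
  %v = load float, float* %src, align 4
  store float %v, float* %dst, align 4
  ret void
}

; InstCombine can turn this round trip into an integer copy, which
; PowerPC loads in fewer cycles than a float:
;   %0 = bitcast float* %src to i32*
;   %v1 = load i32, i32* %0, align 4
;   %1 = bitcast float* %dst to i32*
;   store i32 %v1, i32* %1, align 4

In the test, both branches load the same float, so SimplifyCFG would otherwise hoist a single shared load into the entry block. That hoisted load would have two users (an fmul and a store), so the one-use load-then-store shape no longer matches and the combine cannot fire; returning false from isProfitableToHoist keeps the load next to its store.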