Index: lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- lib/Transforms/InstCombine/InstCombineInternal.h
+++ lib/Transforms/InstCombine/InstCombineInternal.h
@@ -249,6 +249,7 @@
   // combining and will be updated to reflect any changes.
   LoopInfo *LI;
 
+  TargetTransformInfo &TTI;
   bool MadeIRChange = false;
 
 public:
@@ -256,10 +257,10 @@
                bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
                AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
                OptimizationRemarkEmitter &ORE, const DataLayout &DL,
-               LoopInfo *LI)
+               LoopInfo *LI, TargetTransformInfo &TTI)
       : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
         ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
-        DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {}
+        DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI), TTI(TTI) {}
 
   /// Run the combiner over the entire worklist until it is empty.
   ///
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -57,6 +57,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
@@ -3003,6 +3004,20 @@
       BasicBlock *BB = I->getParent();
       Instruction *UserInst = cast<Instruction>(*I->user_begin());
       BasicBlock *UserParent;
+      bool SinkCandidate = true;
+
+      // Don't sink I if a real call follows it in this block. Indirect calls
+      // count as calls; inline asm and callees TTI says aren't lowered don't.
+      for (auto Scan = std::next(I->getIterator()), E = BB->end(); Scan != E;
+           ++Scan) {
+        if (CallInst *CI = dyn_cast<CallInst>(Scan)) {
+          Function *F = CI->getCalledFunction();
+          if (F ? TTI.isLoweredToCall(F) : !CI->isInlineAsm()) {
+            SinkCandidate = false;
+            break;
+          }
+        }
+      }
 
       // Get the block the use occurs in.
       if (PHINode *PN = dyn_cast<PHINode>(UserInst))
@@ -3010,7 +3025,7 @@
       else
         UserParent = UserInst->getParent();
 
-      if (UserParent != BB) {
+      if (UserParent != BB && SinkCandidate) {
         bool UserIsSuccessor = false;
         // See if the user is one of our successors.
         for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
@@ -3246,8 +3261,8 @@
 static bool combineInstructionsOverFunction(
     Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
     AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
-    OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
-    LoopInfo *LI = nullptr) {
+    OptimizationRemarkEmitter &ORE, TargetTransformInfo &TTI,
+    bool ExpensiveCombines = true, LoopInfo *LI = nullptr) {
   auto &DL = F.getParent()->getDataLayout();
   ExpensiveCombines |= EnableExpensiveCombines;
 
@@ -3277,7 +3292,7 @@
     MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
 
     InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA,
-                    AC, TLI, DT, ORE, DL, LI);
+                    AC, TLI, DT, ORE, DL, LI, TTI);
     IC.MaxArraySizeForCombine = MaxArraySize;
 
     if (!IC.run())
@@ -3293,11 +3308,11 @@
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
-
   auto *AA = &AM.getResult<AAManager>(F);
-  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+
+  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, TTI,
                                        ExpensiveCombines, LI))
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
@@ -3316,6 +3331,7 @@
   AU.addRequired<AAResultsWrapperPass>();
   AU.addRequired<AssumptionCacheTracker>();
   AU.addRequired<TargetLibraryInfoWrapperPass>();
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   AU.addRequired<DominatorTreeWrapperPass>();
   AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
   AU.addPreserved<DominatorTreeWrapperPass>();
@@ -3334,12 +3350,13 @@
   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 
   // Optional analyses.
   auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
 
-  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
+  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, TTI,
                                          ExpensiveCombines, LI);
 }
 
@@ -3349,6 +3366,7 @@
                       "Combine redundant instructions", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
Index: test/Transforms/InstCombine/sink_across_call.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/sink_across_call.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;; Check that an instruction is not sunk into its user's block when a
+;; function call follows it in the defining block.
+
+declare float @other(float*, i32) readonly
+
+define void @test(float* nocapture %out, float* %in, i32 %w, i32 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[W:%.*]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[IN:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @other(float* [[IN]], i32 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[CALL]], -1.000000e+01
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp oge float [[TMP0]], [[CALL]]
+; CHECK-NEXT:    [[CONV:%.*]] = uitofp i1 [[CMP1]] to float
+; CHECK-NEXT:    store float [[CONV]], float* [[OUT:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %idxprom = sext i32 %w to i64
+  %arrayidx = getelementptr inbounds float, float* %in, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @other(float* %in, i32 %n)
+  %cmp = fcmp oeq float %call, -1.000000e+01
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %cmp1 = fcmp oge float %0, %call
+  %conv = uitofp i1 %cmp1 to float
+  store float %conv, float* %out, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}