Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3003,6 +3003,23 @@
       BasicBlock *BB = I->getParent();
       Instruction *UserInst = cast<Instruction>(*I->user_begin());
       BasicBlock *UserParent;
+      bool SinkCandidate = true;
+
+      // See if there is a call to an inline function in the BB after the value
+      // definition. If so, the CFG will be more complex when the inlining is
+      // done. In this case, don't sink the instruction.
+      for (BasicBlock::iterator Scan = I->getIterator(), E = BB->end();
+           Scan != E; ++Scan) {
+        CallSite CS(&*Scan);
+        if (!CS)
+          continue;
+
+        if (CS.hasFnAttr(Attribute::InlineHint) ||
+            CS.hasFnAttr(Attribute::AlwaysInline)) {
+          SinkCandidate = false;
+          break;
+        }
+      }
 
       // Get the block the use occurs in.
       if (PHINode *PN = dyn_cast<PHINode>(UserInst))
@@ -3010,7 +3027,11 @@
       else
         UserParent = UserInst->getParent();
 
-      if (UserParent != BB) {
+      // No need to sink this instruction.
+      if (UserParent == BB)
+        SinkCandidate = false;
+
+      if (SinkCandidate) {
         bool UserIsSuccessor = false;
         // See if the user is one of our successors.
         for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
@@ -3023,17 +3044,39 @@
         // only has us as a predecessors (we'd have to split the critical edge
         // otherwise), we can keep going.
         if (UserIsSuccessor && UserParent->getUniquePredecessor()) {
-          // Okay, the CFG is simple enough, try to sink this instruction.
-          if (TryToSinkInstruction(I, UserParent)) {
-            DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
-            MadeIRChange = true;
-            // We'll add uses of the sunk instruction below, but since sinking
-            // can expose opportunities for it's *operands* add them to the
-            // worklist
-            for (Use &U : I->operands())
-              if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
-                Worklist.Add(OpI);
+          // Again, check if there is a function call which will complicate the
+          // CFG.
+          for (BasicBlock::iterator Scan = UserParent->begin(),
+                                    E = UserParent->end();
+               Scan != E; ++Scan) {
+            if (Scan == UserInst->getIterator())
+              break;
+
+            CallSite CS(&*Scan);
+            if (!CS)
+              continue;
+
+            if (CS.hasFnAttr(Attribute::InlineHint) ||
+                CS.hasFnAttr(Attribute::AlwaysInline)) {
+              SinkCandidate = false;
+              break;
+            }
           }
+        } else
+          SinkCandidate = false;
+      }
+
+      if (SinkCandidate) {
+        // Okay, the CFG is simple enough, try to sink this instruction.
+        if (TryToSinkInstruction(I, UserParent)) {
+          DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+          MadeIRChange = true;
+          // We'll add uses of the sunk instruction below, but since sinking
+          // can expose opportunities for it's *operands* add them to the
+          // worklist
+          for (Use &U : I->operands())
+            if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+              Worklist.Add(OpI);
         }
       }
     }
Index: test/Transforms/InstCombine/sink_inline.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/sink_inline.ll
@@ -0,0 +1,76 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+;; This tests that instructions are not sunk into each user block when
+;; there is an inline function call interfering.
+
+declare float @other(float*, i32) readonly
+
+define float @inline_func(float* %in, i32 %radius) readonly alwaysinline {
+entry:
+  %cmp12 = icmp sgt i32 %radius, 0
+  br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %radius to i64
+  br label %for.body
+
+for.cond.cleanup:
+  %max_val.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %max_val.0., %for.body ]
+  ret float %max_val.0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %max_val.013 = phi float [ 0.000000e+00, %for.body.preheader ], [ %max_val.0., %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %in, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %cmp1 = fcmp ogt float %max_val.013, %0
+  %max_val.0. = select i1 %cmp1, float %max_val.013, float %0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define void @test1(float* nocapture %out, float* %in, i32 %w, i32 %n) {
+entry:
+  %idxprom = sext i32 %w to i64
+  %arrayidx = getelementptr inbounds float, float* %in, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @inline_func(float* %in, i32 %n)
+  %cmp = fcmp oeq float %call, -1.000000e+01
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+; CHECK: if.then
+; CHECK-NOT: load float
+; CHECK: fcmp oge float
+  %cmp1 = fcmp oge float %0, %call
+  %conv = uitofp i1 %cmp1 to float
+  store float %conv, float* %out, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+
+define void @test2(float* nocapture %out, float* %in, i32 %w, i32 %n) {
+entry:
+  %idxprom = sext i32 %w to i64
+  %arrayidx = getelementptr inbounds float, float* %in, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+  %call = tail call float @other(float* %in, i32 %n)
+  %cmp = fcmp oeq float %call, -1.000000e+01
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+; CHECK: if.then
+; CHECK: load float
+; CHECK: fcmp oge float
+  %cmp1 = fcmp oge float %0, %call
+  %conv = uitofp i1 %cmp1 to float
+  store float %conv, float* %out, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}