Index: llvm/include/llvm/Transforms/Scalar/JumpThreading.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -44,6 +44,7 @@ class SelectInst; class SwitchInst; class TargetLibraryInfo; +class TargetTransformInfo; class Value; /// A private "module" namespace for types and utilities used by @@ -78,6 +79,7 @@ /// revectored to the false side of the second if. class JumpThreadingPass : public PassInfoMixin { TargetLibraryInfo *TLI; + TargetTransformInfo *TTI; LazyValueInfo *LVI; AAResults *AA; DomTreeUpdater *DTU; @@ -99,9 +101,9 @@ JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1); // Glue for old PM. - bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI, - AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData, - std::unique_ptr BFI, + bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU, + bool HasProfileData, std::unique_ptr BFI, std::unique_ptr BPI); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); Index: llvm/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -331,7 +331,7 @@ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(), + bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(), std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { dbgs() << "LVI for function '" << F.getName() << "':\n"; @@ -360,7 +360,7 @@ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(), + bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(), std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { @@ -377,12 +377,14 @@ } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, - LazyValueInfo *LVI_, AliasAnalysis *AA_, - DomTreeUpdater *DTU_, bool HasProfileData_, + TargetTransformInfo *TTI_, LazyValueInfo *LVI_, + AliasAnalysis *AA_, DomTreeUpdater *DTU_, + bool HasProfileData_, std::unique_ptr BFI_, std::unique_ptr BPI_) { LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; + TTI = TTI_; LVI = LVI_; AA = AA_; DTU = DTU_; @@ -513,10 +515,11 @@ /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block -/// when exceeding the threshold. If duplication is impossible, returns ~0U. -static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, - Instruction *StopAt, - unsigned Threshold) { +/// when exceeding the threshold. If duplication is impossible, returns invalid +/// cost. +static InstructionCost +getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, BasicBlock *BB, + Instruction *StopAt, unsigned Threshold) { assert(StopAt->getParent() == BB && "Not an instruction from proper BB?"); /// Ignore PHI nodes, these will be flattened when duplication happens. BasicBlock::const_iterator I(BB->getFirstNonPHI()); @@ -543,50 +546,25 @@ // Sum up the cost of each instruction until we get to the terminator. Don't // include the terminator because the copy won't include it. - unsigned Size = 0; + InstructionCost Size = 0; for (; &*I != StopAt; ++I) { // Stop scanning the block if we've reached the threshold. if (Size > Threshold) return Size; - // Debugger intrinsics don't incur code size. - if (isa(I)) continue; - - // Pseudo-probes don't incur code size. - if (isa(I)) - continue; - - // If this is a pointer->pointer bitcast, it is free. - if (isa(I) && I->getType()->isPointerTy()) - continue; - - // Freeze instruction is free, too. - if (isa(I)) - continue; - // Bail out if this instruction gives back a token type, it is not possible // to duplicate it if it is used outside this BB. if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB)) - return ~0U; + return InstructionCost::getInvalid(); - // All other instructions count for at least one unit. - ++Size; - - // Calls are more expensive. If they are non-intrinsic calls, we model them - // as having cost of 4. If they are a non-vector intrinsic, we model them - // as having cost of 2 total, and if they are a vector intrinsic, we model - // them as having cost 1. if (const CallInst *CI = dyn_cast(I)) { + // Cannot duplicate noduplicate or convergent calls. if (CI->cannotDuplicate() || CI->isConvergent()) - // Blocks with NoDuplicate are modelled as having infinite cost, so they - // are never duplicated. - return ~0U; - else if (!isa(CI)) - Size += 3; - else if (!CI->getType()->isVectorTy()) - Size += 1; + return InstructionCost::getInvalid(); } + + Size += TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency); } return Size > Bonus ? Size - Bonus : 0; @@ -2234,10 +2212,10 @@ } // Compute the cost of duplicating BB and PredBB. - unsigned BBCost = - getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); - unsigned PredBBCost = getJumpThreadDuplicationCost( - PredBB, PredBB->getTerminator(), BBDupThreshold); + InstructionCost BBCost = getJumpThreadDuplicationCost( + TTI, BB, BB->getTerminator(), BBDupThreshold); + InstructionCost PredBBCost = getJumpThreadDuplicationCost( + TTI, PredBB, PredBB->getTerminator(), BBDupThreshold); // Give up if costs are too high. We need to check BBCost and PredBBCost // individually before checking their sum because getJumpThreadDuplicationCost @@ -2345,8 +2323,8 @@ return false; } - unsigned JumpThreadCost = - getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); + InstructionCost JumpThreadCost = getJumpThreadDuplicationCost( + TTI, BB, BB->getTerminator(), BBDupThreshold); if (JumpThreadCost > BBDupThreshold) { LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName() << "' - Cost is too high: " << JumpThreadCost << "\n"); @@ -2614,8 +2592,8 @@ return false; } - unsigned DuplicationCost = - getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); + InstructionCost DuplicationCost = getJumpThreadDuplicationCost( + TTI, BB, BB->getTerminator(), BBDupThreshold); if (DuplicationCost > BBDupThreshold) { LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() << "' - Cost is too high: " << DuplicationCost << "\n"); @@ -3031,7 +3009,8 @@ ValueToValueMapTy UnguardedMapping, GuardedMapping; Instruction *AfterGuard = Guard->getNextNode(); - unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold); + InstructionCost Cost = getJumpThreadDuplicationCost( + TTI, BB, AfterGuard, BBDupThreshold); if (Cost > BBDupThreshold) return false; // Duplicate all instructions before the guard and the guard itself to the Index: llvm/test/Transforms/JumpThreading/free_instructions.ll =================================================================== --- llvm/test/Transforms/JumpThreading/free_instructions.ll +++ llvm/test/Transforms/JumpThreading/free_instructions.ll @@ -5,26 +5,28 @@ ; the jump threading threshold, as everything else are free instructions. define i32 @free_instructions(i1 %c, i32* %p) { ; CHECK-LABEL: @free_instructions( -; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] -; CHECK: if: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF2:%.*]], label [[ELSE2:%.*]] +; CHECK: if2: ; CHECK-NEXT: store i32 -1, i32* [[P:%.*]], align 4 -; CHECK-NEXT: br label [[JOIN:%.*]] -; CHECK: else: -; CHECK-NEXT: store i32 -2, i32* [[P]], align 4 -; CHECK-NEXT: br label [[JOIN]] -; CHECK: join: ; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) ; CHECK-NEXT: store i32 1, i32* [[P]], align 4, !noalias !0 ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ] ; CHECK-NEXT: store i32 2, i32* [[P]], align 4 +; CHECK-NEXT: [[P21:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: [[P32:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P21]]) +; CHECK-NEXT: [[P43:%.*]] = bitcast i8* [[P32]] to i32* +; CHECK-NEXT: store i32 3, i32* [[P43]], align 4, !invariant.group !3 +; CHECK-NEXT: ret i32 0 +; CHECK: else2: +; CHECK-NEXT: store i32 -2, i32* [[P]], align 4 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) +; CHECK-NEXT: store i32 1, i32* [[P]], align 4, !noalias !4 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ] +; CHECK-NEXT: store i32 2, i32* [[P]], align 4 ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* ; CHECK-NEXT: [[P3:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P2]]) ; CHECK-NEXT: [[P4:%.*]] = bitcast i8* [[P3]] to i32* ; CHECK-NEXT: store i32 3, i32* [[P4]], align 4, !invariant.group !3 -; CHECK-NEXT: br i1 [[C]], label [[IF2:%.*]], label [[ELSE2:%.*]] -; CHECK: if2: -; CHECK-NEXT: ret i32 0 -; CHECK: else2: ; CHECK-NEXT: ret i32 1 ; br i1 %c, label %if, label %else Index: llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll +++ llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll @@ -32,13 +32,10 @@ ; ASSUMPTIONS-OFF-NEXT: br label [[COMMON_RET]] ; ; ASSUMPTIONS-ON-LABEL: @caller1( -; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE1:%.*]] -; ASSUMPTIONS-ON: false1: -; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4 -; ASSUMPTIONS-ON-NEXT: br label [[COMMON_RET]] +; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE2:%.*]] ; ASSUMPTIONS-ON: common.ret: -; ASSUMPTIONS-ON-NEXT: [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE1]] ], [ 2, [[TMP0:%.*]] ] -; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(i64* [[PTR]], i64 8) ] +; ASSUMPTIONS-ON-NEXT: [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE2]] ], [ 2, [[TMP0:%.*]] ] +; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(i64* [[PTR:%.*]], i64 8) ] ; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 @@ -47,6 +44,9 @@ ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 [[DOTSINK]], i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: ret void +; ASSUMPTIONS-ON: false2: +; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR]], align 4 +; ASSUMPTIONS-ON-NEXT: br label [[COMMON_RET]] ; br i1 %c, label %true1, label %false1