diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -57,6 +58,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PredicateInfo.h" #include @@ -177,6 +179,9 @@ LLVMContext &Ctx; public: + DenseMap FunctionsToDuplicate; + DenseMap DuplicateToOriginal; + DenseMap CBToDuplicate; void addAnalysis(Function &F, AnalysisResultsForFn A) { AnalysisResults.insert({&F, std::move(A)}); } @@ -530,10 +535,10 @@ // update the call sites with the new function result and do not have to // propagate the call arguments. if (isa(I)) { - for (User *U : I->users()) { + SmallVector Users(I->user_begin(), I->user_end()); + for (User *U : Users) if (auto *CB = dyn_cast(U)) handleCallResult(*CB); - } } else { for (User *U : I->users()) if (auto *UI = dyn_cast(U)) @@ -542,9 +547,19 @@ auto Iter = AdditionalUsers.find(I); if (Iter != AdditionalUsers.end()) { - for (User *U : Iter->second) + for (User *U : Iter->second) { + if (auto *CB = dyn_cast(U)) { + if (CBToDuplicate.count(CB)) { + Function *F = CB->getCalledFunction(); + CB->setCalledFunction(CBToDuplicate[CB]); + handleCallResult(*CB); + CB->setCalledFunction(F); + continue; + } + } if (auto *UI = dyn_cast(U)) OperandChangedState(UI); + } } } void handleCallOverdefined(CallBase &CB); @@ -1169,8 +1184,56 @@ } void SCCPSolver::visitCallBase(CallBase &CB) { + handleCallResult(CB); handleCallArguments(CB); + + Function *F = CB.getCalledFunction(); + if (!TrackingIncomingArguments.empty() && + TrackingIncomingArguments.count(F)) { + if (!CBToDuplicate.count(&CB) && + !DuplicateToOriginal.count(CB.getParent()->getParent()) && + FunctionsToDuplicate.count(F) && all_of(CB.args(), [this](Value *A) { + if (auto *STy = dyn_cast(A->getType())) + return false; + + return isConstant(getValueState(A)); + })) { + + ValueToValueMapTy VMap; + auto *NF = CloneFunction(F, VMap); + for (BasicBlock &BB : *NF) { + for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) { + Instruction *Inst = &*BI++; + if (auto *II = dyn_cast(Inst)) { + if (II->getIntrinsicID() == Intrinsic::ssa_copy) { + Value *Op = II->getOperand(0); + Inst->replaceAllUsesWith(Op); + Inst->eraseFromParent(); + } + } + } + } + + DuplicateToOriginal.insert({NF, F}); + CBToDuplicate.insert({&CB, NF}); + AddTrackedFunction(NF); + addAdditionalUser(NF, &CB); + AddArgumentTrackedFunction(NF); + DominatorTree DT(*NF); + AssumptionCache AC(*NF); + addAnalysis(*NF, {std::make_unique(*NF, DT, AC), nullptr, + nullptr}); + } + } + + if (F && CBToDuplicate.count(&CB)) { + CB.setCalledFunction(CBToDuplicate[&CB]); + handleCallResult(CB); + // dbgs() << "CB " << CB << "\n"; + handleCallArguments(CB); + CB.setCalledFunction(F); + } } void SCCPSolver::handleCallOverdefined(CallBase &CB) { @@ -1225,6 +1288,7 @@ // the formal arguments of the function. if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)) { + // dbgs() << "Exec entry of " << F->getName() << "\n"; MarkBlockExecutable(&F->front()); // Propagate information from this call site into the callee. @@ -1350,6 +1414,12 @@ if (TFRVI == TrackedRetVals.end()) return handleCallOverdefined(CB); // Not tracking this callee. + if (CBToDuplicate.count(&CB)) { + auto V2 = TrackedRetVals.find(CBToDuplicate[&CB]); + if (V2 != TrackedRetVals.end() && isConstant(V2->second)) + TFRVI = V2; + } + // If so, propagate the return value of the callee into this call result. mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts()); } @@ -1846,6 +1916,8 @@ Module &M, const DataLayout &DL, std::function GetTLI, function_ref getAnalysis) { + + // Zap all returns which we've identified as zap to change. SCCPSolver Solver(DL, GetTLI, M.getContext()); // Loop over all functions, marking arguments to those with their addresses @@ -1864,6 +1936,10 @@ // Determine if we can track the function's arguments. If so, add the // function to the solver's set of argument-tracked functions. if (canTrackArgumentsInterprocedurally(&F)) { + if (std::distance(instructions(F).begin(), instructions(F).end()) < 20 && + canTrackReturnsInterprocedurally(&F)) { + Solver.FunctionsToDuplicate.insert({&F, false}); + } Solver.AddArgumentTrackedFunction(&F); continue; } @@ -1891,13 +1967,22 @@ while (ResolvedUndefs) { LLVM_DEBUG(dbgs() << "RESOLVING UNDEFS\n"); ResolvedUndefs = false; - for (Function &F : M) + for (Function &F : M) { if (Solver.ResolvedUndefsIn(F)) { // We run Solve() after we resolved an undef in a function, because // we might deduce a fact that eliminates an undef in another function. Solver.Solve(); ResolvedUndefs = true; } + } + for (auto &P : Solver.CBToDuplicate) { + if (Solver.ResolvedUndefsIn(*P.second)) { + // We run Solve() after we resolved an undef in a function, because + // we might deduce a fact that eliminates an undef in another function. + Solver.Solve(); + ResolvedUndefs = true; + } + } } bool MadeChanges = false; @@ -2096,5 +2181,10 @@ ++IPNumGlobalConst; } + for (auto &Dup : Solver.DuplicateToOriginal) { + Dup.first->replaceAllUsesWith(Dup.second); + Dup.first->eraseFromParent(); + } + return MadeChanges; } diff --git a/llvm/test/Transforms/SCCP/ip-ranges-binaryops.ll b/llvm/test/Transforms/SCCP/ip-ranges-binaryops.ll --- a/llvm/test/Transforms/SCCP/ip-ranges-binaryops.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-binaryops.ll @@ -1,20 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -ipsccp -S | FileCheck %s ; x = [10, 21), y = [100, 201) ; x + y = [110, 221) define internal i1 @f.add(i32 %x, i32 %y) { -; CHECK-LABEL: define internal i1 @f.add(i32 %x, i32 %y) { -; CHECK-NEXT: %a.1 = add i32 %x, %y -; CHECK-NEXT: %c.2 = icmp sgt i32 %a.1, 219 -; CHECK-NEXT: %c.4 = icmp slt i32 %a.1, 111 -; CHECK-NEXT: %c.5 = icmp eq i32 %a.1, 150 -; CHECK-NEXT: %c.6 = icmp slt i32 %a.1, 150 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: ret i1 %res.5 +; CHECK-LABEL: @f.add( +; CHECK-NEXT: [[A_1:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i32 [[A_1]], 219 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i32 [[A_1]], 111 +; CHECK-NEXT: [[C_5:%.*]] = icmp eq i32 [[A_1]], 150 +; CHECK-NEXT: [[C_6:%.*]] = icmp slt i32 [[A_1]], 150 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], [[C_5]] +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: ret i1 [[RES_5]] ; %a.1 = add i32 %x, %y %c.1 = icmp sgt i32 %a.1, 220 @@ -32,11 +33,10 @@ } define i1 @caller.add() { -; CHECK-LABEL: define i1 @caller.add() { -; CHECK-NEXT: %call.1 = tail call i1 @f.add(i32 10, i32 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.add(i32 20, i32 200) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.add( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.add(i32 10, i32 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.add(i32 20, i32 200) +; CHECK-NEXT: ret i1 false ; %call.1 = tail call i1 @f.add(i32 10, i32 100) %call.2 = tail call i1 @f.add(i32 20, i32 200) @@ -48,18 +48,18 @@ ; x = [10, 21), y = [100, 201) ; x - y = [-190, -79) define internal i1 @f.sub(i32 %x, i32 %y) { -; CHECK-LABEL: define internal i1 @f.sub(i32 %x, i32 %y) { -; CHECK-NEXT: %a.1 = sub i32 %x, %y -; CHECK-NEXT: %c.2 = icmp sgt i32 %a.1, -81 -; CHECK-NEXT: %c.4 = icmp slt i32 %a.1, -189 -; CHECK-NEXT: %c.5 = icmp eq i32 %a.1, -150 -; CHECK-NEXT: %c.6 = icmp slt i32 %a.1, -150 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: ret i1 %res.5 +; CHECK-LABEL: @f.sub( +; CHECK-NEXT: [[A_1:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i32 [[A_1]], -81 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i32 [[A_1]], -189 +; CHECK-NEXT: [[C_5:%.*]] = icmp eq i32 [[A_1]], -150 +; CHECK-NEXT: [[C_6:%.*]] = icmp slt i32 [[A_1]], -150 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], [[C_5]] +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: ret i1 [[RES_5]] ; %a.1 = sub i32 %x, %y %c.1 = icmp sgt i32 %a.1, -80 @@ -77,11 +77,10 @@ } define i1 @caller.sub() { -; CHECK-LABEL: define i1 @caller.sub() { -; CHECK-NEXT: %call.1 = tail call i1 @f.sub(i32 10, i32 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.sub(i32 20, i32 200) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.sub( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.sub(i32 10, i32 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.sub(i32 20, i32 200) +; CHECK-NEXT: ret i1 false ; %call.1 = tail call i1 @f.sub(i32 10, i32 100) %call.2 = tail call i1 @f.sub(i32 20, i32 200) @@ -92,18 +91,18 @@ ; x = [10, 21), y = [100, 201) ; x * y = [1000, 4001) define internal i1 @f.mul(i32 %x, i32 %y) { -; CHECK-LABEL: define internal i1 @f.mul(i32 %x, i32 %y) { -; CHECK-NEXT: %a.1 = mul i32 %x, %y -; CHECK-NEXT: %c.2 = icmp sgt i32 %a.1, 3999 -; CHECK-NEXT: %c.4 = icmp slt i32 %a.1, 1001 -; CHECK-NEXT: %c.5 = icmp eq i32 %a.1, 1500 -; CHECK-NEXT: %c.6 = icmp slt i32 %a.1, 1500 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: ret i1 %res.5 +; CHECK-LABEL: @f.mul( +; CHECK-NEXT: [[A_1:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i32 [[A_1]], 3999 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i32 [[A_1]], 1001 +; CHECK-NEXT: [[C_5:%.*]] = icmp eq i32 [[A_1]], 1500 +; CHECK-NEXT: [[C_6:%.*]] = icmp slt i32 [[A_1]], 1500 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], [[C_5]] +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: ret i1 [[RES_5]] ; %a.1 = mul i32 %x, %y %c.1 = icmp sgt i32 %a.1, 4000 @@ -121,11 +120,10 @@ } define i1 @caller.mul() { -; CHECK-LABEL: define i1 @caller.mul() { -; CHECK-NEXT: %call.1 = tail call i1 @f.mul(i32 10, i32 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.mul(i32 20, i32 200) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.mul( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.mul(i32 10, i32 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.mul(i32 20, i32 200) +; CHECK-NEXT: ret i1 false ; %call.1 = tail call i1 @f.mul(i32 10, i32 100) %call.2 = tail call i1 @f.mul(i32 20, i32 200) diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll --- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll @@ -1,24 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -ipsccp -S | FileCheck %s ; x = [100, 301) define internal i1 @f.trunc(i32 %x) { -; CHECK-LABEL: define internal i1 @f.trunc(i32 %x) { -; CHECK-NEXT: %t.1 = trunc i32 %x to i16 -; CHECK-NEXT: %c.2 = icmp sgt i16 %t.1, 299 -; CHECK-NEXT: %c.4 = icmp slt i16 %t.1, 101 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %t.2 = trunc i32 %x to i8 -; CHECK-NEXT: %c.5 = icmp sgt i8 %t.2, 44 -; CHECK-NEXT: %c.6 = icmp sgt i8 %t.2, 43 -; CHECK-NEXT: %c.7 = icmp slt i8 %t.2, 100 -; CHECK-NEXT: %c.8 = icmp slt i8 %t.2, 101 -; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: %res.6 = add i1 %res.5, %c.7 -; CHECK-NEXT: %res.7 = add i1 %res.6, %c.8 -; CHECK-NEXT: ret i1 %res.7 +; CHECK-LABEL: @f.trunc( +; CHECK-NEXT: [[T_1:%.*]] = trunc i32 [[X:%.*]] to i16 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i16 [[T_1]], 299 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i16 [[T_1]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[T_2:%.*]] = trunc i32 [[X]] to i8 +; CHECK-NEXT: [[C_5:%.*]] = icmp sgt i8 [[T_2]], 44 +; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i8 [[T_2]], 43 +; CHECK-NEXT: [[C_7:%.*]] = icmp slt i8 [[T_2]], 100 +; CHECK-NEXT: [[C_8:%.*]] = icmp slt i8 [[T_2]], 101 +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], [[C_5]] +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: [[RES_6:%.*]] = add i1 [[RES_5]], [[C_7]] +; CHECK-NEXT: [[RES_7:%.*]] = add i1 [[RES_6]], [[C_8]] +; CHECK-NEXT: ret i1 [[RES_7]] +; %t.1 = trunc i32 %x to i16 %c.1 = icmp sgt i16 %t.1, 300 @@ -41,11 +43,10 @@ } define i1 @caller1() { -; CHECK-LABEL: define i1 @caller1() { -; CHECK-NEXT: %call.1 = tail call i1 @f.trunc(i32 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.trunc(i32 300) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller1( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.trunc(i32 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.trunc(i32 300) +; CHECK-NEXT: ret i1 false ; %call.1 = tail call i1 @f.trunc(i32 100) %call.2 = tail call i1 @f.trunc(i32 300) @@ -56,22 +57,23 @@ ; x = [100, 301) define internal i1 @f.zext(i32 %x, i32 %y) { -; CHECK-LABEL: define internal i1 @f.zext(i32 %x, i32 %y) { -; CHECK-NEXT: %t.1 = zext i32 %x to i64 -; CHECK-NEXT: %c.2 = icmp sgt i64 %t.1, 299 -; CHECK-NEXT: %c.4 = icmp slt i64 %t.1, 101 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %t.2 = zext i32 %y to i64 -; CHECK-NEXT: %c.5 = icmp sgt i64 %t.2, 300 -; CHECK-NEXT: %c.6 = icmp sgt i64 %t.2, 299 -; CHECK-NEXT: %c.8 = icmp slt i64 %t.2, 1 -; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: %res.6 = add i1 %res.5, false -; CHECK-NEXT: %res.7 = add i1 %res.6, %c.8 -; CHECK-NEXT: ret i1 %res.7 +; CHECK-LABEL: @f.zext( +; CHECK-NEXT: [[T_1:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[T_1]], 299 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[T_1]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[T_2:%.*]] = zext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[C_5:%.*]] = icmp sgt i64 [[T_2]], 300 +; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i64 [[T_2]], 299 +; CHECK-NEXT: [[C_8:%.*]] = icmp slt i64 [[T_2]], 1 +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], [[C_5]] +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: [[RES_6:%.*]] = add i1 [[RES_5]], false +; CHECK-NEXT: [[RES_7:%.*]] = add i1 [[RES_6]], [[C_8]] +; CHECK-NEXT: ret i1 [[RES_7]] +; %t.1 = zext i32 %x to i64 %c.1 = icmp sgt i64 %t.1, 300 @@ -94,11 +96,10 @@ } define i1 @caller.zext() { -; CHECK-LABEL: define i1 @caller.zext() { -; CHECK-NEXT: %call.1 = tail call i1 @f.zext(i32 100, i32 -120) -; CHECK-NEXT: %call.2 = tail call i1 @f.zext(i32 300, i32 900) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.zext( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.zext(i32 100, i32 -120) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.zext(i32 300, i32 900) +; CHECK-NEXT: ret i1 true ; %call.1 = tail call i1 @f.zext(i32 100, i32 -120) %call.2 = tail call i1 @f.zext(i32 300, i32 900) @@ -108,21 +109,21 @@ ; x = [100, 301) define internal i1 @f.sext(i32 %x, i32 %y) { -; CHECK-LABEL: define internal i1 @f.sext(i32 %x, i32 %y) { -; CHECK-NEXT: [[T_1:%.*]] = zext i32 %x to i64 -; CHECK-NEXT: %c.2 = icmp sgt i64 [[T_1]], 299 -; CHECK-NEXT: %c.4 = icmp slt i64 [[T_1]], 101 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %t.2 = sext i32 %y to i64 -; CHECK-NEXT: %c.6 = icmp sgt i64 %t.2, 899 -; CHECK-NEXT: %c.8 = icmp slt i64 %t.2, -119 -; CHECK-NEXT: %res.4 = add i1 %res.3, false -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: %res.6 = add i1 %res.5, false -; CHECK-NEXT: %res.7 = add i1 %res.6, %c.8 -; CHECK-NEXT: ret i1 %res.7 +; CHECK-LABEL: @f.sext( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[TMP1]], 299 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[TMP1]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[T_2:%.*]] = sext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i64 [[T_2]], 899 +; CHECK-NEXT: [[C_8:%.*]] = icmp slt i64 [[T_2]], -119 +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], false +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: [[RES_6:%.*]] = add i1 [[RES_5]], false +; CHECK-NEXT: [[RES_7:%.*]] = add i1 [[RES_6]], [[C_8]] +; CHECK-NEXT: ret i1 [[RES_7]] ; %t.1 = sext i32 %x to i64 %c.1 = icmp sgt i64 %t.1, 300 @@ -145,11 +146,10 @@ } define i1 @caller.sext() { -; CHECK-LABEL: define i1 @caller.sext() { -; CHECK-NEXT: %call.1 = tail call i1 @f.sext(i32 100, i32 -120) -; CHECK-NEXT: %call.2 = tail call i1 @f.sext(i32 300, i32 900) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.sext( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.sext(i32 100, i32 -120) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.sext(i32 300, i32 900) +; CHECK-NEXT: ret i1 false ; %call.1 = tail call i1 @f.sext(i32 100, i32 -120) %call.2 = tail call i1 @f.sext(i32 300, i32 900) @@ -159,18 +159,18 @@ ; There's nothing we can do besides going to the full range or overdefined. define internal i1 @f.fptosi(i32 %x) { -; CHECK-LABEL: define internal i1 @f.fptosi(i32 %x) { -; CHECK-NEXT: %to.double = sitofp i32 %x to double -; CHECK-NEXT: %add = fadd double 0.000000e+00, %to.double -; CHECK-NEXT: %to.i32 = fptosi double %add to i32 -; CHECK-NEXT: %c.1 = icmp sgt i32 %to.i32, 300 -; CHECK-NEXT: %c.2 = icmp sgt i32 %to.i32, 299 -; CHECK-NEXT: %c.3 = icmp slt i32 %to.i32, 100 -; CHECK-NEXT: %c.4 = icmp slt i32 %to.i32, 101 -; CHECK-NEXT: %res.1 = add i1 %c.1, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, %c.3 -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: ret i1 %res.3 +; CHECK-LABEL: @f.fptosi( +; CHECK-NEXT: [[TO_DOUBLE:%.*]] = sitofp i32 [[X:%.*]] to double +; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[TO_DOUBLE]] +; CHECK-NEXT: [[TO_I32:%.*]] = fptosi double [[ADD]] to i32 +; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i32 [[TO_I32]], 300 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i32 [[TO_I32]], 299 +; CHECK-NEXT: [[C_3:%.*]] = icmp slt i32 [[TO_I32]], 100 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i32 [[TO_I32]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 [[C_1]], [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], [[C_3]] +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: ret i1 [[RES_3]] ; %to.double = sitofp i32 %x to double %add = fadd double 0.000000e+00, %to.double @@ -186,11 +186,10 @@ } define i1 @caller.fptosi() { -; CHECK-LABEL: define i1 @caller.fptosi() { -; CHECK-NEXT: %call.1 = tail call i1 @f.fptosi(i32 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.fptosi(i32 300) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.fptosi( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.fptosi(i32 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.fptosi(i32 300) +; CHECK-NEXT: ret i1 true ; %call.1 = tail call i1 @f.fptosi(i32 100) %call.2 = tail call i1 @f.fptosi(i32 300) @@ -200,18 +199,18 @@ ; There's nothing we can do besides going to the full range or overdefined. define internal i1 @f.fpext(i16 %x) { -; CHECK-LABEL: define internal i1 @f.fpext(i16 %x) { -; CHECK-NEXT: %to.float = sitofp i16 %x to float -; CHECK-NEXT: %to.double = fpext float %to.float to double -; CHECK-NEXT: %to.i64 = fptoui float %to.float to i64 -; CHECK-NEXT: %c.1 = icmp sgt i64 %to.i64, 300 -; CHECK-NEXT: %c.2 = icmp sgt i64 %to.i64, 299 -; CHECK-NEXT: %c.3 = icmp slt i64 %to.i64, 100 -; CHECK-NEXT: %c.4 = icmp slt i64 %to.i64, 101 -; CHECK-NEXT: %res.1 = add i1 %c.1, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, %c.3 -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: ret i1 %res.3 +; CHECK-LABEL: @f.fpext( +; CHECK-NEXT: [[TO_FLOAT:%.*]] = sitofp i16 [[X:%.*]] to float +; CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[TO_FLOAT]] to double +; CHECK-NEXT: [[TO_I64:%.*]] = fptoui float [[TO_FLOAT]] to i64 +; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[TO_I64]], 299 +; CHECK-NEXT: [[C_3:%.*]] = icmp slt i64 [[TO_I64]], 100 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[TO_I64]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 [[C_1]], [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], [[C_3]] +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: ret i1 [[RES_3]] ; %to.float = sitofp i16 %x to float %to.double = fpext float %to.float to double @@ -228,11 +227,10 @@ ; There's nothing we can do besides going to the full range or overdefined. define i1 @caller.fpext() { -; CHECK-LABEL: define i1 @caller.fpext() { -; CHECK-NEXT: %call.1 = tail call i1 @f.fpext(i16 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.fpext(i16 300) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.fpext( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.fpext(i16 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.fpext(i16 300) +; CHECK-NEXT: ret i1 true ; %call.1 = tail call i1 @f.fpext(i16 100) %call.2 = tail call i1 @f.fpext(i16 300) @@ -242,17 +240,17 @@ ; There's nothing we can do besides going to the full range or overdefined. define internal i1 @f.inttoptr.ptrtoint(i64 %x) { -; CHECK-LABEL: define internal i1 @f.inttoptr.ptrtoint(i64 %x) { -; CHECK-NEXT: %to.ptr = inttoptr i64 %x to i8* -; CHECK-NEXT: %to.i64 = ptrtoint i8* %to.ptr to i64 -; CHECK-NEXT: %c.1 = icmp sgt i64 %to.i64, 300 -; CHECK-NEXT: %c.2 = icmp sgt i64 %to.i64, 299 -; CHECK-NEXT: %c.3 = icmp slt i64 %to.i64, 100 -; CHECK-NEXT: %c.4 = icmp slt i64 %to.i64, 101 -; CHECK-NEXT: %res.1 = add i1 %c.1, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, %c.3 -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: ret i1 %res.3 +; CHECK-LABEL: @f.inttoptr.ptrtoint( +; CHECK-NEXT: [[TO_PTR:%.*]] = inttoptr i64 [[X:%.*]] to i8* +; CHECK-NEXT: [[TO_I64:%.*]] = ptrtoint i8* [[TO_PTR]] to i64 +; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[TO_I64]], 299 +; CHECK-NEXT: [[C_3:%.*]] = icmp slt i64 [[TO_I64]], 100 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[TO_I64]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 [[C_1]], [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], [[C_3]] +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: ret i1 [[RES_3]] ; %to.ptr = inttoptr i64 %x to i8* %to.i64 = ptrtoint i8* %to.ptr to i64 @@ -267,11 +265,10 @@ } define i1 @caller.inttoptr.ptrtoint() { -; CHECK-LABEL: define i1 @caller.inttoptr.ptrtoint() { -; CHECK-NEXT: %call.1 = tail call i1 @f.inttoptr.ptrtoint(i64 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.inttoptr.ptrtoint(i64 300) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.inttoptr.ptrtoint( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.inttoptr.ptrtoint(i64 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.inttoptr.ptrtoint(i64 300) +; CHECK-NEXT: ret i1 true ; %call.1 = tail call i1 @f.inttoptr.ptrtoint(i64 100) %call.2 = tail call i1 @f.inttoptr.ptrtoint(i64 300) @@ -281,12 +278,12 @@ ; Make sure we do not create constant ranges for int to fp casts. define i1 @int_range_to_double_cast(i32 %a) { -; CHECK-LABEL: define i1 @int_range_to_double_cast(i32 %a) -; CHECK-NEXT: %r = and i32 %a, 255 -; CHECK-NEXT: %tmp4 = sitofp i32 %r to double -; CHECK-NEXT: %tmp10 = fadd double 0.000000e+00, %tmp4 -; CHECK-NEXT: %tmp11 = fcmp olt double %tmp4, %tmp10 -; CHECK-NEXT: ret i1 %tmp11 +; CHECK-LABEL: @int_range_to_double_cast( +; CHECK-NEXT: [[R:%.*]] = and i32 [[A:%.*]], 255 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp i32 [[R]] to double +; CHECK-NEXT: [[TMP10:%.*]] = fadd double 0.000000e+00, [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = fcmp olt double [[TMP4]], [[TMP10]] +; CHECK-NEXT: ret i1 [[TMP11]] ; %r = and i32 %a, 255 %tmp4 = sitofp i32 %r to double @@ -297,13 +294,14 @@ ; Make sure we do not use ranges to propagate info from vectors. define i16 @vector_binop_and_cast() { -; CHECK-LABEL: define i16 @vector_binop_and_cast( +; CHECK-LABEL: @vector_binop_and_cast( ; CHECK-NEXT: entry: -; CHECK-NEXT: %vecinit7 = insertelement <8 x i16> , i16 undef, i32 0 -; CHECK-NEXT: %rem = srem <8 x i16> , %vecinit7 -; CHECK-NEXT: %0 = bitcast <8 x i16> %rem to i128 -; CHECK-NEXT: %1 = trunc i128 %0 to i16 -; CHECK-NEXT: ret i16 %1 +; CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x i16> , i16 undef, i32 0 +; CHECK-NEXT: [[REM:%.*]] = srem <8 x i16> , [[VECINIT7]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[REM]] to i128 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i16 +; CHECK-NEXT: ret i16 [[TMP1]] +; entry: %vecinit7 = insertelement <8 x i16> , i16 undef, i32 0 %rem = srem <8 x i16> , %vecinit7 diff --git a/llvm/test/Transforms/SCCP/ipsccp-cycles.ll b/llvm/test/Transforms/SCCP/ipsccp-cycles.ll --- a/llvm/test/Transforms/SCCP/ipsccp-cycles.ll +++ b/llvm/test/Transforms/SCCP/ipsccp-cycles.ll @@ -92,7 +92,7 @@ ; CHECK-NEXT: [[V1:%.*]] = call i32 @test3a(i32 0) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[V2:%.*]] = call i32 @test3a(i32 [[V1]]) +; CHECK-NEXT: [[V2:%.*]] = call i32 @test3a(i32 1) ; CHECK-NEXT: [[V3:%.*]] = add i32 [[V2]], 1 ; CHECK-NEXT: [[V4:%.*]] = call i32 @test3a(i32 [[V3]]) ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[V4]], [[A:%.*]]