diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h
--- a/llvm/include/llvm/Transforms/Utils/Evaluator.h
+++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h
@@ -123,6 +123,11 @@
   /// in a static initializer of a global.
   SmallPtrSet<Constant *, 8> SimpleConstants;
 
+  /// Whether or not we've stripped pointer casts down to alias analysis info.
+  /// If we have, we can't return a pointer value since we may return aliasing
+  /// but separate pointers, which cannot be substituted for each other.
+  bool StrippedPointerCastsForAliasAnalysis = false;
+
   const DataLayout &DL;
   const TargetLibraryInfo *TLI;
 };
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -550,56 +550,73 @@
         LLVM_DEBUG(dbgs() << "Skipping pseudoprobe intrinsic.\n");
         ++CurInst;
         continue;
+      } else {
+        Value *Stripped = CurInst->stripPointerCastsForAliasAnalysis();
+        // Only attempt to getVal() if we've actually managed to strip
+        // anything away, or else we'll call getVal() on the current
+        // instruction.
+        if (Stripped != &*CurInst) {
+          InstResult = getVal(Stripped);
+        }
+        if (InstResult) {
+          LLVM_DEBUG(dbgs() << "Stripped pointer casts for alias analysis for "
+                               "intrinsic call.\n");
+          StrippedPointerCastsForAliasAnalysis = true;
+        } else {
+          LLVM_DEBUG(dbgs() << "Unknown intrinsic. Cannot evaluate.\n");
+          return false;
+        }
       }
-
-      LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
-      return false;
     }
 
-    // Resolve function pointers.
-    SmallVector<Constant *, 8> Formals;
-    Function *Callee = getCalleeWithFormalArgs(CB, Formals);
-    if (!Callee || Callee->isInterposable()) {
-      LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");
-      return false; // Cannot resolve.
-    }
+    if (!InstResult) {
+      // Resolve function pointers.
+      SmallVector<Constant *, 8> Formals;
+      Function *Callee = getCalleeWithFormalArgs(CB, Formals);
+      if (!Callee || Callee->isInterposable()) {
+        LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");
+        return false; // Cannot resolve.
+      }
 
-    if (Callee->isDeclaration()) {
-      // If this is a function we can constant fold, do it.
-      if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
-        InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C);
-        if (!InstResult)
+      if (Callee->isDeclaration()) {
+        // If this is a function we can constant fold, do it.
+        if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
+          InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C);
+          if (!InstResult)
+            return false;
+          LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
+                            << *InstResult << "\n");
+        } else {
+          LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n");
           return false;
-        LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
-                          << *InstResult << "\n");
+        }
       } else {
-        LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n");
-        return false;
-      }
-    } else {
-      if (Callee->getFunctionType()->isVarArg()) {
-        LLVM_DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
-        return false;
-      }
+        if (Callee->getFunctionType()->isVarArg()) {
+          LLVM_DEBUG(dbgs()
+                     << "Can not constant fold vararg function call.\n");
+          return false;
+        }
 
-      Constant *RetVal = nullptr;
-      // Execute the call, if successful, use the return value.
-      ValueStack.emplace_back();
-      if (!EvaluateFunction(Callee, RetVal, Formals)) {
-        LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n");
-        return false;
-      }
-      ValueStack.pop_back();
-      InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal);
-      if (RetVal && !InstResult)
-        return false;
+        Constant *RetVal = nullptr;
+        // Execute the call, if successful, use the return value.
+        ValueStack.emplace_back();
+        if (!EvaluateFunction(Callee, RetVal, Formals)) {
+          LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n");
+          return false;
+        }
+        ValueStack.pop_back();
+        InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal);
+        if (RetVal && !InstResult)
+          return false;
 
-      if (InstResult) {
-        LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: "
-                          << *InstResult << "\n\n");
-      } else {
-        LLVM_DEBUG(dbgs()
-                   << "Successfully evaluated function. Result: 0\n\n");
+        if (InstResult) {
+          LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: "
+                            << *InstResult << "\n\n");
+        } else {
+          LLVM_DEBUG(dbgs()
+                     << "Successfully evaluated function. Result: 0\n\n");
+        }
       }
     }
   } else if (CurInst->isTerminator()) {
@@ -701,8 +718,23 @@
       // Successfully running until there's no next block means that we found
       // the return. Fill it the return value and pop the call stack.
       ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
-      if (RI->getNumOperands())
+      if (RI->getNumOperands()) {
+        // The Evaluator can look through pointer casts as long as alias
+        // analysis holds because it's just a simple interpreter and doesn't
+        // skip memory accesses due to invariant group metadata, but we can't
+        // let users of Evaluator use a value that's been gleaned looking
+        // through stripping pointer casts since separate pointers that alias
+        // cannot necessarily be substituted for each other.
+        // FIXME: We could be more precise tracking if the return value had ever
+        // transitively been through a
+        // Value::stripPointerCastsForAliasAnalysis(), rather than giving up on
+        // all pointer return values.
+        if (StrippedPointerCastsForAliasAnalysis &&
+            RI->getReturnValue()->getType()->isPointerTy()) {
+          return false;
+        }
         RetVal = getVal(RI->getOperand(0));
+      }
       CallStack.pop_back();
       return true;
     }
diff --git a/llvm/test/Transforms/GlobalOpt/invariant.group.ll b/llvm/test/Transforms/GlobalOpt/invariant.group.ll
--- a/llvm/test/Transforms/GlobalOpt/invariant.group.ll
+++ b/llvm/test/Transforms/GlobalOpt/invariant.group.ll
@@ -1,18 +1,16 @@
 ; RUN: opt -S -globalopt < %s | FileCheck %s
 
-; This test is hint, what could globalOpt optimize and what it can't
-; FIXME: @tmp and @tmp2 can be safely set to 42
-; CHECK: @tmp = local_unnamed_addr global i32 0
-; CHECK: @tmp2 = local_unnamed_addr global i32 0
-; CHECK: @tmp3 = global i32 0
+; CHECK: @llvm.global_ctors = appending global [1 x {{.*}}@_GLOBAL__I_c
+@llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__I_a, i8* null }, { i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__I_b, i8* null }, { i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__I_c, i8* null }]
+; CHECK: @tmp = local_unnamed_addr global i32 42
+; CHECK: @tmp2 = local_unnamed_addr global i32 42
+; CHECK: @tmp3 = global i32 42
 
 @tmp = global i32 0
 @tmp2 = global i32 0
 @tmp3 = global i32 0
 @ptrToTmp3 = global i32* null
 
-@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__I_a, i8* null }]
-
 define i32 @TheAnswerToLifeTheUniverseAndEverything() {
   ret i32 42
 }
@@ -43,7 +41,7 @@
 
 ; We can't step through launder.invariant.group here, because that would change
 ; this load in @usage_of_globals()
-; val = load i32, i32* %ptrVal, !invariant.group !0
+; %val = load i32, i32* %ptrVal, !invariant.group !0
 ; into
 ; %val = load i32, i32* @tmp3, !invariant.group !0
 ; and then we could assume that %val and %val2 to be the same, which coud be
@@ -62,6 +60,7 @@
   ret void
 }
 
+
 define void @usage_of_globals() {
 entry:
   %ptrVal = load i32*, i32** @ptrToTmp3
@@ -72,8 +71,41 @@
   ret void;
 }
 
+@tmp4 = global i32 0
+
+define void @_GLOBAL__I_b() {
+enter:
+  %val = call i32 @TheAnswerToLifeTheUniverseAndEverything()
+  %p1 = bitcast i32* @tmp4 to i8*
+  %p2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %p1)
+  %p3 = bitcast i8* %p2 to i32*
+  store i32 %val, i32* %p3
+  ret void
+}
+
+@tmp5 = global i32 0
+@tmp6 = global i32* null
+; CHECK: @tmp6 = local_unnamed_addr global i32* null
+
+define i32* @_dont_return_param(i32* %p) {
+  %p1 = bitcast i32* %p to i8*
+  %p2 = call i8* @llvm.launder.invariant.group(i8* %p1)
+  %p3 = bitcast i8* %p2 to i32*
+  ret i32* %p3
+}
+
+; We should bail out if we return any pointers derived via invariant.group intrinsics at any point.
+define void @_GLOBAL__I_c() {
+enter:
+  %tmp5 = call i32* @_dont_return_param(i32* @tmp5)
+  store i32* %tmp5, i32** @tmp6
+  ret void
+}
+
+
 declare void @changeTmp3ValAndCallBarrierInside()
 
 declare i8* @llvm.launder.invariant.group(i8*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
 
 !0 = !{}