Index: llvm/trunk/include/llvm/Transforms/Utils/Evaluator.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/Evaluator.h
+++ llvm/trunk/include/llvm/Transforms/Utils/Evaluator.h
@@ -73,6 +73,12 @@
     ValueStack.back()[V] = C;
   }
 
+  Function *getCallee(Value *V);
+
+  /// Given call expression and result value cast the result value
+  /// to call expression type if call expression contains bitcast.
+  Constant *castCallResultIfNeeded(Value *CallExpr, Constant *RV);
+
   const DenseMap &getMutatedMemory() const {
     return MutatedMemory;
   }
Index: llvm/trunk/lib/Transforms/Utils/Evaluator.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/Evaluator.cpp
+++ llvm/trunk/lib/Transforms/Utils/Evaluator.cpp
@@ -217,6 +217,33 @@
   return nullptr; // don't know how to evaluate.
 }
 
+/// Resolve the called value \p V to a Function, looking through a bitcast
+/// constant expression. Returns null when no Function can be resolved.
+Function *Evaluator::getCallee(Value *V) {
+  auto *CE = dyn_cast<ConstantExpr>(V);
+  if (!CE)
+    return dyn_cast<Function>(getVal(V));
+  if (CE->getOpcode() != Instruction::BitCast)
+    return nullptr; // A ConstantExpr itself is never a Function.
+  return dyn_cast_or_null<Function>( // Folding may fail and yield null.
+      ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL));
+}
+
+/// If call expression contains bitcast then we may need to cast
+/// evaluated return value to a type of the call expression.
+/// A null \p RV is handed back untouched rather than folded.
+Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) {
+  ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr);
+  if (!RV || !CE || CE->getOpcode() != Instruction::BitCast)
+    return RV;
+  if (auto *FT =
+          dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) {
+    RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL);
+    assert(RV && "Failed to fold bitcast call expr");
+  }
+  return RV;
+}
+
 /// Evaluate all instructions in block BB, returning true if successful, false
 /// if we can't evaluate it. NewBB returns the next BB that control flows into,
 /// or null upon return.
@@ -465,7 +492,7 @@
       }
 
       // Resolve function pointers.
- Function *Callee = dyn_cast(getVal(CS.getCalledValue())); + Function *Callee = getCallee(CS.getCalledValue()); if (!Callee || Callee->isInterposable()) { LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n"); return false; // Cannot resolve. @@ -478,7 +505,7 @@ if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) { - InstResult = C; + InstResult = castCallResultIfNeeded(CS.getCalledValue(), C); LLVM_DEBUG(dbgs() << "Constant folded function call. Result: " << *InstResult << "\n"); } else { @@ -499,7 +526,7 @@ return false; } ValueStack.pop_back(); - InstResult = RetVal; + InstResult = castCallResultIfNeeded(CS.getCalledValue(), RetVal); if (InstResult) { LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: " Index: llvm/trunk/test/Transforms/GlobalOpt/evaluate-call.ll =================================================================== --- llvm/trunk/test/Transforms/GlobalOpt/evaluate-call.ll +++ llvm/trunk/test/Transforms/GlobalOpt/evaluate-call.ll @@ -0,0 +1,52 @@ +; Checks if bitcasted call expression can be evaluated +; Given call expression: +; %struct.Foo* bitcast (%struct.Bar* ()* @_Z6getFoov to %struct.Foo* ()*)() +; We evaluate call to function _Z6getFoov and then cast the result to %struct.Foo* + +; RUN: opt -globalopt -instcombine %s -S -o - | FileCheck %s + +; CHECK: i32 @main() +; CHECK-NEXT: %1 = load i64, i64* inttoptr (i64 32 to i64*), align 32 +; CHECK-NEXT: %2 = trunc i64 %1 to i32 +; CHECK-NEXT: ret i32 %2 +; CHECK-NOT: _GLOBAL__sub_I_main + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +%struct.Bar = type { i64, i64 } +%struct.S = type { %struct.Foo* } +%struct.Foo = type { i64, i64 } +%struct.Baz = type { i64, i64, %struct.Bar } + +@instance = internal local_unnamed_addr global %struct.S zeroinitializer, align 8 +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ 
i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_main.cpp, i8* null }] +@gBaz = available_externally dso_local local_unnamed_addr global %struct.Baz* null, align 8 +@gFoo = available_externally dso_local local_unnamed_addr global %struct.Bar* null, align 8 + +; Function Attrs: norecurse nounwind readonly uwtable +define dso_local i32 @main() local_unnamed_addr { + %1 = load %struct.Foo*, %struct.Foo** getelementptr inbounds (%struct.S, %struct.S* @instance, i64 0, i32 0), align 8 + %2 = getelementptr inbounds %struct.Foo, %struct.Foo* %1, i64 0, i32 0 + %3 = load i64, i64* %2, align 8 + %4 = trunc i64 %3 to i32 + ret i32 %4 +} + +; Function Attrs: uwtable +define internal void @_GLOBAL__sub_I_main.cpp() section ".text.startup" { + %1 = tail call %struct.Foo* bitcast (%struct.Bar* ()* @_Z6getFoov to %struct.Foo* ()*)() + %2 = getelementptr inbounds %struct.Foo, %struct.Foo* %1, i64 1 + store %struct.Foo* %2, %struct.Foo** getelementptr inbounds (%struct.S, %struct.S* @instance, i64 0, i32 0), align 8 + ret void +} + +; Function Attrs: norecurse nounwind readonly uwtable +define available_externally dso_local %struct.Bar* @_Z6getFoov() local_unnamed_addr { + %1 = load %struct.Bar*, %struct.Bar** @gFoo, align 8 + %2 = icmp eq %struct.Bar* %1, null + %3 = load %struct.Baz*, %struct.Baz** @gBaz, align 8 + %4 = getelementptr inbounds %struct.Baz, %struct.Baz* %3, i64 0, i32 2 + %5 = select i1 %2, %struct.Bar* %4, %struct.Bar* %1 + ret %struct.Bar* %5 +} Index: llvm/trunk/test/Transforms/GlobalOpt/evaluate-constfold-call.ll =================================================================== --- llvm/trunk/test/Transforms/GlobalOpt/evaluate-constfold-call.ll +++ llvm/trunk/test/Transforms/GlobalOpt/evaluate-constfold-call.ll @@ -0,0 +1,55 @@ +; Check if we can evaluate a bitcasted call to a function which is constant folded. +; Evaluator folds call to fmodf, replacing it with constant value in case both operands +; are known at compile time. 
+; RUN: opt -globalopt -instcombine %s -S -o - | FileCheck %s + +; CHECK: @_q = dso_local local_unnamed_addr global %struct.Q { i32 1066527622 } +; CHECK: define dso_local i32 @main +; CHECK-NEXT: %[[V:.+]] = load i32, i32* getelementptr inbounds (%struct.Q, %struct.Q* @_q, i64 0, i32 0), align 4 +; CHECK-NEXT: ret i32 %[[V]] + +source_filename = "main.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-none-linux-gnu" + +%struct.Q = type { i32 } + +$_ZN1QC2Ev = comdat any + +@_q = dso_local global %struct.Q zeroinitializer, align 4 +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_main.cpp, i8* null }] + +define internal void @__cxx_global_var_init() section ".text.startup" { + call void @_ZN1QC2Ev(%struct.Q* @_q) + ret void +} + +define linkonce_odr dso_local void @_ZN1QC2Ev(%struct.Q*) unnamed_addr #1 comdat align 2 { + %2 = alloca %struct.Q*, align 8 + store %struct.Q* %0, %struct.Q** %2, align 8 + %3 = load %struct.Q*, %struct.Q** %2, align 8 + %4 = getelementptr inbounds %struct.Q, %struct.Q* %3, i32 0, i32 0 + %5 = call i32 bitcast (float (float, float)* @fmodf to i32 (float, float)*)(float 0x40091EB860000000, float 2.000000e+00) + store i32 %5, i32* %4, align 4 + ret void +} + +define dso_local i32 @main(i32, i8**) { + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca i8**, align 8 + store i32 0, i32* %3, align 4 + store i32 %0, i32* %4, align 4 + store i8** %1, i8*** %5, align 8 + %6 = load i32, i32* getelementptr inbounds (%struct.Q, %struct.Q* @_q, i32 0, i32 0), align 4 + ret i32 %6 +} + +; Function Attrs: nounwind +declare dso_local float @fmodf(float, float) + +; Function Attrs: noinline uwtable +define internal void @_GLOBAL__sub_I_main.cpp() section ".text.startup" { + call void @__cxx_global_var_init() + ret void +}