diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -353,9 +353,21 @@ V = MappedV; if (!NewInst->mayHaveSideEffects()) { - VMap[&*II] = V; - NewInst->deleteValue(); - continue; + // Do not update the instruction, which is a call, to be an existing + // call instruction. This will confuse the subsequent inlining to + // double inline the existing call. If the instruction can be folded + // away at this point, it will most likely be inlined and optimized + // away later on. Note that we check that both the original + // instruction and the simplified instruction are calls. This is + // because the simplification can promote a non-call instruction to a + // call or an intrinsic call on some targets. In this case we'd like + // to keep this simplification since it will not cause double + // inlining. + if (!isa<CallInst>(NewInst) || !isa<CallInst>(V)) { + VMap[&*II] = V; + NewInst->deleteValue(); + continue; + } } } } diff --git a/llvm/test/Transforms/Inline/inline_call.ll b/llvm/test/Transforms/Inline/inline_call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_call.ll @@ -0,0 +1,71 @@ +; Check that the optimizer doesn't crash when the function 'top' and all of its callees are inlined.
+; RUN: opt < %s -O3 -S | FileCheck %s + +define dso_local void (...)* @second(i8** %p) { +entry: + %p.addr = alloca i8**, align 8 + store i8** %p, i8*** %p.addr, align 8 + %tmp = load i8**, i8*** %p.addr, align 8 + %tmp1 = load i8*, i8** %tmp, align 8 + %tmp2 = bitcast i8* %tmp1 to void (...)* + ret void (...)* %tmp2 +} + +define dso_local void @top() { +entry: + ; CHECK: {{.*}} = {{.*}} call {{.*}} @ext + ; CHECK-NOT: {{.*}} = {{.*}} call {{.*}} @third + ; CHECK-NOT: {{.*}} = {{.*}} call {{.*}} @second + ; CHECK-NOT: {{.*}} = {{.*}} call {{.*}} @wrapper + %q = alloca i8*, align 8 + store i8* bitcast (void ()* @third to i8*), i8** %q, align 8 + %tmp = call void (...)* @second(i8** %q) + ; The call to 'wrapper' here is to ensure that its function attributes + ; i.e., returning its parameter and having no side effect, will be deduced + ; before the next round of inlining happens to 'top' to expose the bug. + %call = call void (...)* @wrapper(void (...)* %tmp) + ; The indirect call here is to confuse the alias analyzer so that + ; an incomplete graph will be built during the first round of inlining. + ; This allows the current function to be processed before the actual + ; callee, i.e., the function 'run', is processed. Once it's simplified to + ; a direct call, it also enables an additional round of inlining with all + ; function attributes deduced. + call void (...) %call() + ret void +} + +define dso_local void (...)* @gen() { +entry: + %call = call void (...)* (...) @ext() + ret void (...)* %call +} + +declare dso_local void (...)* @ext(...)
+ +define dso_local void (...)* @wrapper(void (...)* %fn) { +entry: + ret void (...)* %fn +} + +define dso_local void @run(void (...)* %fn) { +entry: + %fn.addr = alloca void (...)*, align 8 + %f = alloca void (...)*, align 8 + store void (...)* %fn, void (...)** %fn.addr, align 8 + %tmp = load void (...)*, void (...)** %fn.addr, align 8 + %call = call void (...)* @wrapper(void (...)* %tmp) + store void (...)* %call, void (...)** %f, align 8 + %tmp1 = load void (...)*, void (...)** %f, align 8 + call void (...) %tmp1() + ret void +} + +define dso_local void @third() { +entry: + %f = alloca void (...)*, align 8 + %call = call void (...)* @gen() + store void (...)* %call, void (...)** %f, align 8 + %tmp = load void (...)*, void (...)** %f, align 8 + call void @run(void (...)* %tmp) + ret void +} \ No newline at end of file