Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -812,6 +812,12 @@
           !CI->getAttributes().hasAttrSomewhere(Attribute::ByVal))
         return ModRefInfo::NoModRef;
 
+  // Stack restore is able to modify unescaped dynamic allocas. Assume it may
+  // modify them even though the alloca is not escaped.
+  if (auto *AI = dyn_cast<AllocaInst>(Object))
+    if (!AI->isStaticAlloca() && isIntrinsicCall(CS, Intrinsic::stackrestore))
+      return ModRefInfo::Mod;
+
   // If the pointer is to a locally allocated object that does not escape,
   // then the call can not mod/ref the pointer unless the call takes the pointer
   // as an argument, and itself doesn't capture it.
Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1060,6 +1060,8 @@
     UseMemMove = true;
 
   // If all checks passed, then we can transform M.
+  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
+                    << *MDep << '\n' << *M << '\n');
 
   // TODO: Is this worth it if we're creating a less aligned memcpy? For
   // example we could be moving from movaps -> movq on x86.
Index: llvm/test/Transforms/MemCpyOpt/stackrestore.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/MemCpyOpt/stackrestore.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -memcpyopt < %s | FileCheck %s
+
+; PR40118: BasicAA didn't realize that stackrestore ends the lifetime of
+; unescaped dynamic allocas, such as those that might come from inalloca.
+
+source_filename = "t.cpp"
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-unknown-windows-msvc19.14.26433"
+
+@str = internal constant [9 x i8] c"abcdxxxxx"
+
+define i32 @main() {
+  %tmpmem = alloca [10 x i8], align 4
+  %tmp = getelementptr inbounds [10 x i8], [10 x i8]* %tmpmem, i32 0, i32 0
+  %inalloca.save = tail call i8* @llvm.stacksave()
+  %argmem = alloca inalloca [10 x i8], align 4
+  %p = getelementptr inbounds [10 x i8], [10 x i8]* %argmem, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+
+  ; This extra byte exists to prevent memcpyopt from propagating @str.
+  %p10 = getelementptr inbounds [10 x i8], [10 x i8]* %argmem, i32 0, i32 9
+  store i8 0, i8* %p10
+
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false)
+  call void @llvm.stackrestore(i8* %inalloca.save)
+  %heap = call i8* @malloc(i32 9)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %tmp, i32 9, i1 false)
+  call void @useit(i8* %heap)
+  ret i32 0
+}
+
+; CHECK-LABEL: define i32 @main()
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %tmp, i32 9, i1 false)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+declare i8* @malloc(i32)
+declare void @useit(i8*)