Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -880,6 +880,11 @@ const Value *Object = getUnderlyingObject(Loc.Ptr); + // Stack restore is able to modify unescaped dynamic allocas. Assume it may + // modify them even though the alloca is not escaped. + if (isa<AllocaInst>(Object) && isIntrinsicCall(Call, Intrinsic::stackrestore)) + return ModRefInfo::Mod; + // Calls marked 'tail' cannot read or write allocas from the current frame // because the current frame might be destroyed by the time they run. However, // a tail call may use an alloca with byval. Calling with byval copies the @@ -891,12 +896,6 @@ !CI->getAttributes().hasAttrSomewhere(Attribute::ByVal)) return ModRefInfo::NoModRef; - // Stack restore is able to modify unescaped dynamic allocas. Assume it may - // modify them even though the alloca is not escaped. - if (auto *AI = dyn_cast<AllocaInst>(Object)) - if (!AI->isStaticAlloca() && isIntrinsicCall(Call, Intrinsic::stackrestore)) - return ModRefInfo::Mod; - // If the pointer is to a locally allocated object that does not escape, // then the call can not mod/ref the pointer unless the call takes the pointer // as an argument, and itself doesn't capture it. Index: llvm/test/Transforms/MemCpyOpt/stackrestore.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/stackrestore.ll +++ llvm/test/Transforms/MemCpyOpt/stackrestore.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -memcpyopt < %s -verify-memoryssa | FileCheck %s +; RUN: opt -S -passes=memcpyopt -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-TEST2 ; PR40118: BasicAA didn't realize that stackrestore ends the lifetime of ; unescaped dynamic allocas, such as those that might come from inalloca. 
@@ -80,6 +81,61 @@ ret i32 0 } +; Test that memcpyopt does not rewrite the final memcpy to read from %A2: +; %A2 is an alloca that is clobbered by the preceding call to stackrestore. + +; Function Attrs: nobuiltin norecurse +define dso_local void @test() { +; CHECK-TEST2-LABEL: @test +; CHECK-TEST2-NOT: ret void +; CHECK-TEST2: tail call void @llvm.stackrestore(ptr %SS) +; CHECK-TEST2: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(56) %uglygep123, ptr noundef nonnull align 8 dereferenceable(56) %A1, i32 56, i1 false) +; CHECK-TEST2: ret void +entry: + %A1 = alloca [56 x i8], align 8 + %SS = tail call ptr @llvm.stacksave() + %A2 = alloca [56 x i8], align 4 + store i8 1, ptr %A2, align 4 + %GEP1 = getelementptr inbounds i8, ptr %A2, i32 8 + store i8 1, ptr %GEP1, align 4 + %GEP2 = getelementptr inbounds i8, ptr %A2, i32 12 + store i8 1, ptr %GEP2, align 4 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(56) %A1, ptr noundef nonnull align 4 dereferenceable(56) %A2, i32 56, i1 false) + tail call void @llvm.stackrestore(ptr %SS) + %A3 = alloca [56 x i8], align 4 + %uglygep123 = getelementptr i8, ptr %A3, i32 0 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(56) %uglygep123, ptr noundef nonnull align 8 dereferenceable(56) %A1, i32 56, i1 false) + ret void +} + +; Control test: mimic the previous test but substitute different functions +; for the intrinsics stacksave and stackrestore. Test that memcpyopt does +; optimize the final memcpy. 
+ +; Function Attrs: nobuiltin norecurse +define dso_local void @control() { +; CHECK-TEST2-LABEL: @control +; CHECK-TEST2-NOT: ret void +; CHECK-TEST2: tail call void @useit(ptr %CSS) +; CHECK-TEST2: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %Cuglygep123, ptr align 4 %CA2, i32 56, i1 false) +; CHECK-TEST2: ret void +entry: + %CA1 = alloca [56 x i8], align 8 + %CSS = tail call ptr @external() + %CA2 = alloca [56 x i8], align 4 + store i8 1, ptr %CA2, align 4 + %CGEP1 = getelementptr inbounds i8, ptr %CA2, i32 8 + store i8 1, ptr %CGEP1, align 4 + %CGEP2 = getelementptr inbounds i8, ptr %CA2, i32 12 + store i8 1, ptr %CGEP2, align 4 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(56) %CA1, ptr noundef nonnull align 4 dereferenceable(56) %CA2, i32 56, i1 false) + tail call void @useit(ptr %CSS) + %CA3 = alloca [56 x i8], align 4 + %Cuglygep123 = getelementptr i8, ptr %CA3, i32 0 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(56) %Cuglygep123, ptr noundef nonnull align 8 dereferenceable(56) %CA1, i32 56, i1 false) + ret void +} + declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) declare ptr @llvm.stacksave() declare void @llvm.stackrestore(ptr)