diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -566,7 +566,7 @@ /// This method limits promotion of aggregates to only promote up to three /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. -static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca, +static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR, unsigned MaxElements) { using GEPIndicesSet = std::set; @@ -589,9 +589,6 @@ // // This set will contain all sets of indices that are loaded in the entry // block, and thus are safe to unconditionally load in the caller. - // - // This optimization is also safe for InAlloca parameters, because it verifies - // that the address isn't captured. GEPIndicesSet SafeToUnconditionallyLoad; // This set contains all the sets of indices that we are planning to promote. @@ -599,7 +596,7 @@ GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg)) + if (isByVal || allCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -656,8 +653,7 @@ // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? - return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR, - MaxElements); + return isSafeToPromoteArgument(Arg, isByVal, AAR, MaxElements); } // Ensure that all of the indices are constants. @@ -856,6 +852,11 @@ if (F->isVarArg()) return nullptr; + // Don't transform functions that receive inallocas, as the transformation may + // not be safe depending on calling convention. + if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca)) + return nullptr; + // First check: see if there are any pointer arguments! If not, quick exit. SmallVector PointerArgs; for (Argument &I : F->args()) @@ -914,8 +915,7 @@ // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe, if the passed value is densely - // packed or if we can prove the padding bytes are never accessed. This does - // not apply to inalloca. + // packed or if we can prove the padding bytes are never accessed. bool isSafeToPromote = PtrArg->hasByValAttr() && (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); @@ -966,7 +966,7 @@ } // Otherwise, see if we can promote the pointer to its value. - if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR, + if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr(), AAR, MaxElements)) ArgsToPromote.insert(PtrArg); } diff --git a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll --- a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll @@ -1,50 +1,35 @@ -; RUN: opt %s -argpromotion -sroa -S | FileCheck %s -; RUN: opt %s -passes='argpromotion,function(sroa)' -S | FileCheck %s +; PR41658 happened because we remove parameters from an x86_thiscallcc function +; and then try to pass an inalloca in a register. This test verifies that we +; don't do that anymore. +; +; RUN: opt -S -argpromotion %s | FileCheck %s +; CHECK: define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc19.11.0" -%struct.ss = type { i32, i32 } +%struct.a = type { i8 } -; Argpromote + sroa should change this to passing the two integers by value. -define internal i32 @f(%struct.ss* inalloca %s) { +define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) { entry: - %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 - %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1 - %a = load i32, i32* %f0, align 4 - %b = load i32, i32* %f1, align 4 - %r = add i32 %a, %b - ret i32 %r + %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0 + %argmem = alloca inalloca <{ %struct.a }>, align 4 + %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0 + %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a) + call void @ext(<{ %struct.a }>* inalloca %argmem) + ret void } -; CHECK-LABEL: define internal i32 @f -; CHECK-NOT: load -; CHECK: ret -define i32 @main() { -entry: - %S = alloca inalloca %struct.ss - %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 - %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 - store i32 1, i32* %f0, align 4 - store i32 2, i32* %f1, align 4 - %r = call i32 @f(%struct.ss* inalloca %S) - ret i32 %r -} -; CHECK-LABEL: define i32 @main -; CHECK-NOT: load -; CHECK: ret - -; Argpromote can't promote %a because of the icmp use. -define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { -; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) -entry: - %c = icmp eq %struct.ss* %a, %b - ret i1 %c +; This is here to ensure @internalfun is live. +define void @exportedfun(%struct.a* %a) { + %inalloca.save = tail call i8* @llvm.stacksave() + %argmem = alloca inalloca <{ %struct.a }>, align 4 + call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem) + call void @llvm.stackrestore(i8* %inalloca.save) + ret void } -define i32 @test() { -entry: - %S = alloca inalloca %struct.ss - %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) -; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) - ret i32 0 -} +declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1)) +declare void @ext(<{ %struct.a }>* inalloca) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*)