Skip to content

Commit a78ab77

Browse files
committedMay 2, 2019
remove inalloca parameters in globalopt and simplify argpromotion
Summary: Inalloca parameters require special handling in some optimizations. This change causes globalopt to strip the inalloca attribute from function parameters when it is safe to do so, removes the special handling for inallocas from argpromotion, and replaces it with a simple check that causes argpromotion to skip functions that receive inallocas (for when the pass is invoked on code that didn't run through globalopt first). This also avoids a case where argpromotion would incorrectly try to pass an inalloca in a register. Fixes PR41658. Reviewers: rnk, efriedma Reviewed By: rnk Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D61286 llvm-svn: 359743
1 parent 1feaee5 commit a78ab77

File tree

5 files changed

+76
-34
lines changed

5 files changed

+76
-34
lines changed
 

‎llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

+11-11
Original file line numberDiff line numberDiff line change
@@ -566,8 +566,8 @@ static void markIndicesSafe(const IndicesVector &ToMark,
566566
/// This method limits promotion of aggregates to only promote up to three
567567
/// elements of the aggregate in order to avoid exploding the number of
568568
/// arguments passed in.
569-
static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
570-
AAResults &AAR, unsigned MaxElements) {
569+
static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
570+
unsigned MaxElements) {
571571
using GEPIndicesSet = std::set<IndicesVector>;
572572

573573
// Quick exit for unused arguments
@@ -589,17 +589,14 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
589589
//
590590
// This set will contain all sets of indices that are loaded in the entry
591591
// block, and thus are safe to unconditionally load in the caller.
592-
//
593-
// This optimization is also safe for InAlloca parameters, because it verifies
594-
// that the address isn't captured.
595592
GEPIndicesSet SafeToUnconditionallyLoad;
596593

597594
// This set contains all the sets of indices that we are planning to promote.
598595
// This makes it possible to limit the number of arguments added.
599596
GEPIndicesSet ToPromote;
600597

601598
// If the pointer is always valid, any load with first index 0 is valid.
602-
if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
599+
if (isByVal || allCallersPassInValidPointerForArgument(Arg))
603600
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
604601

605602
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -656,8 +653,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
656653
// TODO: This runs the above loop over and over again for dead GEPs
657654
// Couldn't we just do increment the UI iterator earlier and erase the
658655
// use?
659-
return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
660-
MaxElements);
656+
return isSafeToPromoteArgument(Arg, isByVal, AAR, MaxElements);
661657
}
662658

663659
// Ensure that all of the indices are constants.
@@ -856,6 +852,11 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
856852
if (F->isVarArg())
857853
return nullptr;
858854

855+
// Don't transform functions that receive inallocas, as the transformation may
856+
// not be safe depending on calling convention.
857+
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
858+
return nullptr;
859+
859860
// First check: see if there are any pointer arguments! If not, quick exit.
860861
SmallVector<Argument *, 16> PointerArgs;
861862
for (Argument &I : F->args())
@@ -914,8 +915,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
914915

915916
// If this is a byval argument, and if the aggregate type is small, just
916917
// pass the elements, which is always safe, if the passed value is densely
917-
// packed or if we can prove the padding bytes are never accessed. This does
918-
// not apply to inalloca.
918+
// packed or if we can prove the padding bytes are never accessed.
919919
bool isSafeToPromote =
920920
PtrArg->hasByValAttr() &&
921921
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
@@ -966,7 +966,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
966966
}
967967

968968
// Otherwise, see if we can promote the pointer to its value.
969-
if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
969+
if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr(), AAR,
970970
MaxElements))
971971
ArgsToPromote.insert(PtrArg);
972972
}

‎llvm/lib/Transforms/IPO/GlobalOpt.cpp

+20-16
Original file line numberDiff line numberDiff line change
@@ -2090,21 +2090,21 @@ static void ChangeCalleesToFastCall(Function *F) {
20902090
}
20912091
}
20922092

2093-
static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
2094-
// There can be at most one attribute set with a nest attribute.
2095-
unsigned NestIndex;
2096-
if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
2097-
return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
2093+
static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
2094+
Attribute::AttrKind A) {
2095+
unsigned AttrIndex;
2096+
if (Attrs.hasAttrSomewhere(A, &AttrIndex))
2097+
return Attrs.removeAttribute(C, AttrIndex, A);
20982098
return Attrs;
20992099
}
21002100

2101-
static void RemoveNestAttribute(Function *F) {
2102-
F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
2101+
static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
2102+
F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
21032103
for (User *U : F->users()) {
21042104
if (isa<BlockAddress>(U))
21052105
continue;
21062106
CallSite CS(cast<Instruction>(U));
2107-
CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
2107+
CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
21082108
}
21092109
}
21102110

@@ -2119,13 +2119,6 @@ static bool hasChangeableCC(Function *F) {
21192119
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
21202120
return false;
21212121

2122-
// Don't break the invariant that the inalloca parameter is the only parameter
2123-
// passed in memory.
2124-
// FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
2125-
// alloca it uses to the entry block if possible.
2126-
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
2127-
return false;
2128-
21292122
// FIXME: Change CC for the whole chain of musttail calls when possible.
21302123
//
21312124
// Can't change CC of the function that either has musttail calls, or is a
@@ -2287,6 +2280,17 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
22872280
if (!F->hasLocalLinkage())
22882281
continue;
22892282

2283+
// If we have an inalloca parameter that we can safely remove the
2284+
// inalloca attribute from, do so. This unlocks optimizations that
2285+
// wouldn't be safe in the presence of inalloca.
2286+
// FIXME: We should also hoist alloca affected by this to the entry
2287+
// block if possible.
2288+
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
2289+
!F->hasAddressTaken()) {
2290+
RemoveAttribute(F, Attribute::InAlloca);
2291+
Changed = true;
2292+
}
2293+
22902294
if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
22912295
NumInternalFunc++;
22922296
TargetTransformInfo &TTI = GetTTI(*F);
@@ -2319,7 +2323,7 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
23192323
!F->hasAddressTaken()) {
23202324
// The function is not used by a trampoline intrinsic, so it is safe
23212325
// to remove the 'nest' attribute.
2322-
RemoveNestAttribute(F);
2326+
RemoveAttribute(F, Attribute::Nest);
23232327
++NumNestRemoved;
23242328
Changed = true;
23252329
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; In PR41658, argpromotion put an inalloca in a position that per the
2+
; calling convention is passed in a register. This test verifies that
3+
; we don't do that anymore. It also verifies that the combination of
4+
; globalopt and argpromotion is able to optimize the call safely.
5+
;
6+
; RUN: opt -S -argpromotion %s | FileCheck --check-prefix=THIS %s
7+
; RUN: opt -S -globalopt -argpromotion %s | FileCheck --check-prefix=OPT %s
8+
; THIS: define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a
9+
; OPT: define internal fastcc void @internalfun(<{ %struct.a }>*)
10+
11+
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
12+
target triple = "i386-pc-windows-msvc19.11.0"
13+
14+
%struct.a = type { i8 }
15+
16+
define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) {
17+
entry:
18+
%a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0
19+
%argmem = alloca inalloca <{ %struct.a }>, align 4
20+
%1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0
21+
%call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a)
22+
call void @ext(<{ %struct.a }>* inalloca %argmem)
23+
ret void
24+
}
25+
26+
; This is here to ensure @internalfun is live.
27+
define void @exportedfun(%struct.a* %a) {
28+
%inalloca.save = tail call i8* @llvm.stacksave()
29+
%argmem = alloca inalloca <{ %struct.a }>, align 4
30+
call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem)
31+
call void @llvm.stackrestore(i8* %inalloca.save)
32+
ret void
33+
}
34+
35+
declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1))
36+
declare void @ext(<{ %struct.a }>* inalloca)
37+
declare i8* @llvm.stacksave()
38+
declare void @llvm.stackrestore(i8*)

‎llvm/test/Transforms/ArgumentPromotion/inalloca.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
; RUN: opt %s -argpromotion -sroa -S | FileCheck %s
2-
; RUN: opt %s -passes='argpromotion,function(sroa)' -S | FileCheck %s
1+
; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s
2+
; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s
33

44
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
55

@@ -15,7 +15,7 @@ entry:
1515
%r = add i32 %a, %b
1616
ret i32 %r
1717
}
18-
; CHECK-LABEL: define internal i32 @f
18+
; CHECK-LABEL: define internal fastcc i32 @f
1919
; CHECK-NOT: load
2020
; CHECK: ret
2121

@@ -35,7 +35,7 @@ entry:
3535

3636
; Argpromote can't promote %a because of the icmp use.
3737
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind {
38-
; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b)
38+
; CHECK: define internal fastcc i1 @g(%struct.ss* %a, %struct.ss* %b)
3939
entry:
4040
%c = icmp eq %struct.ss* %a, %b
4141
ret i1 %c
@@ -45,6 +45,6 @@ define i32 @test() {
4545
entry:
4646
%S = alloca inalloca %struct.ss
4747
%c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
48-
; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
48+
; CHECK: call fastcc i1 @g(%struct.ss* %S, %struct.ss* %S)
4949
ret i32 0
5050
}

‎llvm/test/Transforms/GlobalOpt/fastcc.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define internal i32 @j(i32* %m) {
2727
}
2828

2929
define internal i32 @inalloca(i32* inalloca %p) {
30-
; CHECK-LABEL: define internal i32 @inalloca(i32* inalloca %p)
30+
; CHECK-LABEL: define internal fastcc i32 @inalloca(i32* %p)
3131
%rv = load i32, i32* %p
3232
ret i32 %rv
3333
}
@@ -52,4 +52,4 @@ define void @call_things() {
5252
; CHECK: call fastcc i32 @g
5353
; CHECK: call coldcc i32 @h
5454
; CHECK: call i32 @j
55-
; CHECK: call i32 @inalloca(i32* inalloca %args)
55+
; CHECK: call fastcc i32 @inalloca(i32* %args)

0 commit comments

Comments
 (0)
Please sign in to comment.