diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -462,7 +462,8 @@ /// parts it can be promoted into. static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, unsigned MaxElements, bool IsRecursive, - SmallVectorImpl &ArgPartsVec) { + SmallVectorImpl &ArgPartsVec, + int &LoadStoreCount) { // Quick exit for unused arguments if (Arg->use_empty()) return true; @@ -611,6 +612,7 @@ if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false)) return false; Loads.push_back(LI); + ++LoadStoreCount; continue; } @@ -621,6 +623,7 @@ if (!*HandleEndUser(SI, SI->getValueOperand()->getType(), /* GuaranteedToExecute */ false)) return false; + ++LoadStoreCount; continue; // Only stores TO the argument is allowed, all the other stores are // unknown users @@ -750,6 +753,8 @@ if (PointerArgs.empty()) return nullptr; + bool MinSize = false; + // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. Also see if the function // is self-recursive. @@ -764,6 +769,15 @@ if (CB->isMustTailCall()) return nullptr; + // If the caller is marked minsize, this transformation may increase code + // size. We assume that there is more than one call to this function since + // otherwise this function would be inlined or is dead. + // Below we compare the number of loads/stores removed from the function with + // the number of introduced loads in callers to see if this is profitable + // code-size-wise. + if (CB->getFunction()->hasMinSize()) + MinSize = true; + if (CB->getFunction() == F) IsRecursive = true; } @@ -798,13 +812,16 @@ // If we can promote the pointer to its value. 
SmallVector ArgParts; + int LoadStoreCount = 0; - if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) { + if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts, + LoadStoreCount)) { SmallVector Types; for (const auto &Pair : ArgParts) Types.push_back(Pair.second.Ty); - if (areTypesABICompatible(Types, *F, TTI)) { + if (areTypesABICompatible(Types, *F, TTI) && + !(MinSize && F->hasNUsesOrMore(LoadStoreCount + 1))) { NumArgsAfterPromote += ArgParts.size() - 1; ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); } diff --git a/llvm/test/Transforms/ArgumentPromotion/minsize.ll b/llvm/test/Transforms/ArgumentPromotion/minsize.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/minsize.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=argpromotion -S < %s | FileCheck %s + +; Basic case without minsize, argpromo should happen. +define internal i32 @f1(ptr %p) { +; CHECK-LABEL: define internal i32 @f1 +; CHECK-SAME: (i32 [[P_0_VAL:%.*]]) { +; CHECK-NEXT: ret i32 [[P_0_VAL]] +; + %i = load i32, ptr %p + ret i32 %i +} + +define i32 @g1(ptr %p) { +; CHECK-LABEL: define i32 @g1 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[P_VAL:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[I:%.*]] = call i32 @f1(i32 [[P_VAL]]) +; CHECK-NEXT: ret i32 [[I]] +; + %i = call i32 @f1(ptr %p) + ret i32 %i +} + +define i32 @g2(ptr %p) { +; CHECK-LABEL: define i32 @g2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[P_VAL:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[I:%.*]] = call i32 @f1(i32 [[P_VAL]]) +; CHECK-NEXT: ret i32 [[I]] +; + %i = call i32 @f1(ptr %p) + ret i32 %i +} + +; With a minsize caller, argpromo shouldn't happen because we only eliminate one load but introduce two loads. 
+define internal i32 @f2(ptr %p) { +; CHECK-LABEL: define internal i32 @f2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[I]] +; + %i = load i32, ptr %p + ret i32 %i +} + +define i32 @h1(ptr %p) minsize { +; CHECK-LABEL: define i32 @h1 +; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[I:%.*]] = call i32 @f2(ptr [[P]]) +; CHECK-NEXT: ret i32 [[I]] +; + %i = call i32 @f2(ptr %p) + ret i32 %i +} + +define i32 @h2(ptr %p) { +; CHECK-LABEL: define i32 @h2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = call i32 @f2(ptr [[P]]) +; CHECK-NEXT: ret i32 [[I]] +; + %i = call i32 @f2(ptr %p) + ret i32 %i +} + +; With a minsize caller, argpromo should still happen because we eliminate two loads and introduce two loads. +define internal i32 @f3(ptr %p) { +; CHECK-LABEL: define internal i32 @f3 +; CHECK-SAME: (i32 [[P_0_VAL:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = add i32 [[P_0_VAL]], [[P_0_VAL]] +; CHECK-NEXT: ret i32 [[R]] +; + %i = load i32, ptr %p + %i2 = load i32, ptr %p + %r = add i32 %i, %i2 + ret i32 %r +} + +define i32 @i1(ptr %p) minsize { +; CHECK-LABEL: define i32 @i1 +; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[P_VAL:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[I:%.*]] = call i32 @f3(i32 [[P_VAL]]) +; CHECK-NEXT: ret i32 [[I]] +; + %i = call i32 @f3(ptr %p) + ret i32 %i +} + +define i32 @i2(ptr %p) { +; CHECK-LABEL: define i32 @i2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[P_VAL:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[I:%.*]] = call i32 @f3(i32 [[P_VAL]]) +; CHECK-NEXT: ret i32 [[I]] +; + %i = call i32 @f3(ptr %p) + ret i32 %i +}