Recognise sret on any pointer argument and let ArgumentPromotion turn it into
noalias.

Argument::hasStructRetAttr() now queries the attribute slot of the argument
itself (getArgNo() + 1) instead of only accepting the first parameter, and
ArgumentPromotion replaces StructRet with NoAlias on the function and on the
call site of each of its users for every pointer argument it considers.

Index: lib/IR/Function.cpp
===================================================================
--- lib/IR/Function.cpp
+++ lib/IR/Function.cpp
@@ -154,10 +154,8 @@
 /// it in its containing function.
 bool Argument::hasStructRetAttr() const {
   if (!getType()->isPointerTy()) return false;
-  if (this != getParent()->arg_begin())
-    return false; // StructRet param must be first param
   return getParent()->getAttributes().
-    hasAttribute(1, Attribute::StructRet);
+    hasAttribute(getArgNo()+1, Attribute::StructRet);
 }
 
 /// hasReturnedAttr - Return true if this argument has the returned attribute on
Index: lib/Transforms/IPO/ArgumentPromotion.cpp
===================================================================
--- lib/Transforms/IPO/ArgumentPromotion.cpp
+++ lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -245,6 +245,24 @@
     Argument *PtrArg = PointerArgs[i];
     Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
 
+    // Replace sret attribute with noalias. This reduces register pressure by
+    // avoiding a register copy.
+    if (PtrArg->hasStructRetAttr()) {
+      unsigned ArgNo = PtrArg->getArgNo();
+      F->setAttributes(
+          F->getAttributes()
+              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      for (Use &U : F->uses()) {
+        CallSite CS(U.getUser());
+        CS.setAttributes(
+            CS.getAttributes()
+                .removeAttribute(F->getContext(), ArgNo + 1,
+                                 Attribute::StructRet)
+                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+      }
+    }
+
     // If this is a byval argument, and if the aggregate type is small, just
     // pass the elements, which is always safe, if the passed value is densely
     // packed or if we can prove the padding bytes are never accessed.
This does
Index: test/Transforms/ArgumentPromotion/sret.ll
===================================================================
--- /dev/null
+++ test/Transforms/ArgumentPromotion/sret.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
+; Promoting the {i32, i32}* argument of @add must also rewrite the sret
+; attribute on %r to noalias, both on the definition and at the call site.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; CHECK: define internal void @add(i32 %[[THIS1:.*]], i32 %[[THIS2:.*]], i32* noalias %[[SR:.*]])
+define internal void @add({i32, i32}* %this, i32* sret %r) {
+  %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0
+  %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1
+  %a = load i32, i32* %ap
+  %b = load i32, i32* %bp
+  ; CHECK: %[[AB:.*]] = add i32 %[[THIS1]], %[[THIS2]]
+  %ab = add i32 %a, %b
+  ; CHECK: store i32 %[[AB]], i32* %[[SR]]
+  store i32 %ab, i32* %r
+  ret void
+}
+
+; CHECK: define void @f()
+define void @f() {
+  ; CHECK: %[[R:.*]] = alloca i32
+  %r = alloca i32
+  %pair = alloca {i32, i32}
+
+  ; CHECK: call void @add(i32 %{{.*}}, i32 %{{.*}}, i32* noalias %[[R]])
+  call void @add({i32, i32}* %pair, i32* sret %r)
+  ret void
+}