Index: lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- lib/Transforms/IPO/ArgumentPromotion.cpp +++ lib/Transforms/IPO/ArgumentPromotion.cpp @@ -36,6 +36,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" @@ -78,6 +79,8 @@ const DataLayout *DL; private: + bool isDenselyPacked(Type *); + bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; CallGraphNode *DoPromotion(Function *F, @@ -125,6 +128,68 @@ return Changed; } +// isDenselyPacked - Returns true only if the type provably has no padding bytes +bool ArgPromotion::isDenselyPacked(Type *type) { + + // There is no size information, so be conservative + if (!type->isSized()) + return false; + + // If the alloc size is not equal to the storage size, then there are padding + // bytes. 
For x86_fp80 on x86-64, size: 80, alloc size: 128 + if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type)) + return false; + + if (!isa(type)) + return true; + + // For homogeneous sequential types, check for padding within members + if (SequentialType *seqTy = dyn_cast(type)) + return isa(seqTy) || isDenselyPacked(seqTy->getElementType()); + + // Check for padding within, between and after the elements of a struct + StructType *StructTy = cast(type); + const StructLayout *Layout = DL->getStructLayout(StructTy); + uint64_t StartPos = 0; + for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { + Type *ElTy = StructTy->getElementType(i); + if (!isDenselyPacked(ElTy)) + return false; + if (StartPos != Layout->getElementOffsetInBits(i)) + return false; + StartPos += DL->getTypeAllocSizeInBits(ElTy); + } + + return StartPos == Layout->getSizeInBits(); +} + +/// canPaddingBeAccessed - This method checks to see if the padding bytes of an +/// argument could be accessed. Currently, the method is conservative in the +/// face of PHINodes, but it's easy to extend if necessary +bool ArgPromotion::canPaddingBeAccessed(Argument *arg) { + + assert(arg->hasByValAttr()); + + // Check that the pointer is not captured; if the pointer is captured, it is + // very hard to make guarantees about which offsets are accessed + if (PointerMayBeCaptured(arg, true, true)) return true; + + // Scan through the uses recursively to make sure the pointer is always used + // sanely + SmallVector WorkList; + WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + if (isa(V)) + WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); + else if (!isa(V) && !isa(V)) + return true; + } + + return false; +} + /// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). 
If safe to promote some arguments, it @@ -172,9 +237,13 @@ Type *AgTy = cast(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe. This does not apply to - // inalloca. - if (PtrArg->hasByValAttr()) { + // pass the elements, which is always safe, if the passed value is densely + // packed or if we can prove the padding bytes are never accessed. This does + // not apply to inalloca. + bool isSafeToPromote = + PtrArg->hasByValAttr() && + (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg)); + if (isSafeToPromote) { if (StructType *STy = dyn_cast(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" Index: test/Transforms/ArgumentPromotion/fp80.ll =================================================================== --- test/Transforms/ArgumentPromotion/fp80.ll +++ test/Transforms/ArgumentPromotion/fp80.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -argpromotion -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%union.u = type { x86_fp80 } + +@b = internal global { { double, i16, i8, [5 x i8] } } { { double, i16, i8, [5 x i8] } { double 3.14, i16 9439, i8 25, [5 x i8] undef } }, align 16 + +%struct.Foo = type { i32, i64 } +@a = internal global %struct.Foo { i32 1, i64 2 }, align 8 + +; Function Attrs: nounwind uwtable +define i32 @main(i32 %argc, i8** nocapture readnone %argv) { +entry: + tail call fastcc i8 @foo(%union.u* byval align 16 bitcast ({ { double, i16, i8, [5 x i8] } }* @b to %union.u*)) + tail call fastcc x86_fp80 @baz(%union.u* byval align 16 bitcast ({ { double, i16, i8, [5 x i8] } }* @b to %union.u*)) + call i64 @qux(%struct.Foo* @a) + ret i32 0 +} + +; CHECK: internal fastcc i8 @foo(%union.u* byval +; foo loads byte 10 of %arg through a bitcast; that byte is x86_fp80 padding, so %arg must stay byval +define internal fastcc i8 @foo(%union.u* byval nocapture 
readonly align 16 %arg) { +entry: + %bitcast = bitcast %union.u* %arg to { double, i16, i8, [5 x i8] }* + %gep = getelementptr inbounds { double, i16, i8, [5 x i8] }* %bitcast, i64 0, i32 2 + %result = load i8* %gep + ret i8 %result +} + +; CHECK: internal fastcc x86_fp80 @baz(x86_fp80 +; baz only loads the x86_fp80 member itself, so no padding is accessed and the argument is promoted +define internal fastcc x86_fp80 @baz(%union.u* byval nocapture readonly align 16 %arg) { + %gep = getelementptr inbounds %union.u* %arg, i64 0, i32 0 + %fp80 = load x86_fp80* %gep + ret x86_fp80 %fp80 +} + +; CHECK: define internal i64 @qux(%struct.Foo* +define internal i64 @qux(%struct.Foo* byval %a) { + %p = bitcast %struct.Foo* %a to i64* + %v = load i64* %p + ret i64 %v +} Index: test/Transforms/ArgumentPromotion/tail.ll =================================================================== --- test/Transforms/ArgumentPromotion/tail.ll +++ test/Transforms/ArgumentPromotion/tail.ll @@ -1,6 +1,8 @@ ; RUN: opt %s -argpromotion -S -o - | FileCheck %s ; PR14710 +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + %pair = type { i32, i32 } declare i8* @foo(%pair*)