Index: lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- lib/Transforms/IPO/ArgumentPromotion.cpp +++ lib/Transforms/IPO/ArgumentPromotion.cpp @@ -78,6 +78,8 @@ const DataLayout *DL; private: + bool isDenselyPacked(Type *); + bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; CallGraphNode *DoPromotion(Function *F, @@ -125,6 +127,79 @@ return Changed; } +// hasPadding - This method checks to see if a type could have padding bytes +bool ArgPromotion::isDenselyPacked(Type *type) { + + // There is no size information be conservative + if (!type->isSized()) + return false; + + // If the alloc size is not equal to the storage size, then there are padding + // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128 + if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type)) + return false; + + if (!isa(type)) + return true; + + // For homogenous sequential types, check for padding within members + if (SequentialType *seqTy = dyn_cast(type)) + return isa(seqTy) || isDenselyPacked(seqTy->getElementType()); + + // Check for padding within and between elements of a struct + StructType *StructTy = cast(type); + const StructLayout *Layout = DL->getStructLayout(StructTy); + uint64_t StartPos = 0; + for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { + Type *ElTy = StructTy->getElementType(i); + if (!isDenselyPacked(ElTy)) + return false; + if (StartPos != Layout->getElementOffsetInBits(i)) + return false; + StartPos += DL->getTypeAllocSizeInBits(ElTy); + } + + return true; +} + +/// canPaddingBeRead - This method checks to see if the padding bytes of an +/// argument could be accessed. +bool ArgPromotion::canPaddingBeAccessed(Argument *arg) { + + assert(arg->hasByValAttr()); + + // Track all the pointers to the argument to make sure they are not captured + SmallPtrSet PtrValues; + PtrValues.insert(arg); + + // Track all of the stores + SmallVector Stores; + + // Scan through the uses recursively to make sure the pointer is always used + // sanely + SmallVector WorkList; + WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + if (isa(V) || isa(V)) { + if (PtrValues.insert(V)) + WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); + } else if (StoreInst *Store = dyn_cast(V)) { + Stores.push_back(Store); + } else if (!isa(V)) { + return true; + } + } + +// Check to make sure the pointers aren't captured + for (StoreInst *Store : Stores) + if (PtrValues.count(Store->getValueOperand())) + return true; + + return false; +} + /// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). If safe to promote some arguments, it @@ -172,9 +247,13 @@ Type *AgTy = cast(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe. This does not apply to - // inalloca. - if (PtrArg->hasByValAttr()) { + // pass the elements, which is always safe, if the passed value is densely + // packed or if we can prove the padding bytes are never accessed. This does + // not apply to inalloca. + bool isSafeToPromote = + PtrArg->hasByValAttr() && + (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg)); + if (isSafeToPromote) { if (StructType *STy = dyn_cast(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" Index: test/Transforms/ArgumentPromotion/fp80.ll =================================================================== --- test/Transforms/ArgumentPromotion/fp80.ll +++ test/Transforms/ArgumentPromotion/fp80.ll @@ -0,0 +1,58 @@ +; RUN: opt < %s -argpromotion -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%union.u = type { x86_fp80 } +%struct.s = type { double, i16, i8, [5 x i8] } + +@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16 + +%struct.Foo = type { i32, i64 } +@a = internal global %struct.Foo { i32 1, i64 2 }, align 8 + +define void @run() { +entry: + tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) + tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) + call i64 @AccessPaddingOfStruct(%struct.Foo* @a) + call i64 @CaptureAStruct(%struct.Foo* @a) + ret void +} + +; CHECK: internal i8 @UseLongDoubleUnsafely(%union.u* byval +define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { +entry: + %bitcast = bitcast %union.u* %arg to %struct.s* + %gep = getelementptr inbounds %struct.s* %bitcast, i64 0, i32 2 + %result = load i8* %gep + ret i8 %result +} + +; CHECK: internal x86_fp80 @UseLongDoubleSafely(x86_fp80 +define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) { + %gep = getelementptr inbounds %union.u* %arg, i64 0, i32 0 + %fp80 = load x86_fp80* %gep + ret x86_fp80 %fp80 +} + +; CHECK: define internal i64 @AccessPaddingOfStruct(%struct.Foo* +define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) { + %p = bitcast %struct.Foo* %a to i64* + %v = load i64* %p + ret i64 %v +} + +; CHECK: define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { +define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { +entry: + %a_ptr = alloca %struct.Foo* + br label %loop + +loop: + %phi = phi %struct.Foo* [ null, %entry ], [ %gep, %loop ] + %0 = phi %struct.Foo* [ %a, %entry ], [ %0, %loop ] + store %struct.Foo* %phi, %struct.Foo** %a_ptr + %gep = getelementptr %struct.Foo* %a, i64 0 + br label %loop +} Index: test/Transforms/ArgumentPromotion/tail.ll =================================================================== --- test/Transforms/ArgumentPromotion/tail.ll +++ test/Transforms/ArgumentPromotion/tail.ll @@ -1,6 +1,8 @@ ; RUN: opt %s -argpromotion -S -o - | FileCheck %s ; PR14710 +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + %pair = type { i32, i32 } declare i8* @foo(%pair*)