Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -314,7 +314,8 @@
 ///
 /// Note that this will create all of the instructions with whatever insert
 /// point the \c InstCombiner currently is using.
-static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy) {
+static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
+                                      const Twine &Suffix = "") {
   Value *Ptr = LI.getPointerOperand();
   unsigned AS = LI.getPointerAddressSpace();
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
@@ -322,7 +323,7 @@
 
   LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
       IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
-      LI.getAlignment(), LI.getName());
+      LI.getAlignment(), LI.getName() + Suffix);
   MDBuilder MDB(NewLoad->getContext());
   for (const auto &MDPair : MD) {
     unsigned ID = MDPair.first;
@@ -495,6 +496,31 @@
   return nullptr;
 }
 
+static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
+  // FIXME: We could probably with some care handle both volatile and atomic
+  // loads here but it isn't clear that this is important.
+  if (!LI.isSimple())
+    return nullptr;
+
+  Type *T = LI.getType();
+  if (!T->isAggregateType())
+    return nullptr;
+
+  assert(LI.getAlignment() && "Alignment must be set at this point");
+
+  if (auto *ST = dyn_cast<StructType>(T)) {
+    // If the struct only has one element, we unpack.
+    if (ST->getNumElements() == 1) {
+      LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
+                                               ".unpack");
+      return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+        UndefValue::get(T), NewLoad, 0, LI.getName()));
+    }
+  }
+
+  return nullptr;
+}
+
 // If we can determine that all possible objects pointed to by the provided
 // pointer value are, not only dereferenceable, but also definitively less than
 // or equal to the provided maximum size, then return true. Otherwise, return
@@ -701,6 +727,9 @@
   // FIXME: Some of it is okay for atomic loads; needs refactoring.
   if (!LI.isSimple()) return nullptr;
 
+  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
+    return Res;
+
   // Do really simple store-to-load forwarding and load CSE, to catch cases
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
@@ -832,7 +861,7 @@
   if (!T->isAggregateType())
     return false;
 
-  if (StructType *ST = dyn_cast<StructType>(T)) {
+  if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only have one element, we unpack.
     if (ST->getNumElements() == 1) {
       V = IC.Builder->CreateExtractValue(V, 0);
Index: test/Transforms/InstCombine/unpack-fca.ll
===================================================================
--- test/Transforms/InstCombine/unpack-fca.ll
+++ test/Transforms/InstCombine/unpack-fca.ll
@@ -12,20 +12,58 @@
 
 declare i8* @allocmemory(i64)
 
-define void @structA() {
+define void @storeA() {
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to %A*
+; CHECK-LABEL: storeA
 ; CHECK: store %A__vtbl* @A__vtblZ
   store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
   ret void
 }
 
-define void @structOfA() {
+define void @storeStructOfA() {
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to { %A }*
+; CHECK-LABEL: storeStructOfA
 ; CHECK: store %A__vtbl* @A__vtblZ
   store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
   ret void
 }
+
+define %A @loadA() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %A*
+; CHECK-LABEL: loadA
+; CHECK: load %A__vtbl*,
+; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
+  %2 = load %A, %A* %1, align 8
+  ret %A %2
+}
+
+define { %A } @loadStructOfA() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to { %A }*
+; CHECK-LABEL: loadStructOfA
+; CHECK: load %A__vtbl*,
+; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
+; CHECK: insertvalue { %A } undef, %A {{.*}}, 0
+  %2 = load { %A }, { %A }* %1, align 8
+  ret { %A } %2
+}
+
+define { %A } @structOfA() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to { %A }*
+; CHECK-LABEL: structOfA
+; CHECK: store %A__vtbl* @A__vtblZ
+  store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
+  %2 = load { %A }, { %A }* %1, align 8
+; CHECK-NOT: load
+; CHECK: ret { %A } { %A { %A__vtbl* @A__vtblZ } }
+  ret { %A } %2
+}