Index: include/llvm/IR/DataLayout.h
===================================================================
--- include/llvm/IR/DataLayout.h
+++ include/llvm/IR/DataLayout.h
@@ -475,7 +475,8 @@
 class StructLayout {
   uint64_t StructSize;
   unsigned StructAlignment;
-  unsigned NumElements;
+  bool IsPadded : 1;
+  unsigned NumElements : 31;
   uint64_t MemberOffsets[1]; // variable sized array!
 public:
   uint64_t getSizeInBytes() const { return StructSize; }
@@ -484,6 +485,10 @@

   unsigned getAlignment() const { return StructAlignment; }

+  /// Returns whether the struct has padding between its fields.
+  /// Note that padding in nested elements is not taken into account.
+  bool hasPadding() const { return IsPadded; }
+
   /// \brief Given a valid byte offset into the structure, returns the structure
   /// index that contains it.
   unsigned getElementContainingOffset(uint64_t Offset) const;
Index: lib/IR/DataLayout.cpp
===================================================================
--- lib/IR/DataLayout.cpp
+++ lib/IR/DataLayout.cpp
@@ -41,6 +41,7 @@
   assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
   StructAlignment = 0;
   StructSize = 0;
+  IsPadded = false;
   NumElements = ST->getNumElements();

   // Loop over each of the elements, placing them in memory.
@@ -49,8 +50,10 @@
     unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty);

     // Add padding if necessary to align the data element properly.
-    if ((StructSize & (TyAlign-1)) != 0)
+    if ((StructSize & (TyAlign-1)) != 0) {
+      IsPadded = true;
       StructSize = RoundUpToAlignment(StructSize, TyAlign);
+    }

     // Keep track of maximum alignment constraint.
     StructAlignment = std::max(TyAlign, StructAlignment);
@@ -64,8 +67,10 @@

   // Add padding to the end of the struct so that it could be put in an array
   // and all array elements would be aligned correctly.
-  if ((StructSize & (StructAlignment-1)) != 0)
+  if ((StructSize & (StructAlignment-1)) != 0) {
+    IsPadded = true;
     StructSize = RoundUpToAlignment(StructSize, StructAlignment);
+  }
 }
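To make the new flag concrete, here is a quick sketch of what it reports. This assumes a typical 64-bit data layout (pointers and i64 are 8-byte aligned); %B is the type added to the tests below, the other type names are made up for illustration:

    %B = type { i8*, i64 }      ; fields at offsets 0 and 8, size 16 -> hasPadding() == false
    %P = type { i16, i64 }      ; 6 padding bytes after the i16      -> hasPadding() == true
    %T = type { i64, i32 }      ; 4 bytes of tail padding (size 16)  -> hasPadding() == true
    %K = type <{ i16, i64 }>    ; packed, no implicit padding        -> hasPadding() == false

As the doc comment says, only the struct's own fields are considered: a wrapper like { %P } reports hasPadding() == false even though its element is padded internally.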
Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -524,12 +524,40 @@
   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only have one element, we unpack.
-    if (ST->getNumElements() == 1) {
+    unsigned Count = ST->getNumElements();
+    if (Count == 1) {
       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                                ".unpack");
       return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
         UndefValue::get(T), NewLoad, 0, LI.getName()));
     }
+
+    // We don't want to break loads with padding here as we'd lose
+    // the knowledge that padding exists for the rest of the pipeline.
+    const DataLayout &DL = IC.getDataLayout();
+    auto *SL = DL.getStructLayout(ST);
+    if (SL->hasPadding())
+      return nullptr;
+
+    auto Name = LI.getName();
+    auto LoadName = LI.getName() + ".unpack";
+    auto EltName = Name + ".elt";
+    auto *Addr = LI.getPointerOperand();
+    Value *V = UndefValue::get(T);
+    auto *IdxType = Type::getInt32Ty(ST->getContext());
+    auto *Zero = ConstantInt::get(IdxType, 0);
+    for (unsigned i = 0; i < Count; i++) {
+      Value *Indices[2] = {
+        Zero,
+        ConstantInt::get(IdxType, i),
+      };
+      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), EltName);
+      auto *L = IC.Builder->CreateLoad(ST->getTypeAtIndex(i), Ptr, LoadName);
+      V = IC.Builder->CreateInsertValue(V, L, i);
+    }
+
+    V->setName(Name);
+    return IC.ReplaceInstUsesWith(LI, V);
   }

   if (auto *AT = dyn_cast<ArrayType>(T)) {
@@ -902,11 +930,36 @@

   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only have one element, we unpack.
-    if (ST->getNumElements() == 1) {
+    unsigned Count = ST->getNumElements();
+    if (Count == 1) {
       V = IC.Builder->CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);
       return true;
     }
+
+    // Likewise, we don't want to break stores with padding here as we'd
+    // lose the knowledge that padding exists for the rest of the pipeline.
+    const DataLayout &DL = IC.getDataLayout();
+    auto *SL = DL.getStructLayout(ST);
+    if (SL->hasPadding())
+      return false;
+
+    auto EltName = V->getName() + ".elt";
+    auto *Addr = SI.getPointerOperand();
+    auto AddrName = Addr->getName() + ".repack";
+    auto *IdxType = Type::getInt32Ty(ST->getContext());
+    auto *Zero = ConstantInt::get(IdxType, 0);
+    for (unsigned i = 0; i < Count; i++) {
+      Value *Indices[2] = {
+        Zero,
+        ConstantInt::get(IdxType, i),
+      };
+      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName);
+      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+      IC.Builder->CreateStore(Val, Ptr);
+    }
+
+    return true;
   }

   if (auto *AT = dyn_cast<ArrayType>(T)) {
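Taken together, the two hunks rewrite padding-free aggregate loads and stores element by element. A sketch of both directions on the %B type from the updated tests (assuming the usual 64-bit layout; value names are indicative of the .unpack/.elt/.repack suffixes chosen above, not exact):

    ; load: %1 = load %B, %B* %b.ptr  becomes
    %1.elt = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
    %1.unpack = load i8*, i8** %1.elt
    %2 = insertvalue %B undef, i8* %1.unpack, 0
    %1.elt1 = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
    %1.unpack2 = load i64, i64* %1.elt1
    %3 = insertvalue %B %2, i64 %1.unpack2, 1

    ; store: store %B %v, %B* %b.ptr  becomes
    %b.ptr.repack = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
    %v.elt = extractvalue %B %v, 0
    store i8* %v.elt, i8** %b.ptr.repack
    %b.ptr.repack1 = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
    %v.elt2 = extractvalue %B %v, 1
    store i64 %v.elt2, i64* %b.ptr.repack1

A struct with padding, e.g. { i16, i64 }, is deliberately left as a single wide access so the rest of the pipeline keeps the knowledge that padding exists.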
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2311,9 +2311,10 @@
   }
   if (LoadInst *L = dyn_cast<LoadInst>(Agg))
     // If the (non-volatile) load only has one use, we can rewrite this to a
-    // load from a GEP. This reduces the size of the load.
-    // FIXME: If a load is used only by extractvalue instructions then this
-    // could be done regardless of having multiple uses.
+    // load from a GEP. This reduces the size of the load. If a load is used
+    // only by extractvalue instructions then this either must have been
+    // optimized before, or it is a struct with padding, in which case we
+    // don't want to do the transformation as it loses padding knowledge.
     if (L->isSimple() && L->hasOneUse()) {
       // extractvalue has integer indices, getelementptr has Value*s. Convert.
       SmallVector<Value *, 4> Indices;
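The comment change reflects the new invariant rather than new logic: the transformation itself still only fires for simple, single-use loads. A sketch of the case it covers (hypothetical names; with this patch, a multi-use load of a padding-free struct would already have been unpacked by the code above before reaching this point):

    ; before: single-use load of a padded struct feeding extractvalue
    %L = load { i16, i32 }, { i16, i32 }* %pair
    %E = extractvalue { i16, i32 } %L, 1

    ; after: rewritten to an inbounds gep + narrower load
    %gep = getelementptr inbounds { i16, i32 }, { i16, i32 }* %pair, i64 0, i32 1
    %E = load i32, i32* %gep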
Index: test/Transforms/InstCombine/cast.ll
===================================================================
--- test/Transforms/InstCombine/cast.ll
+++ test/Transforms/InstCombine/cast.ll
@@ -722,7 +722,7 @@
 ; CHECK: ret i1 false
 }

-%s = type { i32, i32, i32 }
+%s = type { i32, i32, i16 }

 define %s @test68(%s *%p, i64 %i) {
 ; CHECK-LABEL: @test68(
Index: test/Transforms/InstCombine/extractvalue.ll
===================================================================
--- test/Transforms/InstCombine/extractvalue.ll
+++ test/Transforms/InstCombine/extractvalue.ll
@@ -48,16 +48,16 @@
 ; CHECK: call {{.*}}(i32 [[LOAD]])
 ; CHECK-NOT: extractvalue
 ; CHECK: ret i32 [[LOAD]]
-define i32 @extract2gep({i32, i32}* %pair, i32* %P) {
+define i32 @extract2gep({i16, i32}* %pair, i32* %P) {
         ; The load + extractvalue should be converted
         ; to an inbounds gep + smaller load.
         ; The new load should be in the same spot as the old load.
-        %L = load {i32, i32}, {i32, i32}* %pair
+        %L = load {i16, i32}, {i16, i32}* %pair
         store i32 0, i32* %P
         br label %loop
 loop:
-        %E = extractvalue {i32, i32} %L, 1
+        %E = extractvalue {i16, i32} %L, 1
         %C = call i32 @baz(i32 %E)
         store i32 %C, i32* %P
         %cond = icmp eq i32 %C, 0
@@ -67,17 +67,17 @@
         ret i32 %E
 }

-; CHECK-LABEL: define i32 @doubleextract2gep(
+; CHECK-LABEL: define i16 @doubleextract2gep(
 ; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, {{.*}}* %arg, i64 0, i32 1, i32 1
-; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32, i32* [[GEP]]
-; CHECK-NEXT: ret i32 [[LOAD]]
-define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i16, i16* [[GEP]]
+; CHECK-NEXT: ret i16 [[LOAD]]
+define i16 @doubleextract2gep({i16, {i32, i16}}* %arg) {
         ; The load + extractvalues should be converted
         ; to a 3-index inbounds gep + smaller load.
-        %L = load {i32, {i32, i32}}, {i32, {i32, i32}}* %arg
-        %E1 = extractvalue {i32, {i32, i32}} %L, 1
-        %E2 = extractvalue {i32, i32} %E1, 1
-        ret i32 %E2
+        %L = load {i16, {i32, i16}}, {i16, {i32, i16}}* %arg
+        %E1 = extractvalue {i16, {i32, i16}} %L, 1
+        %E2 = extractvalue {i32, i16} %E1, 1
+        ret i16 %E2
 }

 ; CHECK: define i32 @nogep-multiuse
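A note on the test churn in these two files: the element types change so that the structs now contain padding and the new unpacking bails out via hasPadding(), which preserves the gep-folding behavior these tests were written to pin down. Padding-free aggregates are covered by unpack-fca.ll below. Under the usual layout where i32 is 4-byte aligned:

    %s = type { i32, i32, i16 }   ; size 12, 2 bytes of tail padding -> not unpacked
    ;    { i16, i32 }             ; 2 padding bytes after the i16    -> not unpacked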
Index: test/Transforms/InstCombine/unpack-fca.ll
===================================================================
--- test/Transforms/InstCombine/unpack-fca.ll
+++ test/Transforms/InstCombine/unpack-fca.ll
@@ -5,110 +5,134 @@

 %A__vtbl = type { i8*, i32 (%A*)* }
 %A = type { %A__vtbl* }
+%B = type { i8*, i64 }

 @A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }

 declare i32 @A.foo(%A* nocapture %this)

-declare i8* @allocmemory(i64)
-
-define void @storeA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to %A*
+define void @storeA(%A* %a.ptr) {
 ; CHECK-LABEL: storeA
-; CHECK: store %A__vtbl* @A__vtblZ
-  store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+  store %A { %A__vtbl* @A__vtblZ }, %A* %a.ptr, align 8
+  ret void
+}
+
+define void @storeB(%B* %b.ptr) {
+; CHECK-LABEL: storeB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: store i8* null, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: store i64 42, i64* [[GEP2]], align 8
+; CHECK-NEXT: ret void
+  store %B { i8* null, i64 42 }, %B* %b.ptr, align 8
   ret void
 }

-define void @storeStructOfA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to { %A }*
+define void @storeStructOfA({ %A }* %sa.ptr) {
 ; CHECK-LABEL: storeStructOfA
-; CHECK: store %A__vtbl* @A__vtblZ
-  store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+  store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8
   ret void
 }

-define void @storeArrayOfA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to [1 x %A]*
+define void @storeArrayOfA([1 x %A]* %aa.ptr) {
 ; CHECK-LABEL: storeArrayOfA
-; CHECK: store %A__vtbl* @A__vtblZ
-  store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+  store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %aa.ptr, align 8
   ret void
 }

-define void @storeStructOfArrayOfA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to { [1 x %A] }*
+define void @storeStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
 ; CHECK-LABEL: storeStructOfArrayOfA
-; CHECK: store %A__vtbl* @A__vtblZ
-  store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+  store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %saa.ptr, align 8
   ret void
 }

-define %A @loadA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to %A*
+define %A @loadA(%A* %a.ptr) {
 ; CHECK-LABEL: loadA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-  %2 = load %A, %A* %1, align 8
-  ret %A %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: ret %A [[IV]]
+  %1 = load %A, %A* %a.ptr, align 8
+  ret %A %1
 }
-define { %A } @loadStructOfA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to { %A }*
+define %B @loadB(%B* %b.ptr) {
+; CHECK-LABEL: loadB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: [[LOAD1:%[a-z0-9\.]+]] = load i8*, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %B undef, i8* [[LOAD1]], 0
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: [[LOAD2:%[a-z0-9\.]+]] = load i64, i64* [[GEP2]], align 8
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue %B [[IV1]], i64 [[LOAD2]], 1
+; CHECK-NEXT: ret %B [[IV2]]
+  %1 = load %B, %B* %b.ptr, align 8
+  ret %B %1
+}
+
+define { %A } @loadStructOfA({ %A }* %sa.ptr) {
 ; CHECK-LABEL: loadStructOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue { %A } undef, %A {{.*}}, 0
-  %2 = load { %A }, { %A }* %1, align 8
-  ret { %A } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue { %A } undef, %A [[IV1]], 0
+; CHECK-NEXT: ret { %A } [[IV2]]
+  %1 = load { %A }, { %A }* %sa.ptr, align 8
+  ret { %A } %1
 }

-define [1 x %A] @loadArrayOfA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to [1 x %A]*
+define [1 x %A] @loadArrayOfA([1 x %A]* %aa.ptr) {
 ; CHECK-LABEL: loadArrayOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue [1 x %A] undef, %A {{.*}}, 0
-  %2 = load [1 x %A], [1 x %A]* %1, align 8
-  ret [1 x %A] %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0
+; CHECK-NEXT: ret [1 x %A] [[IV2]]
+  %1 = load [1 x %A], [1 x %A]* %aa.ptr, align 8
+  ret [1 x %A] %1
 }

-define { [1 x %A] } @loadStructOfArrayOfA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to { [1 x %A] }*
+define { [1 x %A] } @loadStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
 ; CHECK-LABEL: loadStructOfArrayOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue [1 x %A] undef, %A {{.*}}, 0
-; CHECK: insertvalue { [1 x %A] } undef, [1 x %A] {{.*}}, 0
-  %2 = load { [1 x %A] }, { [1 x %A] }* %1, align 8
-  ret { [1 x %A] } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0
+; CHECK-NEXT: [[IV3:%[a-z0-9\.]+]] = insertvalue { [1 x %A] } undef, [1 x %A] [[IV2]], 0
+; CHECK-NEXT: ret { [1 x %A] } [[IV3]]
+  %1 = load { [1 x %A] }, { [1 x %A] }* %saa.ptr, align 8
+  ret { [1 x %A] } %1
 }

-define { %A } @structOfA() {
-body:
-  %0 = tail call i8* @allocmemory(i64 32)
-  %1 = bitcast i8* %0 to { %A }*
+define { %A } @structOfA({ %A }* %sa.ptr) {
 ; CHECK-LABEL: structOfA
-; CHECK: store %A__vtbl* @A__vtblZ
-  store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
-  %2 = load { %A }, { %A }* %1, align 8
-; CHECK-NOT: load
-; CHECK: ret { %A } { %A { %A__vtbl* @A__vtblZ } }
-  ret { %A } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret { %A } { %A { %A__vtbl* @A__vtblZ } }
+  store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8
+  %1 = load { %A }, { %A }* %sa.ptr, align 8
+  ret { %A } %1
+}
+
+define %B @structB(%B* %b.ptr) {
+; CHECK-LABEL: structB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: store i8* null, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: store i64 42, i64* [[GEP2]], align 8
+; CHECK-NEXT: ret %B { i8* null, i64 42 }
+  store %B { i8* null, i64 42 }, %B* %b.ptr, align 8
+  %1 = load %B, %B* %b.ptr, align 8
+  ret %B %1
 }