Index: lib/Transforms/Scalar/AggregateMemAccessRemoval.cpp
===================================================================
--- lib/Transforms/Scalar/AggregateMemAccessRemoval.cpp
+++ lib/Transforms/Scalar/AggregateMemAccessRemoval.cpp
@@ -54,6 +54,11 @@
 
   void runOnLoad(LoadInst* LI);
   void runOnStore(StoreInst* SI);
+
+  Value* addToStore(IRBuilder<> &Builder, Value* V, unsigned Size,
+                    Value* E, unsigned ElementOffset, unsigned ElementSize);
+  Value* getFromLoad(IRBuilder<> &Builder, Value* L,
+                     unsigned Size, Type* T, unsigned ElementOffset);
 };
 }
 
@@ -110,9 +115,10 @@
     if (ST->isOpaque())
       return;
 
+    InstrsToErase.push_back(LI);
+
     // If the struct only have one element, we unpack.
     if (ST->getNumElements() == 1) {
-      InstrsToErase.push_back(LI);
       LoadInst* NewLI = Builder.CreateLoad(
         Builder.CreateStructGEP(LI->getPointerOperand(), 0)
       );
@@ -124,7 +130,98 @@
       runOnLoad(NewLI);
       return;
     }
+
+    const StructLayout* SL = DL->getStructLayout(ST);
+    uint64_t Size = DL->getTypeStoreSize(ST);
+    unsigned Align = LI->getAlignment();
+
+    Value* Addr = LI->getPointerOperand();
+    unsigned AddressSpace = LI->getPointerAddressSpace();
+    IntegerType* PT = Type::getIntNTy(*C, Size * 8);
+    Addr = Builder.CreatePointerCast(Addr, PointerType::get(PT, AddressSpace));
+
+    LoadInst* ELI = Builder.CreateLoad(Addr);
+    ELI->setAlignment(Align);
+
+    Value* V = UndefValue::get(T);
+    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+      Value* E = getFromLoad(Builder, ELI, Size, ST->getElementType(i), SL->getElementOffset(i));
+      V = Builder.CreateInsertValue(V, E, i);
+    }
+
+    LI->replaceAllUsesWith(V);
+  } else if (ArrayType* AT = dyn_cast<ArrayType>(T)) {
+    InstrsToErase.push_back(LI);
+
+    Value* V = UndefValue::get(T);
+    Value* Addr = LI->getPointerOperand();
+    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+      LoadInst* NewLI = Builder.CreateLoad(Builder.CreateStructGEP(Addr, i));
+      V = Builder.CreateInsertValue(V, NewLI, i);
+      runOnLoad(NewLI);
+    }
+
+    LI->replaceAllUsesWith(V);
+  }
+}
+
+Value* AggregateMemAccessRemoval::getFromLoad(IRBuilder<> &Builder, Value* L,
+                                              unsigned Size, Type* T,
+                                              unsigned ElementOffset) {
+  // If the type is an aggregate, recurse into its elements.
+  if (T->isAggregateType()) {
+    if (StructType* ST = dyn_cast<StructType>(T)) {
+      assert(!ST->isOpaque() && "Can't load opaque struct");
+
+      Value* E = UndefValue::get(T);
+      const StructLayout* SL = DL->getStructLayout(ST);
+      for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+        unsigned SEOffset = ElementOffset + SL->getElementOffset(i);
+        Value* SE = getFromLoad(Builder, L, Size, ST->getElementType(i), SEOffset);
+        E = Builder.CreateInsertValue(E, SE, i);
+      }
+
+      return E;
+    } else if (ArrayType* AT = dyn_cast<ArrayType>(T)) {
+      Value* E = UndefValue::get(T);
+      Type* ET = AT->getElementType();
+      unsigned SESize = DL->getTypeAllocSize(ET);
+      unsigned SEOffset = ElementOffset;
+      for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i, SEOffset += SESize) {
+        Value* SE = getFromLoad(Builder, L, Size, ET, SEOffset);
+        E = Builder.CreateInsertValue(E, SE, i);
+      }
+
+      return E;
+    }
+
+    llvm_unreachable("Invalid Aggregate Type");
   }
+
+  unsigned ElementSize = DL->getTypeStoreSize(T);
+
+  assert(ElementSize <= Size && "Element does not fit into value.");
+  if (ElementOffset > 0) {
+    unsigned ShiftAmt;
+    if (DL->isLittleEndian())
+      ShiftAmt = ElementOffset * 8;
+    else
+      ShiftAmt = (Size - ElementSize - ElementOffset) * 8;
+    if (ShiftAmt)
+      L = Builder.CreateLShr(L, ShiftAmt);
+  }
+
+  if (ElementSize < Size) {
+    L = Builder.CreateTrunc(L, Type::getIntNTy(*C, ElementSize * 8));
+  }
+
+  // Cast the wide integer back to the element's type.
+  if (T->getScalarType()->isPointerTy())
+    L = Builder.CreateIntToPtr(L, T);
+  else if (!T->isIntegerTy())
+    L = Builder.CreateBitCast(L, T);
+
+  return L;
 }
 
 void AggregateMemAccessRemoval::runOnStore(StoreInst* SI) {
@@ -140,16 +237,115 @@
     if (ST->isOpaque())
       return;
 
+    InstrsToErase.push_back(SI);
+
     // If the struct only have one element, we unpack.
     if (ST->getNumElements() == 1) {
-      InstrsToErase.push_back(SI);
       runOnStore(Builder.CreateStore(
         Builder.CreateExtractValue(V, 0),
         Builder.CreateStructGEP(SI->getPointerOperand(), 0)
       ));
       return;
     }
+
+    const StructLayout* SL = DL->getStructLayout(ST);
+    uint64_t Size = DL->getTypeStoreSize(ST);
+    unsigned Align = SI->getAlignment();
+
+    IntegerType* PT = Type::getIntNTy(*C, Size * 8);
+    ConstantInt* Zero = ConstantInt::get(PT, 0);
+    Value* NV = Zero;
+
+    Value* Addr = SI->getPointerOperand();
+    unsigned AddressSpace = SI->getPointerAddressSpace();
+    Addr = Builder.CreatePointerCast(Addr, PointerType::get(PT, AddressSpace));
+
+    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+      unsigned ElementOffset = SL->getElementOffset(i);
+      Value* E = Builder.CreateExtractValue(V, i);
+      unsigned ElementSize = DL->getTypeStoreSize(E->getType());
+      NV = addToStore(Builder, NV, Size, E, ElementOffset, ElementSize);
+    }
+
+    Builder.CreateStore(NV, Addr)->setAlignment(Align);
+  } else if (ArrayType* AT = dyn_cast<ArrayType>(T)) {
+    InstrsToErase.push_back(SI);
+
+    Value* Addr = SI->getPointerOperand();
+    unsigned Align = SI->getAlignment();
+    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+      StoreInst* ESI = Builder.CreateStore(
+        Builder.CreateExtractValue(V, i),
+        Builder.CreateStructGEP(Addr, i)
+      );
+      ESI->setAlignment(Align);
+      runOnStore(ESI);
+    }
+  }
+}
+
+Value* AggregateMemAccessRemoval::addToStore(IRBuilder<> &Builder, Value* V, unsigned Size,
+                                             Value* E, unsigned ElementOffset, unsigned ElementSize) {
+  DEBUG(dbgs() << "\t\tinserting : " << *E << " at offset " << ElementOffset << " into " << *V << "\n");
+
+  Type* ET = E->getType();
+
+  // If the type is an aggregate, recurse into its elements.
+  if (ET->isAggregateType()) {
+    if (StructType* ST = dyn_cast<StructType>(ET)) {
+      assert(!ST->isOpaque() && "Can't store opaque struct");
+
+      const StructLayout* SL = DL->getStructLayout(ST);
+      for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+        unsigned SEOffset = ElementOffset + SL->getElementOffset(i);
+
+        assert(SEOffset < Size && "Element is past the end of value.");
+
+        Value* SE = Builder.CreateExtractValue(E, i);
+        unsigned SESize = DL->getTypeStoreSize(SE->getType());
+
+        V = addToStore(Builder, V, Size, SE, SEOffset, SESize);
+      }
+
+      return V;
+    } else if (ArrayType* AT = dyn_cast<ArrayType>(ET)) {
+      unsigned SESize = DL->getTypeAllocSize(AT->getElementType());
+      unsigned SEOffset = ElementOffset;
+      unsigned i = 0;
+
+      for (unsigned e = AT->getNumElements(); i != e; ++i, SEOffset += SESize) {
+        assert(SEOffset < Size && "Element is past the end of value.");
+
+        Value* SE = Builder.CreateExtractValue(E, i);
+        V = addToStore(Builder, V, Size, SE, SEOffset, SESize);
+      }
+
+      return V;
+    }
+
+    llvm_unreachable("Invalid Aggregate Type");
   }
+
+  // Transform the element into an integer.
+  if (E->getType()->getScalarType()->isPointerTy())
+    E = Builder.CreatePtrToInt(E, DL->getIntPtrType(ET));
+  if (!E->getType()->isIntegerTy())
+    E = Builder.CreateBitCast(E, IntegerType::get(*C, ElementSize * 8));
+
+  assert(ElementSize <= Size && "Element does not fit into value.");
+  if (ElementSize < Size)
+    E = Builder.CreateZExt(E, Type::getIntNTy(*C, Size * 8));
+
+  unsigned ShiftAmt;
+  if (DL->isLittleEndian())
+    ShiftAmt = ElementOffset * 8;
+  else
+    ShiftAmt = (Size - ElementSize - ElementOffset) * 8;
+  if (ShiftAmt)
+    E = Builder.CreateShl(E, ShiftAmt);
+
+  return Builder.CreateOr(V, E);
 }
 
 void AggregateMemAccessRemoval::getAnalysisUsage(AnalysisUsage &AU) const {
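For illustration, the load rewrite performed by runOnLoad/getFromLoad on a
two-field struct looks roughly as follows. This is a sketch: the type %B and
the SSA names (%p, %raw, ...) are illustrative, not verbatim pass output.

    ; before, with %B = type { %B__vtbl*, i32 } (16-byte store size)
    %val = load %B* %p, align 8

    ; after: one wide integer load, then lshr/trunc/cast per field
    %ip   = bitcast %B* %p to i128*
    %raw  = load i128* %ip, align 8
    %f0.i = trunc i128 %raw to i64
    %f0   = inttoptr i64 %f0.i to %B__vtbl*
    %v.0  = insertvalue %B undef, %B__vtbl* %f0, 0
    %r.1  = lshr i128 %raw, 64          ; field 1 sits at byte offset 8
    %f1   = trunc i128 %r.1 to i32
    %v.1  = insertvalue %B %v.0, i32 %f1, 1

All uses of the original aggregate load are then rewritten to %v.1 and the
load itself is queued for erasure.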
Index: test/Transforms/AggregateMemAccessRemoval/load.ll
===================================================================
--- test/Transforms/AggregateMemAccessRemoval/load.ll
+++ test/Transforms/AggregateMemAccessRemoval/load.ll
@@ -4,7 +4,15 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 %A__vtbl = type { i8*, i32 (%A*)* }
+%B__vtbl = type { i8*, i32 (%B*)* }
+%C__vtbl = type { i8*, i32 (%C*)* }
+%D__vtbl = type { i8*, i32 (%D*)* }
+%E__vtbl = type { i8*, i32 (%E*)* }
 %A = type { %A__vtbl* }
+%B = type { %B__vtbl*, i32 }
+%C = type { %C__vtbl*, i32, { i8 }, { i8 } }
+%D = type { %D__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 } }
+%E = type { %E__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 }, [2 x i32] }
 
 declare i8* @allocmemory(i64)
 
@@ -18,3 +26,150 @@
   ret %A %2
 }
 
+define %B @structB() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %B*
+; CHECK: load i128* {{.*}}, align 8
+; CHECK: trunc i128 {{.*}} to i64
+; CHECK: inttoptr i64 {{.*}} to %B__vtbl*
+; CHECK: insertvalue %B undef, %B__vtbl* {{.*}}, 0
+; CHECK: lshr i128 {{.*}}, 64
+; CHECK: trunc i128 {{.*}} to i32
+; CHECK: insertvalue %B {{.*}}, i32 {{.*}}, 1
+  %2 = load %B* %1, align 8
+  ret %B %2
+}
+
+define %C @structC() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %C*
+; CHECK: load i128* {{.*}}, align 8
+; CHECK: trunc i128 {{.*}} to i64
+; CHECK: inttoptr i64 {{.*}} to %C__vtbl*
+; CHECK: insertvalue %C undef, %C__vtbl* {{.*}}, 0
+; CHECK: lshr i128 {{.*}}, 64
+; CHECK: trunc i128 {{.*}} to i32
+; CHECK: insertvalue %C {{.*}}, i32 {{.*}}, 1
+; CHECK: lshr i128 {{.*}}, 96
+; CHECK: trunc i128 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %C {{.*}}, { i8 } {{.*}}, 2
+; CHECK: lshr i128 {{.*}}, 104
+; CHECK: trunc i128 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %C {{.*}}, { i8 } {{.*}}, 3
+  %2 = load %C* %1, align 8
+  ret %C %2
+}
+
+define %D @structD() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %D*
+; CHECK: load i192* {{.*}}, align 8
+; CHECK: trunc i192 {{.*}} to i64
+; CHECK: inttoptr i64 {{.*}} to %D__vtbl*
+; CHECK: insertvalue %D undef, %D__vtbl* {{.*}}, 0
+; CHECK: lshr i192 {{.*}}, 64
+; CHECK: trunc i192 {{.*}} to i32
+; CHECK: insertvalue %D {{.*}}, i32 {{.*}}, 1
+; CHECK: lshr i192 {{.*}}, 96
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %D {{.*}}, { i8 } {{.*}}, 2
+; CHECK: lshr i192 {{.*}}, 104
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %D {{.*}}, { i8 } {{.*}}, 3
+; CHECK: lshr i192 {{.*}}, 112
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } undef, i8 {{.*}}, 0
+; CHECK: lshr i192 {{.*}}, 120
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } {{.*}}, i8 {{.*}}, 1
+; CHECK: lshr i192 {{.*}}, 128
+; CHECK: trunc i192 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } {{.*}}, i8 {{.*}}, 2
+; CHECK: insertvalue %D {{.*}}, { i8, i8, i8 } {{.*}}, 4
+  %2 = load %D* %1, align 8
+  ret %D %2
+}
+
+define %E @structE() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %E*
+; CHECK: load i256* {{.*}}, align 8
+; CHECK: trunc i256 {{.*}} to i64
+; CHECK: inttoptr i64 {{.*}} to %E__vtbl*
+; CHECK: insertvalue %E undef, %E__vtbl* {{.*}}, 0
+; CHECK: lshr i256 {{.*}}, 64
+; CHECK: trunc i256 {{.*}} to i32
+; CHECK: insertvalue %E {{.*}}, i32 {{.*}}, 1
+; CHECK: lshr i256 {{.*}}, 96
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %E {{.*}}, { i8 } {{.*}}, 2
+; CHECK: lshr i256 {{.*}}, 104
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8 } undef, i8 {{.*}}, 0
+; CHECK: insertvalue %E {{.*}}, { i8 } {{.*}}, 3
+; CHECK: lshr i256 {{.*}}, 112
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } undef, i8 {{.*}}, 0
+; CHECK: lshr i256 {{.*}}, 120
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } {{.*}}, i8 {{.*}}, 1
+; CHECK: lshr i256 {{.*}}, 128
+; CHECK: trunc i256 {{.*}} to i8
+; CHECK: insertvalue { i8, i8, i8 } {{.*}}, i8 {{.*}}, 2
+; CHECK: insertvalue %E {{.*}}, { i8, i8, i8 } {{.*}}, 4
+; CHECK: lshr i256 {{.*}}, 160
+; CHECK: trunc i256 {{.*}} to i32
+; CHECK: insertvalue [2 x i32] undef, i32 {{.*}}, 0
+; CHECK: lshr i256 {{.*}}, 192
+; CHECK: trunc i256 {{.*}} to i32
+; CHECK: insertvalue [2 x i32] {{.*}}, i32 {{.*}}, 1
+; CHECK: insertvalue %E {{.*}}, [2 x i32] {{.*}}, 5
+  %2 = load %E* %1, align 8
+  ret %E %2
+}
+
+define void @arrays() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to [1 x i64]*
+; CHECK: load i64*
+; CHECK: insertvalue [1 x i64] undef, i64 {{.*}}, 0
+  %2 = load [1 x i64]* %1, align 8
+  %3 = bitcast i8* %0 to [2 x i64]*
+; CHECK: load i64*
+; CHECK: insertvalue [2 x i64] undef, i64 {{.*}}, 0
+; CHECK: load i64*
+; CHECK: insertvalue [2 x i64] {{.*}}, i64 {{.*}}, 1
+  %4 = load [2 x i64]* %3, align 8
+  %5 = bitcast i8* %0 to [2 x %A]*
+; CHECK: load %A__vtbl**
+; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
+; CHECK: insertvalue [2 x %A] undef, %A {{.*}}, 0
+; CHECK: load %A__vtbl**
+; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
+; CHECK: insertvalue [2 x %A] {{.*}}, %A {{.*}}, 1
+  %6 = load [2 x %A]* %5, align 8
+  %7 = bitcast i8* %0 to [2 x %B]*
+; CHECK: inttoptr i64 {{.*}} to %B__vtbl*
+; CHECK: insertvalue %B undef, %B__vtbl* {{.*}}, 0
+; CHECK: trunc i128 {{.*}} to i32
+; CHECK: insertvalue %B {{.*}}, i32 {{.*}}, 1
+; CHECK: insertvalue [2 x %B] undef, %B {{.*}}, 0
+; CHECK: inttoptr i64 {{.*}} to %B__vtbl*
+; CHECK: insertvalue %B undef, %B__vtbl* {{.*}}, 0
+; CHECK: trunc i128 {{.*}} to i32
+; CHECK: insertvalue %B {{.*}}, i32 {{.*}}, 1
+; CHECK: insertvalue [2 x %B] {{.*}}, %B {{.*}}, 1
+  %8 = load [2 x %B]* %7, align 8
+  ret void
+}
+
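The store path mirrors the load path: addToStore packs each field into one
wide integer (ptrtoint/bitcast, zext, shl, or) and runOnStore emits a single
integer store; for constant aggregates the whole chain folds to the constant
expressions checked below. A slightly simplified sketch with illustrative
names (the or against the initial zero accumulator is omitted):

    ; before
    store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %p, align 8

    ; after, prior to constant folding
    %f0 = ptrtoint %B__vtbl* @B__vtblZ to i64
    %e0 = zext i64 %f0 to i128
    %e1 = zext i32 42 to i128
    %s1 = shl i128 %e1, 64              ; field 1 sits at byte offset 8
    %nv = or i128 %e0, %s1              ; 42 << 64 = 774763251095801167872
    %ip = bitcast %B* %p to i128*
    store i128 %nv, i128* %ip, align 8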
Index: test/Transforms/AggregateMemAccessRemoval/store.ll
===================================================================
--- test/Transforms/AggregateMemAccessRemoval/store.ll
+++ test/Transforms/AggregateMemAccessRemoval/store.ll
@@ -4,11 +4,27 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 %A__vtbl = type { i8*, i32 (%A*)* }
+%B__vtbl = type { i8*, i32 (%B*)* }
+%C__vtbl = type { i8*, i32 (%C*)* }
+%D__vtbl = type { i8*, i32 (%D*)* }
+%E__vtbl = type { i8*, i32 (%E*)* }
 %A = type { %A__vtbl* }
+%B = type { %B__vtbl*, i32 }
+%C = type { %C__vtbl*, i32, { i8 }, { i8 } }
+%D = type { %D__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 } }
+%E = type { %E__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 }, [2 x i32] }
 
 @A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
+@B__vtblZ = constant %B__vtbl { i8* null, i32 (%B*)* @B.foo }
+@C__vtblZ = constant %C__vtbl { i8* null, i32 (%C*)* @C.foo }
+@D__vtblZ = constant %D__vtbl { i8* null, i32 (%D*)* @D.foo }
+@E__vtblZ = constant %E__vtbl { i8* null, i32 (%E*)* @E.foo }
 
 declare i32 @A.foo(%A* nocapture %this)
+declare i32 @B.foo(%B* nocapture %this)
+declare i32 @C.foo(%C* nocapture %this)
+declare i32 @D.foo(%D* nocapture %this)
+declare i32 @E.foo(%E* nocapture %this)
 
 declare i8* @allocmemory(i64)
 
@@ -18,6 +34,39 @@
   %1 = bitcast i8* %0 to %A*
 ; CHECK: store %A__vtbl* @A__vtblZ
   store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
+  %2 = bitcast i8* %0 to %B*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i128), i128 774763251095801167872), i128* {{.*}}, align 8
+  store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %2, align 8
+  %3 = bitcast i8* %0 to %C*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%C__vtbl* @C__vtblZ to i64) to i128), i128 466891561697334504689850300497920), i128* {{.*}}, align 8
+  store %C { %C__vtbl* @C__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 } }, %C* %3, align 8
+  %4 = bitcast i8* %0 to %D*
+; CHECK: store i192 or (i192 zext (i64 ptrtoint (%D__vtbl* @D__vtblZ to i64) to i192), i192 1023511215942805454298064656762495041536), i192* {{.*}}, align 8
+  store %D { %D__vtbl* @D__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 }, { i8, i8, i8 } { i8 1, i8 2, i8 3 } }, %D* %4, align 8
+  %5 = bitcast i8* %0 to %E*
+; CHECK: store i256 or (i256 zext (i64 ptrtoint (%E__vtbl* @E__vtblZ to i64) to i256), i256 31385508682779410369526070004795876865674973957706397777920), i256* {{.*}}, align 8
+  store %E { %E__vtbl* @E__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 }, { i8, i8, i8 } { i8 1, i8 2, i8 3 }, [2 x i32] [i32 4, i32 5] }, %E* %5, align 8
+  ret void
+}
+
+define void @arrays() {
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to [1 x i64]*
+; CHECK: store i64 42
+  store [1 x i64] [ i64 42 ], [1 x i64]* %1, align 8
+  %2 = bitcast i8* %0 to [2 x i64]*
+; CHECK: store i64 666
+; CHECK: store i64 42
+  store [2 x i64] [ i64 666, i64 42 ], [2 x i64]* %2, align 8
+  %3 = bitcast i8* %0 to [2 x %A]*
+; CHECK: store %A__vtbl* @A__vtblZ
+; CHECK: store %A__vtbl* @A__vtblZ
+  store [2 x %A] [%A { %A__vtbl* @A__vtblZ }, %A { %A__vtbl* @A__vtblZ }], [2 x %A]* %3, align 8
+  %4 = bitcast i8* %0 to [2 x %B]*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i128), i128 1826227663297245609984), i128* {{.*}}, align 8
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i128), i128 461168601842738790400), i128* {{.*}}, align 8
+  store [2 x %B] [%B { %B__vtbl* @B__vtblZ, i32 99 }, %B { %B__vtbl* @B__vtblZ, i32 25 }], [2 x %B]* %4, align 8
   ret void
 }
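Note that the shift amounts depend on the target's byte order: both
getFromLoad and addToStore use ElementOffset * 8 on little-endian targets and
(Size - ElementSize - ElementOffset) * 8 on big-endian ones, so the tests
above only exercise the little-endian x86_64 layout. A hypothetical
big-endian sketch for %B's i32 field (4 bytes at byte offset 8 of a 16-byte
value): the shift becomes (16 - 4 - 8) * 8 = 32 instead of 8 * 8 = 64.

    ; big-endian sketch, not covered by the tests above
    %r.1 = lshr i128 %raw, 32
    %f1  = trunc i128 %r.1 to i32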