Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -859,6 +859,131 @@
   return false;
 }
 
+/// Pack \p Element into the integer \p Pack at byte offset \p ElementOffset.
+/// Forward declared because the struct and array helpers recurse through it.
+static Value *repackToIntegral(InstCombiner &IC,
+                               Value *Pack, unsigned PackSize,
+                               Value *Element, unsigned ElementOffset);
+
+/// Pack each field of the struct \p Element (of type \p ST) into \p Pack.
+///
+/// Every field is placed at \p ElementOffset plus its DataLayout offset, in
+/// bytes. Bytes of \p Pack that no field covers are left untouched.
+static Value *repackStructToIntegral(InstCombiner &IC,
+                                     Value *Pack, unsigned PackSize,
+                                     StructType *ST, Value *Element,
+                                     unsigned ElementOffset) {
+  assert(!ST->isOpaque() && "Can't pack opaque structs");
+  assert(Element->getType() == ST && "ST must be Element's type");
+
+  // The whole struct has to fit in the pack; each leaf is checked again once
+  // the recursion reaches it.
+  const DataLayout &DL = IC.getDataLayout();
+  unsigned ElementSize = DL.getTypeStoreSize(ST);
+  assert((ElementSize + ElementOffset) <= PackSize &&
+         "Can't pack past the end of the pack");
+
+  const StructLayout *SL = DL.getStructLayout(ST);
+  for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
+    unsigned SEOffset = ElementOffset + SL->getElementOffset(i);
+    Value *SE = IC.Builder->CreateExtractValue(Element, i);
+    Pack = repackToIntegral(IC, Pack, PackSize, SE, SEOffset);
+  }
+
+  return Pack;
+}
+
+/// Pack each element of the array \p Element (of type \p AT) into \p Pack.
+///
+/// Consecutive elements are one alloc size apart, starting at byte offset
+/// \p ElementOffset.
+static Value *repackArrayToIntegral(InstCombiner &IC,
+                                    Value *Pack, unsigned PackSize,
+                                    ArrayType *AT, Value *Element,
+                                    unsigned ElementOffset) {
+  assert(Element->getType() == AT && "AT must be Element's type");
+
+  // The whole array has to fit in the pack.
+  const DataLayout &DL = IC.getDataLayout();
+  unsigned SESize = DL.getTypeAllocSize(AT->getElementType());
+  unsigned ElementSize = DL.getTypeStoreSize(AT);
+  assert((ElementSize + ElementOffset) <= PackSize &&
+         "Can't pack past the end of the pack");
+
+  unsigned SEOffset = ElementOffset;
+  for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
+    Value *SE = IC.Builder->CreateExtractValue(Element, i);
+    Pack = repackToIntegral(IC, Pack, PackSize, SE, SEOffset);
+    SEOffset += SESize;
+  }
+
+  return Pack;
+}
+
+/// Pack \p Element into the integer \p Pack at byte offset \p ElementOffset.
+///
+/// \p Pack must be an integer that is exactly \p PackSize bytes wide.
+/// Aggregates are flattened recursively. A scalar or vector leaf is turned
+/// into an integer, zero-extended to the type of \p Pack, shifted to the
+/// position its bytes occupy for the target endianness, and or'ed in.
+///
+/// \returns the updated pack.
+static Value *repackToIntegral(InstCombiner &IC,
+                               Value *Pack, unsigned PackSize,
+                               Value *Element, unsigned ElementOffset) {
+  Type *ET = Element->getType();
+  if (ET->isAggregateType()) {
+    if (StructType *ST = dyn_cast<StructType>(ET)) {
+      return repackStructToIntegral(IC, Pack, PackSize,
+                                    ST, Element, ElementOffset);
+    }
+
+    if (ArrayType *AT = dyn_cast<ArrayType>(ET)) {
+      return repackArrayToIntegral(IC, Pack, PackSize,
+                                   AT, Element, ElementOffset);
+    }
+
+    llvm_unreachable("Invalid Aggregate Type");
+  }
+
+  // Transform the element into an integer. Pointers (and vectors of
+  // pointers) cannot be bitcast to an integer, so they go through ptrtoint.
+  const DataLayout &DL = IC.getDataLayout();
+  if (ET->getScalarType()->isPointerTy()) {
+    Element = IC.Builder->CreatePtrToInt(Element, DL.getIntPtrType(ET));
+    ET = Element->getType();
+  }
+
+  LLVMContext &C = Pack->getContext();
+  unsigned ElementSize = DL.getTypeStoreSize(ET);
+  if (!ET->isIntegerTy()) {
+    // IntegerType::get takes a width in bits and a bitcast requires both
+    // types to have the same bit width, so use the bit size of the element
+    // rather than its store size in bytes.
+    ET = IntegerType::get(C, DL.getTypeSizeInBits(ET));
+    Element = IC.Builder->CreateBitCast(Element, ET);
+  }
+
+  assert(DL.getTypeStoreSize(Pack->getType()) == PackSize &&
+         "PackSize does not match the size of Pack");
+  assert(ElementSize <= PackSize && "Element does not fit into the pack");
+
+  // Widen to the pack type. Comparing types instead of byte sizes also
+  // covers integers whose bit width is not a multiple of 8, e.g. i1.
+  if (ET != Pack->getType())
+    Element = IC.Builder->CreateZExt(Element, Pack->getType());
+
+  // Byte offsets count from the least significant byte on little endian
+  // targets and from the most significant byte on big endian targets.
+  unsigned ShiftAmt = DL.isLittleEndian()
+    ? ElementOffset * 8
+    : (PackSize - ElementSize - ElementOffset) * 8;
+  if (ShiftAmt)
+    Element = IC.Builder->CreateShl(Element, ShiftAmt);
+
+  return IC.Builder->CreateOr(Pack, Element);
+}
+
 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
   // FIXME: We could probably with some care handle both volatile and atomic
   // stores here but it isn't clear that this is important.
@@ -875,8 +1000,20 @@
     // If the struct only have one element, we unpack.
     if (ST->getNumElements() == 1) {
       V = IC.Builder->CreateExtractValue(V, 0);
-      combineStoreToNewValue(IC, SI, V);
-      return true;
+    } else {
+      const DataLayout &DL = IC.getDataLayout();
+      uint64_t StoreSize = DL.getTypeStoreSize(ST);
+
+      // The pack is an integer, which needs at least one bit and at most
+      // IntegerType::MAX_INT_BITS bits; leave other aggregates alone.
+      if (StoreSize == 0 || StoreSize > IntegerType::MAX_INT_BITS / 8)
+        return false;
+
+      unsigned PackSize = StoreSize;
+      Value *Pack = ConstantInt::get(Type::getIntNTy(ST->getContext(),
+                                                     PackSize * 8), 0);
+      V = repackStructToIntegral(IC, Pack, PackSize, ST, V, 0);
+      assert(!V->getType()->isAggregateType() && "Failed to unpack aggregate");
     }
   }
 
@@ -884,12 +1021,25 @@
     // If the array only have one element, we unpack.
     if (AT->getNumElements() == 1) {
       V = IC.Builder->CreateExtractValue(V, 0);
-      combineStoreToNewValue(IC, SI, V);
-      return true;
+    } else {
+      const DataLayout &DL = IC.getDataLayout();
+      uint64_t StoreSize = DL.getTypeStoreSize(AT);
+
+      // The pack is an integer, which needs at least one bit and at most
+      // IntegerType::MAX_INT_BITS bits; leave other aggregates alone.
+      if (StoreSize == 0 || StoreSize > IntegerType::MAX_INT_BITS / 8)
+        return false;
+
+      unsigned PackSize = StoreSize;
+      Value *Pack = ConstantInt::get(Type::getIntNTy(AT->getContext(),
+                                                     PackSize * 8), 0);
+      V = repackArrayToIntegral(IC, Pack, PackSize, AT, V, 0);
+      assert(!V->getType()->isAggregateType() && "Failed to unpack aggregate");
     }
   }
 
-  return false;
+  combineStoreToNewValue(IC, SI, V);
+  return true;
 }
 
 /// equivalentAddressValues - Test if A and B will obviously have the same
Index: test/Transforms/InstCombine/unpack-fca.ll
===================================================================
--- test/Transforms/InstCombine/unpack-fca.ll
+++ test/Transforms/InstCombine/unpack-fca.ll
@@ -4,59 +4,145 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 %A__vtbl = type { i8*, i32 (%A*)* }
+%B__vtbl = type { i8*, i32 (%B*)* }
+%C__vtbl = type { i8*, i32 (%C*)* }
+%D__vtbl = type { i8*, i32 (%D*)* }
+%E__vtbl = type { i8*, i32 (%E*)* }
 %A = type { %A__vtbl* }
+%B = type { %B__vtbl*, i32 }
+%C = type { %C__vtbl*, i32, { i8 }, { i8 } }
+%D = type { %D__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 } }
+%E = type { %E__vtbl*, i32, { i8 }, { i8 }, { i8, i8, i8 }, [2 x i32] }
 
 @A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
+@B__vtblZ = constant %B__vtbl { i8* null, i32 (%B*)* @B.foo }
+@C__vtblZ = constant %C__vtbl { i8* null, i32 (%C*)* @C.foo }
+@D__vtblZ = constant %D__vtbl { i8* null, i32 (%D*)* @D.foo }
+@E__vtblZ = constant %E__vtbl { i8* null, i32 (%E*)* @E.foo }
 
 declare i32 @A.foo(%A* nocapture %this)
+declare i32 @B.foo(%B* nocapture %this)
+declare i32 @C.foo(%C* nocapture %this)
+declare i32 @D.foo(%D* nocapture %this)
+declare i32 @E.foo(%E* nocapture %this)
 
 declare i8* @allocmemory(i64)
 
 define void @storeA() {
+; CHECK-LABEL: storeA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to %A*
-; CHECK-LABEL: storeA
 ; CHECK: store %A__vtbl* @A__vtblZ
   store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
   ret void
 }
 
+define void @storeB() {
+; CHECK-LABEL: storeB
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %B*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i128), i128 774763251095801167872), i128* %1, align 8
+  store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %1, align 8
+  ret void
+}
+
+define void @storeC() {
+; CHECK-LABEL: storeC
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %C*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%C__vtbl* @C__vtblZ to i64) to i128), i128 466891561697334504689850300497920), i128* %1, align 8
+  store %C { %C__vtbl* @C__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 } }, %C* %1, align 8
+  ret void
+}
+
+define void @storeD() {
+; CHECK-LABEL: storeD
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %D*
+; CHECK: store i192 or (i192 zext (i64 ptrtoint (%D__vtbl* @D__vtblZ to i64) to i192), i192 1023511215942805454298064656762495041536), i192* %1, align 8
+  store %D { %D__vtbl* @D__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 }, { i8, i8, i8 } { i8 1, i8 2, i8 3 } }, %D* %1, align 8
+  ret void
+}
+
+define void @storeE() {
+; CHECK-LABEL: storeE
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to %E*
+; CHECK: store i256 or (i256 zext (i64 ptrtoint (%E__vtbl* @E__vtblZ to i64) to i256), i256 31385508682779410369526070004795876865674973957706397777920), i256* %1, align 8
+  store %E { %E__vtbl* @E__vtblZ, i32 42, { i8 } { i8 5 }, { i8 } { i8 23 }, { i8, i8, i8 } { i8 1, i8 2, i8 3 }, [2 x i32] [i32 4, i32 5] }, %E* %1, align 8
+  ret void
+}
+
 define void @storeStructOfA() {
+; CHECK-LABEL: storeStructOfA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to { %A }*
-; CHECK-LABEL: storeStructOfA
 ; CHECK: store %A__vtbl* @A__vtblZ
   store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
   ret void
 }
 
 define void @storeArrayOfA() {
+; CHECK-LABEL: storeArrayOfA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to [1 x %A]*
-; CHECK-LABEL: storeArrayOfA
 ; CHECK: store %A__vtbl* @A__vtblZ
   store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %1, align 8
   ret void
 }
 
 define void @storeStructOfArrayOfA() {
+; CHECK-LABEL: storeStructOfArrayOfA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to { [1 x %A] }*
-; CHECK-LABEL: storeStructOfArrayOfA
 ; CHECK: store %A__vtbl* @A__vtblZ
   store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %1, align 8
   ret void
 }
 
+define void @storeBigArray() {
+; CHECK-LABEL: storeBigArray
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to [2 x i64]*
+; CHECK: store i128 774763251095801168538, i128* %1, align 8
+  store [2 x i64] [ i64 666, i64 42 ], [2 x i64]* %1, align 8
+  ret void
+}
+
+define void @storeBigArrayOfA() {
+; CHECK-LABEL: storeBigArrayOfA
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to [2 x %A]*
+; CHECK: store i128 or (i128 zext (i64 ptrtoint (%A__vtbl* @A__vtblZ to i64) to i128), i128 shl (i128 zext (i64 ptrtoint (%A__vtbl* @A__vtblZ to i64) to i128), i128 64)), i128* %1, align 8
+  store [2 x %A] [%A { %A__vtbl* @A__vtblZ }, %A { %A__vtbl* @A__vtblZ }], [2 x %A]* %1, align 8
+  ret void
+}
+
+define void @storeBigArrayOfB() {
+; CHECK-LABEL: storeBigArrayOfB
+body:
+  %0 = tail call i8* @allocmemory(i64 32)
+  %1 = bitcast i8* %0 to [2 x %B]*
+; CHECK: store i256 or (i256 or (i256 or (i256 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i256), i256 1826227663297245609984), i256 shl (i256 zext (i64 ptrtoint (%B__vtbl* @B__vtblZ to i64) to i256), i256 128)), i256 156927543384667019095894735580191660402558886111600862822400), i256* %1, align 8
+  store [2 x %B] [%B { %B__vtbl* @B__vtblZ, i32 99 }, %B { %B__vtbl* @B__vtblZ, i32 25 }], [2 x %B]* %1, align 8
+  ret void
+}
+
 define %A @loadA() {
+; CHECK-LABEL: loadA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to %A*
-; CHECK-LABEL: loadA
 ; CHECK: load %A__vtbl*,
 ; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
   %2 = load %A, %A* %1, align 8
@@ -64,10 +150,10 @@
 }
 
 define { %A } @loadStructOfA() {
+; CHECK-LABEL: loadStructOfA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to { %A }*
-; CHECK-LABEL: loadStructOfA
 ; CHECK: load %A__vtbl*,
 ; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
 ; CHECK: insertvalue { %A } undef, %A {{.*}}, 0
@@ -76,10 +162,10 @@
 }
 
 define [1 x %A] @loadArrayOfA() {
+; CHECK-LABEL: loadArrayOfA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to [1 x %A]*
-; CHECK-LABEL: loadArrayOfA
 ; CHECK: load %A__vtbl*,
 ; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
 ; CHECK: insertvalue [1 x %A] undef, %A {{.*}}, 0
@@ -88,10 +174,10 @@
 }
 
 define { [1 x %A] } @loadStructOfArrayOfA() {
+; CHECK-LABEL: loadStructOfArrayOfA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to { [1 x %A] }*
-; CHECK-LABEL: loadStructOfArrayOfA
 ; CHECK: load %A__vtbl*,
 ; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
 ; CHECK: insertvalue [1 x %A] undef, %A {{.*}}, 0
@@ -101,10 +187,10 @@
 }
 
 define { %A } @structOfA() {
+; CHECK-LABEL: structOfA
 body:
   %0 = tail call i8* @allocmemory(i64 32)
   %1 = bitcast i8* %0 to { %A }*
-; CHECK-LABEL: structOfA
 ; CHECK: store %A__vtbl* @A__vtblZ
   store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
   %2 = load { %A }, { %A }* %1, align 8