Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -3164,7 +3164,12 @@ /// value (as opposed to the user). Use *U; + /// Used to calculate offsets, and hence alignment, of subobjects. + const DataLayout &DL; + public: + AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {} + /// Rewrite loads and stores through a pointer and all pointers derived from /// it. bool rewrite(Instruction &I) { @@ -3208,14 +3213,25 @@ /// split operations. Value *Ptr; + /// The base pointee type being GEPed into. + Type *BaseTy; + /// TBAA information to attach to the new op. AAMDNodes AATags; + /// Known alignment of the base pointer. + unsigned BaseAlign; + + /// To calculate offset of each component so we can correctly deduce + /// alignments. + const DataLayout &DL; + /// Initialize the splitter with an insertion point, Ptr and start with a /// single zero GEP index. - OpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags) + OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, + AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL) : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), - AATags(AATags) {} + BaseTy(BaseTy), AATags(AATags), BaseAlign(BaseAlign), DL(DL) {} public: /// Generic recursive split emission routine. @@ -3232,8 +3248,11 @@ /// \param Agg The aggregate value being built up or stored, depending on /// whether this is splitting a load or a store respectively. 
void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) { - if (Ty->isSingleValueType()) - return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name); + if (Ty->isSingleValueType()) { + unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices); + return static_cast<Derived *>(this)->emitFunc( + Ty, Agg, MinAlign(BaseAlign, Offset), Name); + } if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { unsigned OldSize = Indices.size(); @@ -3270,17 +3289,19 @@ }; struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> { - LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags) - : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, AATags) {} + LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, + AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL) + : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, AATags, + BaseAlign, DL) {} /// Emit a leaf load of a single value. This is called at the leaves of the /// recursive emission to actually load values. - void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { + void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. 
Value *GEP = IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"); - LoadInst *Load = IRB.CreateLoad(GEP, Name + ".load"); + LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load"); if (AATags) Load->setAAMetadata(AATags); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); @@ -3297,7 +3318,8 @@ LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); AAMDNodes AATags; LI.getAAMetadata(AATags); - LoadOpSplitter Splitter(&LI, *U, AATags); + LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags, + getAdjustedAlignment(&LI, 0, DL), DL); Value *V = UndefValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); LI.replaceAllUsesWith(V); @@ -3306,12 +3328,13 @@ } struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> { - StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags) - : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, AATags) {} - + StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, + AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL) + : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, AATags, + BaseAlign, DL) {} /// Emit a leaf store of a single value. This is called at the leaves of the /// recursive emission to actually produce stores. - void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { + void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) { assert(Ty->isSingleValueType()); // Extract the single value and store it using the indices. 
// @@ -3321,7 +3344,8 @@ IRB.CreateExtractValue(Agg, Indices, Name + ".extract"); Value *InBoundsGEP = IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"); - StoreInst *Store = IRB.CreateStore(ExtractValue, InBoundsGEP); + StoreInst *Store = + IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align); if (AATags) Store->setAAMetadata(AATags); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); @@ -3339,7 +3363,8 @@ LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); AAMDNodes AATags; SI.getAAMetadata(AATags); - StoreOpSplitter Splitter(&SI, *U, AATags); + StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags, + getAdjustedAlignment(&SI, 0, DL), DL); Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca"); SI.eraseFromParent(); return true; @@ -4357,7 +4382,7 @@ // First, split any FCA loads and stores touching this alloca to promote // better splitting and promotion opportunities. - AggLoadStoreRewriter AggRewriter; + AggLoadStoreRewriter AggRewriter(DL); Changed |= AggRewriter.rewrite(AI); // Build the slices using a recursive instruction-visiting builder. 
Index: llvm/test/Transforms/SROA/alignment.ll =================================================================== --- llvm/test/Transforms/SROA/alignment.ll +++ llvm/test/Transforms/SROA/alignment.ll @@ -181,3 +181,50 @@ ret void ; CHECK: ret void } + +define void @test8() { +; CHECK-LABEL: @test8( +; CHECK: load i32, {{.*}}, align 1 +; CHECK: load i32, {{.*}}, align 1 +; CHECK: load i32, {{.*}}, align 1 +; CHECK: load i32, {{.*}}, align 1 +; CHECK: load i32, {{.*}}, align 1 + + %ptr = alloca [5 x i32], align 1 + %ptr.8 = bitcast [5 x i32]* %ptr to i8* + call void @populate(i8* %ptr.8) + %val = load [5 x i32], [5 x i32]* %ptr, align 1 + ret void +} + +define void @test9() { +; CHECK-LABEL: @test9( +; CHECK: load i32, {{.*}}, align 8 +; CHECK: load i32, {{.*}}, align 4 +; CHECK: load i32, {{.*}}, align 8 +; CHECK: load i32, {{.*}}, align 4 +; CHECK: load i32, {{.*}}, align 8 + + %ptr = alloca [5 x i32], align 8 + %ptr.8 = bitcast [5 x i32]* %ptr to i8* + call void @populate(i8* %ptr.8) + %val = load [5 x i32], [5 x i32]* %ptr, align 8 + ret void +} + +define void @test10() { +; CHECK-LABEL: @test10( +; CHECK: load i32, {{.*}}, align 2 +; CHECK: load i8, {{.*}}, align 2 +; CHECK: load i8, {{.*}}, align 1 +; CHECK: load i8, {{.*}}, align 2 +; CHECK: load i16, {{.*}}, align 2 + + %ptr = alloca {i32, i8, i8, {i8, i16}}, align 2 + %ptr.8 = bitcast {i32, i8, i8, {i8, i16}}* %ptr to i8* + call void @populate(i8* %ptr.8) + %val = load {i32, i8, i8, {i8, i16}}, {i32, i8, i8, {i8, i16}}* %ptr, align 2 + ret void +} + +declare void @populate(i8*)