Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -976,6 +976,14 @@
     return false;
   }
 
+  /// Return true if the alloca arguments to CI should be aligned. If so then
+  /// AllocaSize is set to the minimum size the allocated object must be to be
+  /// aligned and AllocaAlign is set to the alignment the alloca is to be given.
+  virtual bool shouldAlignAllocaArgs(CallInst */*CI*/, unsigned &/*AllocaSize*/,
+                                     unsigned &/*AllocaAlign*/) const {
+    return false;
+  }
+
   //===--------------------------------------------------------------------===//
   /// \name Helpers for TargetTransformInfo implementations
   /// @{
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -1228,6 +1228,40 @@
     return true;
   }
 
+  const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
+
+  // Align the alloca arguments to this call if the target thinks it's a good
+  // idea
+  unsigned AllocaSize = 0, AllocaAlign = 0;
+  if (TLI && TD && TLI->shouldAlignAllocaArgs(CI, AllocaSize, AllocaAlign)) {
+    assert(AllocaAlign != 0 && "shouldAlignAllocaArgs must set AllocaAlign");
+    for (auto &Arg : CI->arg_operands()) {
+      // We want to align both direct allocas and allocas used in casts and
+      // GEPs, though it only makes sense for GEPs if the offset is a multiple
+      // of the desired alignment and if size - offset meets the size threshold.
+      if (!Arg->getType()->isPointerTy())
+        continue;
+      APInt Offset(TD->getPointerSizeInBits(
+          cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
+      Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+      uint64_t Offset2 = Offset.getLimitedValue();
+      AllocaInst *AI;
+      if ((Offset2 & (AllocaAlign-1)) == 0 &&
+          (AI = dyn_cast<AllocaInst>(Val)) &&
+          AI->getAlignment() < AllocaAlign &&
+          TD->getTypeAllocSize(AI->getAllocatedType()) - Offset2 >= AllocaSize)
+        AI->setAlignment(AllocaAlign);
+    }
+    // If this is a memcpy (or similar) then we may have improved the alignment
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+      unsigned Align = getKnownAlignment(MI->getDest());
+      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+        Align = std::min(Align, getKnownAlignment(MTI->getSource()));
+      if (Align > MI->getAlignment())
+        MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+    }
+  }
+
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II) {
     switch (II->getIntrinsicID()) {
@@ -1288,7 +1322,6 @@
   if (!CI->getCalledFunction()) return false;
 
   // We'll need DataLayout from here on out.
-  const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
   if (!TD) return false;
 
   // Lower all default uses of _chk calls.  This is very similar
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -362,6 +362,9 @@
       return true;
     }
 
+    bool shouldAlignAllocaArgs(CallInst *CI, unsigned &AllocaSize,
+                               unsigned &AllocaAlign) const override;
+
     /// createFastISel - This method returns a target specific FastISel object,
     /// or null if the target does not support "fast" ISel.
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1165,6 +1165,17 @@
   return TargetLowering::getRegClassFor(VT);
 }
 
+// Arrays (or other objects) whose address leaks into another function may end
+// up being memcpy'd there. memcpy typically tries to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore want
+// to align such objects.
+bool ARMTargetLowering::shouldAlignAllocaArgs(CallInst *, unsigned &AllocaSize,
+                                              unsigned &AllocaAlign) const {
+  AllocaSize = 8;
+  AllocaAlign = 4;
+  return true;
+}
+
 // Create a fast isel object.
 FastISel *
 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
Index: test/CodeGen/ARM/stack-object-align.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/stack-object-align.ll
@@ -0,0 +1,99 @@
+; RUN: llc -mtriple=arm-eabi < %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7-eabi < %s -o - | FileCheck %s
+
+; Expect that small arrays are not aligned when passed to a function
+define void @test1() {
+entry:
+  %arr1 = alloca [3 x i8], align 1
+  %arr2 = alloca [3 x i8], align 1
+
+; CHECK: add{{(\.w)?}} r0, sp, #5
+  %arraydecay = getelementptr inbounds [3 x i8], [3 x i8]* %arr1, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay)
+
+; CHECK: add{{(\.w)?}} r0, sp, #2
+  %arraydecay1 = getelementptr inbounds [3 x i8], [3 x i8]* %arr2, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay1)
+
+  ret void
+}
+
+; Expect that larger arrays are aligned when passed to a function
+define void @test2() {
+entry:
+  %arr1 = alloca [9 x i8], align 1
+  %arr2 = alloca [9 x i8], align 1
+
+; CHECK: add{{(\.w)?}} r0, sp, #12
+  %arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay)
+
+; CHECK: mov r0, sp
+  %arraydecay1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay1)
+
+  ret void
+}
+
+; Expect that larger arrays only accessed through array access are not aligned
+define void @test3() {
+entry:
+  %arr1 = alloca [9 x i8], align 1
+  %arr2 = alloca [9 x i8], align 1
+
+; CHECK: strb{{(\.w)?}} {{r[0-9]+}}, [sp, #11]
+  %arrayidx = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 0
+  store i8 1, i8* %arrayidx, align 1
+
+; CHECK: strb{{(\.w)?}} {{r[0-9]+}}, [sp, #2]
+  %arrayidx1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 0
+  store i8 1, i8* %arrayidx1, align 1
+
+  ret void
+}
+
+; Expect that when an element of a larger array is passed to a function the array is aligned
+; if the offset is a multiple of 4
+define void @test4() {
+entry:
+  %arr1 = alloca [13 x i8], align 1
+  %arr2 = alloca [13 x i8], align 1
+
+; CHECK: add{{(\.w)?}} r0, sp, #16
+  %arrayelem = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 4
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arrayelem)
+
+; CHECK: mov r0, sp
+  %arrayelem1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 4
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arrayelem1)
+
+  ret void
+}
+
+; Expect that when an element of a larger array is passed to a function the array is not
+; aligned if the offset is a not multiple of 4
+define void @test5() {
+entry:
+  %arr1 = alloca [13 x i8], align 1
+  %arr2 = alloca [13 x i8], align 1
+
+; CHECK: add{{(\.w)?}} r0, sp, #19
+  %arrayelem = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 3
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arrayelem)
+
+; CHECK: add{{(\.w)?}} r0, sp, #6
+  %arrayelem1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 3
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arrayelem1)
+
+  ret void
+}
+
+declare void @takeptr(i8*)