Index: lib/Target/ARM/ARM.h
===================================================================
--- lib/Target/ARM/ARM.h
+++ lib/Target/ARM/ARM.h
@@ -39,6 +39,7 @@
 FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createARMOptimizeBarriersPass();
 FunctionPass *createThumb2SizeReductionPass();
+FunctionPass *createARMAlignAllocaPass(const ARMBaseTargetMachine &TM);
 
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
Index: lib/Target/ARM/ARMAlignAllocaPass.cpp
===================================================================
--- /dev/null
+++ lib/Target/ARM/ARMAlignAllocaPass.cpp
@@ -0,0 +1,115 @@
+//===-- ARMAlignAllocaPass.cpp - Align Alloca instructions ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Arrays (or other objects) whose address leaks into another function may end
+// up being memcpy'd there. memcpy typically tries to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore want
+// to align such objects. This pass looks for alloca instructions that fit
+// these criteria and aligns them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+
+using namespace llvm;
+
+namespace {
+/// Alignment, in bytes, applied to allocas that qualify.
+const unsigned WordAlign = 4;
+/// Smallest allocation size, in bytes, for which an alloca is aligned.
+const unsigned MinObjectSize = 8;
+
+/// Raises the alignment of allocas that feed call arguments, then updates the
+/// alignment recorded on memory intrinsics to match.
+class ARMAlignAlloca : public FunctionPass,
+                       public InstVisitor<ARMAlignAlloca> {
+  const DataLayout *DL;
+  /// True once the function currently being visited has been modified.
+  bool Changed;
+
+public:
+  static char ID;
+
+  ARMAlignAlloca(const ARMBaseTargetMachine &TM)
+      : FunctionPass(ID), DL(TM.getDataLayout()), Changed(false) {}
+
+  bool runOnFunction(Function &F) override;
+  const char *getPassName() const override { return "ARM Align Alloca"; }
+  void visitCallInst(CallInst &CI);
+  void tryAlignAlloca(Value *Val);
+};
+}
+
+char ARMAlignAlloca::ID = 0;
+
+// Visit every call in F. Returns true if any alignment was raised, so the
+// pass manager is told when the IR has been modified.
+bool ARMAlignAlloca::runOnFunction(Function &F) {
+  Changed = false;
+  visit(F);
+  return Changed;
+}
+
+// If this is an AllocaInst, and the allocated object is large enough that we
+// think memcpy would want to use LDM/STM, then align it.
+void ARMAlignAlloca::tryAlignAlloca(Value *Val) {
+  AllocaInst *AI = dyn_cast<AllocaInst>(Val);
+  if (!AI)
+    return;
+
+  // An alignment of zero means none was specified; the object is then aligned
+  // compatibly with its type, so compare against the ABI alignment instead.
+  Type *Ty = AI->getAllocatedType();
+  unsigned CurAlign = AI->getAlignment();
+  if (CurAlign == 0)
+    CurAlign = DL->getABITypeAlignment(Ty);
+
+  if (CurAlign >= WordAlign || DL->getTypeAllocSize(Ty) < MinObjectSize)
+    return;
+
+  AI->setAlignment(WordAlign);
+  Changed = true;
+}
+
+void ARMAlignAlloca::visitCallInst(CallInst &CI) {
+  // Align any allocas used as arguments, either directly as argument operands
+  // or as operands to those arguments.
+  for (auto &Arg : CI.arg_operands()) {
+    Value *ArgVal = Arg.get();
+    tryAlignAlloca(ArgVal);
+    if (User *U = dyn_cast<User>(ArgVal))
+      for (auto *Val : U->operand_values())
+        tryAlignAlloca(Val);
+  }
+
+  // If this is a memcpy (or similar) then we may have improved the alignment
+  // of its operands, so refresh the alignment recorded on the intrinsic.
+  MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&CI);
+  if (!MI)
+    return;
+
+  unsigned Align = getKnownAlignment(MI->getDest());
+  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+    Align = std::min(Align, getKnownAlignment(MTI->getSource()));
+  if (Align > MI->getAlignment()) {
+    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+    Changed = true;
+  }
+}
+
+FunctionPass *llvm::createARMAlignAllocaPass(const ARMBaseTargetMachine &TM) {
+  return new ARMAlignAlloca(TM);
+}
Index: lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- lib/Target/ARM/ARMTargetMachine.cpp
+++ lib/Target/ARM/ARMTargetMachine.cpp
@@ -295,6 +295,7 @@
   }
 
   void addIRPasses() override;
+  void addCodeGenPrepare() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   void addPreRegAlloc() override;
@@ -324,6 +325,11 @@
   TargetPassConfig::addIRPasses();
 }
 
+void ARMPassConfig::addCodeGenPrepare() {
+  addPass(createARMAlignAllocaPass(getARMTargetMachine()));
+  TargetPassConfig::addCodeGenPrepare();
+}
+
 bool ARMPassConfig::addPreISel() {
   if (TM->getOptLevel() != CodeGenOpt::None)
     // FIXME: This is using the thumb1 only constant value for
Index: lib/Target/ARM/CMakeLists.txt
===================================================================
--- lib/Target/ARM/CMakeLists.txt
+++ lib/Target/ARM/CMakeLists.txt
@@ -15,6 +15,7 @@
 
 add_llvm_target(ARMCodeGen
   A15SDOptimizer.cpp
+  ARMAlignAllocaPass.cpp
   ARMAsmPrinter.cpp
   ARMBaseInstrInfo.cpp
   ARMBaseRegisterInfo.cpp
Index: test/CodeGen/ARM/stack-object-align.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/stack-object-align.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple=arm-eabi < %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7-eabi < %s -o - | FileCheck %s
+
+; Expect that small arrays are not aligned when passed to a function
+define void @test1() {
+entry:
+  %arr1 = alloca [3 x i8], align 1
+  %arr2 = alloca [3 x i8], align 1
+
+; CHECK: add{{(\.w)?}} r0, sp, #5
+  %arraydecay = getelementptr inbounds [3 x i8], [3 x i8]* %arr1, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay)
+
+; CHECK: add{{(\.w)?}} r0, sp, #2
+  %arraydecay1 = getelementptr inbounds [3 x i8], [3 x i8]* %arr2, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay1)
+
+  ret void
+}
+
+; Expect that larger arrays are aligned when passed to a function
+define void @test2() {
+entry:
+  %arr1 = alloca [9 x i8], align 1
+  %arr2 = alloca [9 x i8], align 1
+
+; CHECK: add{{(\.w)?}} r0, sp, #12
+  %arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay)
+
+; CHECK: mov r0, sp
+  %arraydecay1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay1)
+
+  ret void
+}
+
+; Expect that larger arrays only accessed through array access are not aligned
+define void @test3() {
+entry:
+  %arr1 = alloca [9 x i8], align 1
+  %arr2 = alloca [9 x i8], align 1
+
+; CHECK: strb{{(\.w)?}} {{r[0-9]+}}, [sp, #11]
+  %arrayidx = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 0
+  store i8 1, i8* %arrayidx, align 1
+
+; CHECK: strb{{(\.w)?}} {{r[0-9]+}}, [sp, #2]
+  %arrayidx1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 0
+  store i8 1, i8* %arrayidx1, align 1
+
+  ret void
+}
+
+declare void @takeptr(i8*)