Index: lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1191,6 +1191,12 @@
                                             SDValue &Base, SDValue &OffImm) {
   if (N.getOpcode() == ISD::FrameIndex) {
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
+    // Only multiples of 4 are allowed for the offset, so the frame object
+    // alignment must be at least 4.
+    MachineFrameInfo *MFI = MF->getFrameInfo();
+    if (MFI->getObjectAlignment(FI) < 4) {
+      MFI->setObjectAlignment(FI, 4);
+    }
     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
@@ -1208,6 +1214,12 @@
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::FrameIndex) {
         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+        // For LHS+RHS to result in an offset that's a multiple of 4 the object
+        // indexed by the LHS must be 4-byte aligned.
+        MachineFrameInfo *MFI = MF->getFrameInfo();
+        if (MFI->getObjectAlignment(FI) < 4) {
+          MFI->setObjectAlignment(FI, 4);
+        }
         Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
       }
       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
@@ -2496,6 +2508,12 @@
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
     SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
     if (Subtarget->isThumb1Only()) {
+      // Set the alignment of the frame object to 4, to avoid having to generate
+      // more than one ADD
+      MachineFrameInfo *MFI = MF->getFrameInfo();
+      if (MFI->getObjectAlignment(FI) < 4) {
+        MFI->setObjectAlignment(FI, 4);
+      }
       return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                                   CurDAG->getTargetConstant(0, MVT::i32));
     } else {
Index: lib/Target/ARM/ARMInstrThumb.td
===================================================================
--- lib/Target/ARM/ARMInstrThumb.td
+++ lib/Target/ARM/ARMInstrThumb.td
@@ -1394,6 +1394,12 @@
 def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
             (tLDRBi t_addrmode_is1:$addr)>;
 
+// extload from the stack -> word load from the stack, as it avoids having to
+// materialize the base in a separate register.
+def : T1Pat<(extloadi1  t_addrmode_sp:$addr),  (tLDRspi t_addrmode_sp:$addr)>;
+def : T1Pat<(extloadi8  t_addrmode_sp:$addr),  (tLDRspi t_addrmode_sp:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>;
+
 // extload -> zextload
 def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
 def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
Index: test/CodeGen/Thumb/stack-access.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/stack-access.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mtriple=thumb-eabi < %s -o - | FileCheck %s
+
+; Check that stack addresses are generated using a single ADD
+define void @test1(i8** %p) {
+  %x = alloca i8, align 1
+  %y = alloca i8, align 1
+  %z = alloca i8, align 1
+; CHECK: add r1, sp, #8
+; CHECK: str r1, [r0]
+  store i8* %x, i8** %p, align 4
+; CHECK: add r1, sp, #4
+; CHECK: str r1, [r0]
+  store i8* %y, i8** %p, align 4
+; CHECK: mov r1, sp
+; CHECK: str r1, [r0]
+  store i8* %z, i8** %p, align 4
+  ret void
+}
+
+; Stack offsets larger than 1020 still need two ADDs
+define void @test2([1024 x i8]** %p) {
+  %arr1 = alloca [1024 x i8], align 1
+  %arr2 = alloca [1024 x i8], align 1
+; CHECK: add r1, sp, #1020
+; CHECK: adds r1, #4
+; CHECK: str r1, [r0]
+  store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
+; CHECK: mov r1, sp
+; CHECK: str r1, [r0]
+  store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
+  ret void
+}
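
For reference, a quick way to exercise the new test after applying the patch (the
bin/ paths below are illustrative and depend on the build layout; llc, FileCheck
and llvm-lit typically end up in the build tree's bin directory):

  $ bin/llvm-lit -v test/CodeGen/Thumb/stack-access.ll

or, running the test's RUN line by hand:

  $ bin/llc -mtriple=thumb-eabi test/CodeGen/Thumb/stack-access.ll -o - \
      | bin/FileCheck test/CodeGen/Thumb/stack-access.ll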