Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp =================================================================== --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -360,13 +360,15 @@ int BaseOpc = isThumb2 ? ARM::t2ADDri : + (isThumb1 && Offset < 8) ? ARM::tADDi3 : isThumb1 ? ARM::tADDi8 : ARM::ADDri; if (Offset < 0) { + Offset = - Offset; BaseOpc = isThumb2 ? ARM::t2SUBri : + (isThumb1 && Offset < 8) ? ARM::tSUBi3 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; - Offset = - Offset; } if (!TL->isLegalAddImmediate(Offset)) @@ -374,22 +376,28 @@ return false; // Probably not worth it then. if (isThumb1) { - if (Base != NewBase) { + // Thumb1: depending on immediate size, use either + // ADDS/SUBS NewBase, Base, #imm3 + // or + // MOV NewBase, Base + // ADDS/SUBS NewBase, #imm8. + if (Base != NewBase && Offset >= 8) { // Need to insert a MOV to the new base first. - // FIXME: If the immediate fits in 3 bits, use ADD instead. BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + Base = NewBase; + BaseKill = false; } AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) - .addReg(NewBase, getKillRegState(true)).addImm(Offset) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } - Base = NewBase; BaseKill = true; // New base is always killed straight away. 
} Index: test/CodeGen/Thumb/ldm-stm-base-materialization.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb/ldm-stm-base-materialization.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-none--eabi" + +@a = external global i32* +@b = external global i32* + +; Function Attrs: nounwind +define void @foo() #0 { +entry: +; CHECK-LABEL: foo: +; CHECK: ldr r[[SB:[0-9]]], .LCPI +; CHECK: ldr r[[LB:[0-9]]], .LCPI +; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4 +; CHECK-NEXT: ldm r[[NLB]], +; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4 +; CHECK-NEXT: stm r[[NSB]] + %0 = load i32** @a, align 4 + %arrayidx = getelementptr inbounds i32* %0, i32 1 + %1 = bitcast i32* %arrayidx to i8* + %2 = load i32** @b, align 4 + %arrayidx1 = getelementptr inbounds i32* %2, i32 1 + %3 = bitcast i32* %arrayidx1 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 Index: test/CodeGen/Thumb/thumb-ldm.ll =================================================================== --- test/CodeGen/Thumb/thumb-ldm.ll +++ test/CodeGen/Thumb/thumb-ldm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s @X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] Index: test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll =================================================================== --- test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll +++ test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | 
FileCheck %s @d = external global [64 x i32] @s = external global [64 x i32]