diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -518,9 +518,12 @@ addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { - // in v8, IfConversion depends on Thumb instruction widths + // When optimising for size, always run the Thumb2SizeReduction pass before + // IfConversion. Otherwise, check whether IT blocks are restricted + // (e.g. in v8, IfConversion depends on Thumb instruction widths) addPass(createThumb2SizeReductionPass([this](const Function &F) { - return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT(); + return this->TM->getSubtarget<ARMSubtarget>(F).hasMinSize() || + this->TM->getSubtarget<ARMSubtarget>(F).restrictIT(); })); addPass(createIfConverter([](const MachineFunction &MF) { diff --git a/llvm/test/CodeGen/ARM/t2-shrink-ldrpost.ll b/llvm/test/CodeGen/ARM/t2-shrink-ldrpost.ll --- a/llvm/test/CodeGen/ARM/t2-shrink-ldrpost.ll +++ b/llvm/test/CodeGen/ARM/t2-shrink-ldrpost.ll @@ -1,13 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" target triple = "thumbv7m--linux-gnu" -; CHECK-LABEL: f: -; CHECK: ldm r{{[0-9]}}!, {r[[x:[0-9]]]} -; CHECK: add.w r[[x]], r[[x]], #3 -; CHECK: stm r{{[0-9]}}!, {r[[x]]} +; NOTE: When optimising for minimum size, an LDM is expected to be generated define void @f(i32 %n, i32* nocapture %a, i32* nocapture readonly %b) optsize minsize { +; CHECK-LABEL: f: +; CHECK: @ %bb.0: +; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: blt .LBB0_2 +; CHECK-NEXT: .LBB0_1: @ %.lr.ph +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldm r2!, {r3} +; CHECK-NEXT: adds r3, #3 +; CHECK-NEXT: stm r1!, {r3} +; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: .LBB0_2: @ %._crit_edge +; CHECK-NEXT: bx lr %1 = icmp sgt
i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -28,9 +39,22 @@ ret void } -; CHECK-LABEL: f_nominsize: -; CHECK-NOT: ldm +; NOTE: When not optimising for minimum size, an LDM is expected not to be generated define void @f_nominsize(i32 %n, i32* nocapture %a, i32* nocapture readonly %b) optsize { +; CHECK-LABEL: f_nominsize: +; CHECK: @ %bb.0: +; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: bxlt lr +; CHECK-NEXT: .LBB1_1: @ %.lr.ph +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr r3, [r2], #4 +; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: add.w r3, r3, #3 +; CHECK-NEXT: str r3, [r1], #4 +; CHECK-NEXT: bne .LBB1_1 +; CHECK-NEXT: @ %bb.2: @ %._crit_edge +; CHECK-NEXT: bx lr %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge diff --git a/llvm/test/CodeGen/Thumb2/constant-hoisting.ll b/llvm/test/CodeGen/Thumb2/constant-hoisting.ll --- a/llvm/test/CodeGen/Thumb2/constant-hoisting.ll +++ b/llvm/test/CodeGen/Thumb2/constant-hoisting.ll @@ -37,25 +37,26 @@ ; CHECK-V7M: mov r2, r0 ; CHECK-V7M-NEXT: ldr r0, .LCPI0_0 ; CHECK-V7M-NEXT: cmp r2, #50 -; CHECK-V7M-NEXT: beq .LBB0_3 +; CHECK-V7M-NEXT: beq .LBB0_5 ; CHECK-V7M-NEXT: cmp r2, #1 -; CHECK-V7M-NEXT: ittt eq -; CHECK-V7M-NEXT: addeq r0, r1 -; CHECK-V7M-NEXT: addeq r0, #1 -; CHECK-V7M-NEXT: bxeq lr +; CHECK-V7M-NEXT: beq .LBB0_7 ; CHECK-V7M-NEXT: cmp r2, #30 -; CHECK-V7M-NEXT: ittt eq -; CHECK-V7M-NEXT: addeq r0, r1 -; CHECK-V7M-NEXT: addeq r0, #2 -; CHECK-V7M-NEXT: bxeq lr -; CHECK-V7M-NEXT: cbnz r2, .LBB0_4 -; CHECK-V7M-NEXT: .LBB0_2: +; CHECK-V7M-NEXT: beq .LBB0_8 +; CHECK-V7M-NEXT: cbnz r2, .LBB0_6 ; CHECK-V7M-NEXT: add r0, r1 ; CHECK-V7M-NEXT: bx lr -; CHECK-V7M-NEXT: .LBB0_3: +; CHECK-V7M-NEXT: .LBB0_5: ; CHECK-V7M-NEXT: add r0, r1 ; CHECK-V7M-NEXT: adds r0, #4 -; CHECK-V7M-NEXT: .LBB0_4: +; CHECK-V7M-NEXT: .LBB0_6: +; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .LBB0_7: +; CHECK-V7M-NEXT: add r0, r1 +; CHECK-V7M-NEXT: adds r0, #1 +; CHECK-V7M-NEXT: bx lr +; 
CHECK-V7M-NEXT: .LBB0_8: +; CHECK-V7M-NEXT: add r0, r1 +; CHECK-V7M-NEXT: adds r0, #2 ; CHECK-V7M-NEXT: bx lr ; CHECK-V7M-NEXT: .p2align 2 ; CHECK-V7M-NEXT: .LCPI0_0: