Index: lib/Target/ARM/ARM.h =================================================================== --- lib/Target/ARM/ARM.h +++ lib/Target/ARM/ARM.h @@ -61,6 +61,7 @@ void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); +void initializeThumb2SizeReducePass(PassRegistry &); } // end namespace llvm Index: lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- lib/Target/ARM/ARMTargetMachine.cpp +++ lib/Target/ARM/ARMTargetMachine.cpp @@ -92,6 +92,7 @@ initializeARMConstantIslandsPass(Registry); initializeARMExecutionDepsFixPass(Registry); initializeARMExpandPseudoPass(Registry); + initializeThumb2SizeReducePass(Registry); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { Index: lib/Target/ARM/Thumb2SizeReduction.cpp =================================================================== --- lib/Target/ARM/Thumb2SizeReduction.cpp +++ lib/Target/ARM/Thumb2SizeReduction.cpp @@ -45,6 +45,7 @@ using namespace llvm; #define DEBUG_TYPE "t2-reduce-size" +#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass" STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); @@ -162,7 +163,7 @@ const Thumb2InstrInfo *TII; const ARMSubtarget *STI; - Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); + Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr); bool runOnMachineFunction(MachineFunction &MF) override; @@ -172,7 +173,7 @@ } StringRef getPassName() const override { - return "Thumb2 instruction size reduction pass"; + return THUMB2_SIZE_REDUCE_NAME; } private: @@ -237,6 +238,9 @@ } // end anonymous namespace +INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false, + false) + Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { OptimizeSize = 
MinimizeSize = false; Index: test/CodeGen/Thumb2/t2sizereduction.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/t2sizereduction.mir @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=t2-reduce-size %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8m.main-arm-none-eabi" + + ; Function Attrs: norecurse nounwind readnone + define i32 @test(i32 %x, i32 %y) local_unnamed_addr #0 { + entry: + %cmp6 = icmp sgt i32 %y, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 1, %entry ], [ %mul, %for.body ] + ret i32 %sum.0.lcssa + + for.body: ; preds = %for.body, %for.body.preheader + %lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body ], [ %x, %for.body.preheader ] + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %y, %for.body.preheader ] + %sum.07 = phi i32 [ %mul, %for.body ], [ 1, %for.body.preheader ] + %mul = mul nsw i32 %lsr.iv1, %sum.07 + %lsr.iv.next = add i32 %lsr.iv, -1 + %lsr.iv.next2 = add i32 %lsr.iv1, 1 + %exitcond = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + } + + attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m7" "target-features"="+d16,+dsp,+fp-armv8,+fp-only-sp,+hwdiv,+strict-align,+thumb-mode,-crc,-dotprod,-hwdiv-arm,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" } + +... 
+--- +name: test +tracksRegLiveness: true +liveins: + - { reg: '%r0', virtual-reg: '' } + - { reg: '%r1', virtual-reg: '' } +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: %r0, %r1 + ; CHECK: %r2 = tMOVr %r0, 14, %noreg + ; CHECK: %r0, dead %cpsr = tMOVi8 1, 14, %noreg + ; CHECK: tCMPi8 %r1, 1, 14, %noreg, implicit-def %cpsr + ; CHECK: t2Bcc %bb.2, 11, killed %cpsr + ; CHECK: bb.1.for.body: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: %r0, %r1, %r2 + ; CHECK: %r0, dead %cpsr = tMUL %r2, killed %r0, 14, %noreg + ; CHECK: %r2, dead %cpsr = tADDi8 killed %r2, 1, 14, %noreg + ; CHECK: %r1, %cpsr = tSUBi8 killed %r1, 1, 14, %noreg + ; CHECK: t2Bcc %bb.1, 1, killed %cpsr + ; CHECK: bb.2.for.cond.cleanup: + ; CHECK: liveins: %r0 + ; CHECK: tBX_RET 14, %noreg, implicit %r0 + bb.0.entry: + successors: %bb.1.for.body, %bb.2.for.cond.cleanup + liveins: %r0, %r1 + + %r2 = tMOVr %r0, 14, _ + %r0 = t2MOVi 1, 14, _, _ + t2CMPri %r1, 1, 14, _, implicit-def %cpsr + t2Bcc %bb.2.for.cond.cleanup, 11, killed %cpsr + + bb.1.for.body: + successors: %bb.2.for.cond.cleanup, %bb.1.for.body + liveins: %r0, %r1, %r2 + + %r0 = t2MUL %r2, killed %r0, 14, _ + %r2 = t2ADDri killed %r2, 1, 14, _, _ + %r1 = t2SUBri killed %r1, 1, 14, _, def %cpsr + t2Bcc %bb.1.for.body, 1, killed %cpsr + + bb.2.for.cond.cleanup: + liveins: %r0 + + tBX_RET 14, _, implicit %r0 + +...