diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -778,6 +778,19 @@
     return false;
   }
 
+  /// Return the increase in code size (in bytes) needed to predicate a
+  /// contiguous run of NumInsts instructions.
+  virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF,
+                                                    unsigned NumInsts) const {
+    return 0;
+  }
+
+  /// Return an estimate for the code size reduction (in bytes) which will be
+  /// caused by removing the given branch instruction during if-conversion.
+  virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const {
+    return getInstSizeInBytes(MI);
+  }
+
   /// Return true if it's profitable to unpredicate
   /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
   /// exclusive predicates.
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -285,14 +285,126 @@
                                                    Prediction);
     }
 
-    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
-                            unsigned TCycle, unsigned TExtra,
-                            MachineBasicBlock &FBB,
-                            unsigned FCycle, unsigned FExtra,
-                            BranchProbability Prediction) const {
-      return TCycle > 0 && FCycle > 0 &&
-        TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
-                                 Prediction);
+    /// Decide whether if-converting the diamond made of TBBInfo and FBBInfo
+    /// is worthwhile. CommBB is the block whose branch terminators the
+    /// conversion is expected to remove, Dups is subtracted from each side's
+    /// cost on the cycle-based path, and Forked is true for a forked diamond,
+    /// whose trailing branches are kept rather than removed.
+    ///
+    /// For minsize functions the decision is based on code size: the bytes
+    /// saved by removing branches and merging common instructions must exceed
+    /// the bytes needed to predicate the remaining instructions. Otherwise
+    /// the target's cycle-based isProfitableToIfCvt hook decides.
+    bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo,
+                            MachineBasicBlock &CommBB, unsigned Dups,
+                            BranchProbability Prediction, bool Forked) const {
+      const MachineFunction &MF = *TBBInfo.BB->getParent();
+      if (MF.getFunction().hasMinSize()) {
+        MachineBasicBlock::iterator TIB = TBBInfo.BB->begin();
+        MachineBasicBlock::iterator FIB = FBBInfo.BB->begin();
+        MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
+        MachineBasicBlock::iterator FIE = FBBInfo.BB->end();
+
+        // Only the advanced iterators are used below. The counters start at
+        // zero so that the helper never reads an indeterminate value.
+        unsigned Dups1 = 0, Dups2 = 0;
+        if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+                                         *TBBInfo.BB, *FBBInfo.BB,
+                                         /*SkipUnconditionalBranches*/ true))
+          llvm_unreachable("should already have been checked by ValidDiamond");
+
+        unsigned BranchBytes = 0;
+        unsigned CommonBytes = 0;
+
+        // Count common instructions at the start of the true and false blocks.
+        for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) {
+          LLVM_DEBUG(dbgs() << "Common inst: " << I);
+          CommonBytes += TII->getInstSizeInBytes(I);
+        }
+        for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) {
+          LLVM_DEBUG(dbgs() << "Common inst: " << I);
+          CommonBytes += TII->getInstSizeInBytes(I);
+        }
+
+        // Count instructions at the end of the true and false blocks, after
+        // the ones we plan to predicate. Analyzable branches will be removed
+        // (unless this is a forked diamond), and all other instructions are
+        // common between the two blocks.
+        for (auto &I : make_range(TIE, TBBInfo.BB->end())) {
+          if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) {
+            LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+            BranchBytes += TII->predictBranchSizeForIfCvt(I);
+          } else {
+            LLVM_DEBUG(dbgs() << "Common inst: " << I);
+            CommonBytes += TII->getInstSizeInBytes(I);
+          }
+        }
+        for (auto &I : make_range(FIE, FBBInfo.BB->end())) {
+          if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) {
+            LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+            BranchBytes += TII->predictBranchSizeForIfCvt(I);
+          } else {
+            LLVM_DEBUG(dbgs() << "Common inst: " << I);
+            CommonBytes += TII->getInstSizeInBytes(I);
+          }
+        }
+        for (auto &I : CommBB.terminators()) {
+          if (I.isBranch()) {
+            LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+            BranchBytes += TII->predictBranchSizeForIfCvt(I);
+          }
+        }
+
+        // The common instructions in one branch will be eliminated, halving
+        // their code size.
+        CommonBytes /= 2;
+
+        // Count the instructions which we need to predicate.
+        unsigned NumPredicatedInstructions = 0;
+        for (auto &I : make_range(TIB, TIE)) {
+          if (!I.isDebugInstr()) {
+            LLVM_DEBUG(dbgs() << "Predicating: " << I);
+            NumPredicatedInstructions++;
+          }
+        }
+        for (auto &I : make_range(FIB, FIE)) {
+          if (!I.isDebugInstr()) {
+            LLVM_DEBUG(dbgs() << "Predicating: " << I);
+            NumPredicatedInstructions++;
+          }
+        }
+
+        // Even though we're optimising for size at the expense of performance,
+        // avoid creating really long predicated blocks.
+        if (NumPredicatedInstructions > 15)
+          return false;
+
+        // Some targets (e.g. Thumb2) need to insert extra instructions to
+        // start predicated blocks.
+        unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions(
+            MF, NumPredicatedInstructions);
+
+        LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes
+                          << ", CommonBytes=" << CommonBytes
+                          << ", NumPredicatedInstructions="
+                          << NumPredicatedInstructions
+                          << ", ExtraPredicateBytes=" << ExtraPredicateBytes
+                          << ")\n");
+        return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
+      }
+
+      // Not optimising for minimum size: use the target's cycle-based model.
+      unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups;
+      unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups;
+      bool Res = TCycle > 0 && FCycle > 0 &&
+                 TII->isProfitableToIfCvt(
+                     *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB,
+                     FCycle, FBBInfo.ExtraCost2, Prediction);
+      LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle
+                        << ", FCycle=" << FCycle
+                        << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra="
+                        << FBBInfo.ExtraCost2 << ") = " << Res << "\n");
+      return Res;
     }
 
     /// Returns true if Block ends without a terminator.
@@ -842,6 +954,8 @@
 
   TrueBBICalc.BB = TrueBBI.BB;
   FalseBBICalc.BB = FalseBBI.BB;
+  TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+  FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
   if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
     return false;
 
@@ -899,6 +1013,8 @@
 
   TrueBBICalc.BB = TrueBBI.BB;
   FalseBBICalc.BB = FalseBBI.BB;
+  TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+  FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
   if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
     return false;
   // The size is used to decide whether to if-convert, and the shared portions
@@ -1186,13 +1302,9 @@
 
   if (CanRevCond) {
     BBInfo TrueBBICalc, FalseBBICalc;
-    auto feasibleDiamond = [&]() {
-      bool MeetsSize = MeetIfcvtSizeLimit(
-          *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
-                        TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
-          *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
-                        FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
-          Prediction);
+    auto feasibleDiamond = [&](bool Forked) {
+      bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB,
+                                          Dups + Dups2, Prediction, Forked);
       bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
                                               /* IsTriangle */ false, /* RevCond */ false,
                                               /* hasCommonTail */ true);
@@ -1204,7 +1316,7 @@
 
     if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
                      TrueBBICalc, FalseBBICalc)) {
-      if (feasibleDiamond()) {
+      if (feasibleDiamond(false)) {
         // Diamond:
         //   EBB
         //   / \_
@@ -1220,7 +1332,7 @@
       }
     } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
                                   TrueBBICalc, FalseBBICalc)) {
-      if (feasibleDiamond()) {
+      if (feasibleDiamond(true)) {
         // ForkedDiamond:
         // if TBB and FBB have a common tail that includes their conditional
         // branch instructions, then we can If Convert this pattern.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -276,6 +276,10 @@ return NumCycles == 1; } + unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const override; + unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override; + bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2079,6 +2079,38 @@ return PredCost <= UnpredCost; } +unsigned +ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const { + // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. + // ARM has a condition code field in every predicable instruction, using it + // doesn't change code size. + return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0; +} + +unsigned +ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { + // If this branch is likely to be folded into the comparison to form a + // CB(N)Z, then removing it won't reduce code size at all, because that will + // just replace the CB(N)Z with a CMP. + if (MI.getOpcode() == ARM::t2Bcc && + findCMPToFoldIntoCBZ(&MI, &getRegisterInfo())) + return 0; + + unsigned Size = getInstSizeInBytes(MI); + + // For Thumb2, all branches are 32-bit instructions during the if conversion + // pass, but may be replaced with 16-bit instructions during size reduction. + // Since the branches considered by if conversion tend to be forward branches + // over small basic blocks, they are very likely to be in range for the + // narrow instructions, so we assume the final code size will be half what it + // currently is. 
+ if (Subtarget.isThumb2()) + Size /= 2; + + return Size; +} + bool ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const { diff --git a/llvm/test/CodeGen/ARM/ifcvt-size.mir b/llvm/test/CodeGen/ARM/ifcvt-size.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ifcvt-size.mir @@ -0,0 +1,559 @@ +# RUN: llc %s -o - -run-pass=if-converter -debug-only=if-converter 2>%t| FileCheck %s +# RUN: FileCheck %s < %t --check-prefix=DEBUG +# REQUIRES: asserts + +# When optimising for size, we use a different set of heuristics for +# if-conversion, which take into account the size of the instructions, not the +# time taken to execute them. This is more complicated for Thumb, where it is +# also affected by selection of narrow branch instructions, insertion of IT +# instructions, and selection of the CB(N)Z instructions. + +--- | + target triple = "thumbv7-unknown-linux-gnueabi" + + define void @fn1() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn2() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn3() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn4() minsize "target-features"="-thumb-mode" { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn5() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn6() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if2.then: + unreachable + if2.else: + unreachable + } + + define void @fn7() minsize "target-features"="-thumb-mode" { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn8() minsize { 
+ entry: + unreachable + if.then: + unreachable + if.else: + unreachable + if.end: + unreachable + } + + define void @fn9() minsize { + entry: + unreachable + if.then: + unreachable + if.else: + unreachable + lab1: + unreachable + } +... +--- +name: fn1 +alignment: 1 +tracksRegLiveness: true + +# If-conversion is profitable here because it will remove two branches of 2 +# bytes each (assuming they can become narrow branches later), and will only +# add 2 bytes with the IT instruction. + +# CHECK-LABEL: name: fn1 +# CHECK: t2CMPri +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: t2MOVi + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn1' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + t2B %bb.3, 14, $noreg + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r1, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = t2MOVi 0, 14, $noreg, $noreg + t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn2 +alignment: 1 +tracksRegLiveness: true + +# If-conversion is not profitable here, because the 5 conditional instructions +# would require 2 IT instructions. 
+ +# CHECK-LABEL: name: fn2 +# CHECK: t2CMPri +# CHECK-NEXT: t2Bcc + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn2' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=4) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + t2B %bb.3, 14, $noreg + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r1, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = t2MOVi 0, 14, $noreg, $noreg + t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn3 +alignment: 1 +tracksRegLiveness: true + +# Here, the true and false blocks both end in a tBX_RET instruction. One of +# these will be removed, saving 2 bytes, and the remaining one isn't +# conditional, so doesn't push us over the limit of 4 instructions in an IT +# block. 
+ +# CHECK-LABEL: name: fn3 +# CHECK: t2CMPri +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: tBX_RET + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn3' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + + bb.2.if.else: + liveins: $r1, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn4 +alignment: 1 +tracksRegLiveness: true + +# This is the same as fn2, but compiled for ARM, which doesn't need IT +# instructions, so if-conversion is profitable. 
+ +# CHECK-LABEL: name: fn4 +# CHECK: CMPri +# CHECK-NEXT: LDRi12 +# CHECK-NEXT: LDRi12 +# CHECK-NEXT: LDRSH +# CHECK-NEXT: LDRi12 +# CHECK-NEXT: LDRi12 +# CHECK-NEXT: MOVi + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn4' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=8, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=0) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r1, $r3 + + renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = MOVi 0, 14, $noreg, $noreg + STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + BX_RET 14, $noreg, implicit $r0 + +--- +name: fn5 +alignment: 1 +tracksRegLiveness: true + +# Here, the compare and conditional branch can be turned into a CBZ, so we +# don't want to if-convert. 
+ +# CHECK-LABEL: name: fn5 +# CHECK: t2CMPri +# CHECK: t2Bcc + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn5' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=0, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $r0, $r1, $r2 + + t2CMPri killed renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.1.if.then: + liveins: $r0 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + + bb.2.if.else: + liveins: $r1 + + renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn6 +alignment: 1 +tracksRegLiveness: true + +# This is a forked-diamond pattern, we recognise that the conditional branches +# at the ends of the true and false blocks are the same, and can be shared. 
+ +# CHECK-LABEL: name: fn6 +# CHECK: t2CMPri +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2CMPri +# CHECK-NEXT: t2Bcc + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn6' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=12, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 4, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x30000000), %bb.4(0x50000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.3.if2.then, 1, killed $cpsr + t2B %bb.4.if2.else, 14, $noreg + + bb.2.if.else: + successors: %bb.3(0x30000000), %bb.4(0x50000000) + liveins: $r0, $r1, $r3 + + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.3.if2.then, 1, killed $cpsr + t2B %bb.4.if2.else, 14, $noreg + + bb.3.if2.then: + liveins: $r0, $r1, $r3 + + t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + + bb.4.if2.else: + liveins: $r0 + + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn7 +alignment: 1 +tracksRegLiveness: true + +# When compiling for ARM, it would be good for code size to generate very long +# runs of conditional instructions, but we put an (arbitrary) limit on this to +# avoid generating code which is very bad for performance, and only saves a few +# bytes of code size. 
+ +# CHECK-LABEL: name: fn7 +# CHECK: CMPri +# CHECK-NEXT: Bcc + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r1, $r3 + + renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg + renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = MOVi 0, 14, $noreg, $noreg + STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + BX_RET 14, $noreg, implicit $r0 + +--- +name: fn8 +alignment: 1 +tracksRegLiveness: true + +# The first t2LDRi12 instruction in each branch is the same, so one copy of it +# will be removed, and it doesn't need to be predicated, keeping us under the 4 +# instruction IT block limit. 
+ +# CHECK-LABEL: name: fn8 +# CHECK: t2CMPri +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: t2MOVi + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn8' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=4, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2, $r3 + + t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 11, killed $cpsr + + bb.1.if.then: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 4, 14, $noreg + t2B %bb.3, 14, $noreg + + bb.2.if.else: + successors: %bb.3(0x80000000) + liveins: $r0, $r3 + + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + + bb.3.if.end: + liveins: $r0, $r3 + + renamable $r1 = t2MOVi 0, 14, $noreg, $noreg + t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 + +--- +name: fn9 +alignment: 2 +tracksRegLiveness: true + +# The INLINEASM_BR instructions aren't analyzable, but they are identical so we +# can still do diamond if-conversion. From a code-size POV, they are common +# instructions, so one will be removed, and they don't need an IT block slot. 
+ +# CHECK-LABEL: name: fn9 +# CHECK: tCMPi8 +# CHECK-NEXT: tLDRi +# CHECK-NEXT: tLDRi +# CHECK-NEXT: tLDRi +# CHECK-NEXT: t2LDRSHi12 +# CHECK-NEXT: INLINEASM_BR + +# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn9' +# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=6, NumPredicatedInstructions=4, ExtraPredicateBytes=2) + +body: | + bb.0.entry: + successors: %bb.1(0x30000000), %bb.3(0x50000000) + liveins: $r0, $r1, $r2 + + tCMPi8 killed renamable $r2, 42, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.3, 1, killed $cpsr + + bb.1.if.then: + successors: %bb.5(0x7fffffff) + liveins: $r0 + + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg + INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1) + + bb.3.if.else: + successors: %bb.5(0x7fffffff) + liveins: $r1 + + renamable $r0 = tLDRi killed renamable $r1, 0, 14, $noreg + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg + renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg + INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1) + + bb.5.lab1 (address-taken): + liveins: $r0 + + renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg + tBX_RET 14, $noreg, implicit $r0 +...