Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -614,56 +614,6 @@
   }
 }
 
-static inline
-unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("unhandled vctp opcode");
-    break;
-  case ARM::MVE_VCTP8:
-    return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
-  case ARM::MVE_VCTP16:
-    return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
-  case ARM::MVE_VCTP32:
-    return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
-  case ARM::MVE_VCTP64:
-    return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
-  }
-  return 0;
-}
-
-static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("unhandled vctp opcode");
-  case ARM::MVE_VCTP8:  return 16;
-  case ARM::MVE_VCTP16: return 8;
-  case ARM::MVE_VCTP32: return 4;
-  case ARM::MVE_VCTP64: return 2;
-  }
-  return 0;
-}
-
-static inline bool isVCTP(const MachineInstr *MI) {
-  switch (MI->getOpcode()) {
-  default:
-    break;
-  case ARM::MVE_VCTP8:
-  case ARM::MVE_VCTP16:
-  case ARM::MVE_VCTP32:
-  case ARM::MVE_VCTP64:
-    return true;
-  }
-  return false;
-}
-
-static inline
-bool isLoopStart(MachineInstr &MI) {
-  return MI.getOpcode() == ARM::t2DoLoopStart ||
-         MI.getOpcode() == ARM::t2DoLoopStartTP ||
-         MI.getOpcode() == ARM::t2WhileLoopStart;
-}
-
 static inline
 bool isCondBranchOpcode(int Opc) {
   return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -56,6 +56,7 @@
 #include "ARMBaseRegisterInfo.h"
 #include "ARMBasicBlockInfo.h"
 #include "ARMSubtarget.h"
+#include "MVETailPredUtils.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallSet.h"
@@ -1321,33 +1322,16 @@
 // another low register.
 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                    TII->get(ARM::t2CMPri));
-  MIB.add(MI->getOperand(0));
-  MIB.addImm(0);
-  MIB.addImm(ARMCC::AL);
-  MIB.addReg(ARM::NoRegister);
-
   MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
   unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
     ARM::tBcc : ARM::t2Bcc;
 
-  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
-  MIB.add(MI->getOperand(1)); // branch target
-  MIB.addImm(ARMCC::EQ);      // condition code
-  MIB.addReg(ARM::CPSR);
-  MI->eraseFromParent();
+  RevertWhileLoopStart(MI, TII, BrOpc);
 }
 
 void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
-  MachineBasicBlock *MBB = MI->getParent();
-  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
-      .add(MI->getOperand(0))
-      .add(MI->getOperand(1))
-      .add(predOps(ARMCC::AL));
-  MI->eraseFromParent();
+  RevertDoLoopStart(MI, TII);
 }
 
 bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
@@ -1365,21 +1349,7 @@
   bool SetFlags =
       RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore);
 
-  MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                    TII->get(ARM::t2SUBri));
-  MIB.addDef(ARM::LR);
-  MIB.add(MI->getOperand(1));
-  MIB.add(MI->getOperand(2));
-  MIB.addImm(ARMCC::AL);
-  MIB.addReg(0);
-
-  if (SetFlags) {
-    MIB.addReg(ARM::CPSR);
-    MIB->getOperand(5).setIsDef(true);
-  } else
-    MIB.addReg(0);
-
-  MI->eraseFromParent();
+  llvm::RevertLoopDec(MI, TII, SetFlags);
   return SetFlags;
 }
@@ -1387,28 +1357,11 @@
 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
 
-  MachineBasicBlock *MBB = MI->getParent();
-  // Create cmp
-  if (!SkipCmp) {
-    MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                      TII->get(ARM::t2CMPri));
-    MIB.addReg(ARM::LR);
-    MIB.addImm(0);
-    MIB.addImm(ARMCC::AL);
-    MIB.addReg(ARM::NoRegister);
-  }
-
   MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
   unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
     ARM::tBcc : ARM::t2Bcc;
 
-  // Create bne
-  MachineInstrBuilder MIB =
-      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
-  MIB.add(MI->getOperand(1)); // branch target
-  MIB.addImm(ARMCC::NE);      // condition code
-  MIB.addReg(ARM::CPSR);
-  MI->eraseFromParent();
+  llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
 }
 
 // Perform dead code elimation on the loop iteration count setup expression.
Index: llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
===================================================================
--- llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -18,6 +18,7 @@
 #include "ARM.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -58,6 +59,7 @@
   }
 
 private:
+  bool RevertLoopWithCall(MachineLoop *ML);
   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
                                             MachineInstr &Instr,
@@ -156,6 +158,31 @@
   return true;
 }
 
+bool MVETPAndVPTOptimisations::RevertLoopWithCall(MachineLoop *ML) {
+  LLVM_DEBUG(dbgs() << "RevertLoopWithCall on loop "
+                    << ML->getHeader()->getName() << "\n");
+
+  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+    return false;
+
+  // Check if there is an illegal instruction (a call) in the low overhead loop
+  // and if so revert it now before we get any further.
+  for (MachineBasicBlock *MBB : ML->blocks()) {
+    for (MachineInstr &MI : *MBB) {
+      if (MI.isCall()) {
+        LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);
+        RevertDoLoopStart(LoopStart, TII);
+        RevertLoopDec(LoopDec, TII);
+        RevertLoopEnd(LoopEnd, TII);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
 // instruction, making the backend ARMLowOverheadLoops passes job of finding the
@@ -661,7 +688,7 @@
   const ARMSubtarget &STI =
       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
 
-  if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+  if (!STI.isThumb2() || !STI.hasLOB())
     return false;
 
   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
@@ -673,8 +700,10 @@
                     << "********** Function: " << Fn.getName() << '\n');
 
   bool Modified = false;
-  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder())
+  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
+    Modified |= RevertLoopWithCall(ML);
     Modified |= ConvertTailPredLoop(ML, DT);
+  }
 
   for (MachineBasicBlock &MBB : Fn) {
     Modified |= ReplaceVCMPsByVPNOTs(MBB);
Index: llvm/lib/Target/ARM/MVETailPredUtils.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/ARM/MVETailPredUtils.h
@@ -0,0 +1,157 @@
+//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for low overhead and tail predicated
+// loops, shared between the ARMLowOverheadLoops pass and anywhere else that
+// needs them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+namespace llvm {
+
+static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("unhandled vctp opcode");
+    break;
+  case ARM::MVE_VCTP8:
+    return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
+  case ARM::MVE_VCTP16:
+    return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
+  case ARM::MVE_VCTP32:
+    return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
+  case ARM::MVE_VCTP64:
+    return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
+  }
+  return 0;
+}
+
+static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("unhandled vctp opcode");
+  case ARM::MVE_VCTP8:
+    return 16;
+  case ARM::MVE_VCTP16:
+    return 8;
+  case ARM::MVE_VCTP32:
+    return 4;
+  case ARM::MVE_VCTP64:
+    return 2;
+  }
+  return 0;
+}
+
+static inline bool isVCTP(const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case ARM::MVE_VCTP8:
+  case ARM::MVE_VCTP16:
+  case ARM::MVE_VCTP32:
+  case ARM::MVE_VCTP64:
+    return true;
+  }
+  return false;
+}
+
+static inline bool isLoopStart(MachineInstr &MI) {
+  return MI.getOpcode() == ARM::t2DoLoopStart ||
+         MI.getOpcode() == ARM::t2DoLoopStartTP ||
+         MI.getOpcode() == ARM::t2WhileLoopStart;
+}
+
+// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
+// beq that branches to the exit branch.
+inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII,
+                                 unsigned BrOpc = ARM::t2Bcc) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Cmp
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+  MIB.add(MI->getOperand(0));
+  MIB.addImm(0);
+  MIB.addImm(ARMCC::AL);
+  MIB.addReg(ARM::NoRegister);
+
+  // Branch
+  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+  MIB.add(MI->getOperand(1)); // branch target
+  MIB.addImm(ARMCC::EQ);      // condition code
+  MIB.addReg(ARM::CPSR);
+
+  MI->eraseFromParent();
+}
+
+inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) {
+  MachineBasicBlock *MBB = MI->getParent();
+  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
+      .add(MI->getOperand(0))
+      .add(MI->getOperand(1))
+      .add(predOps(ARMCC::AL));
+
+  MI->eraseFromParent();
+}
+
+inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII,
+                          bool SetFlags = false) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
+  MIB.add(MI->getOperand(0));
+  MIB.add(MI->getOperand(1));
+  MIB.add(MI->getOperand(2));
+  MIB.addImm(ARMCC::AL);
+  MIB.addReg(0);
+
+  if (SetFlags) {
+    MIB.addReg(ARM::CPSR);
+    MIB->getOperand(5).setIsDef(true);
+  } else
+    MIB.addReg(0);
+
+  MI->eraseFromParent();
+}
+
+// Generate a subs, or sub and cmp, and a branch instead of an LE.
+inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII,
+                          unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Create cmp
+  if (!SkipCmp) {
+    MachineInstrBuilder MIB =
+        BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+    MIB.add(MI->getOperand(0));
+    MIB.addImm(0);
+    MIB.addImm(ARMCC::AL);
+    MIB.addReg(ARM::NoRegister);
+  }
+
+  // Create bne
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+  MIB.add(MI->getOperand(1)); // branch target
+  MIB.addImm(ARMCC::NE);      // condition code
+  MIB.addReg(ARM::CPSR);
+  MI->eraseFromParent();
+}
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
@@ -272,7 +272,7 @@
   ; CHECK:   renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39)
-  ; CHECK:   dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK:   dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK:   renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r0 = tMOVr $r7, 14 /* CC::al */, $noreg
   ; CHECK:   $r4 = tMOVr $r5, 14 /* CC::al */, $noreg
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
@@ -258,7 +258,7 @@
   ; CHECK:   renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2)
   ; CHECK:   renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38)
-  ; CHECK:   dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK:   dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK:   renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r4 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3)
   ; CHECK:   $r8 = tMOVr $r5, 14 /* CC::al */, $noreg
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
@@ -296,7 +296,7 @@
   ; CHECK:   renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep1)
-  ; CHECK:   t2CMPri killed $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2CMPri killed renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   tBcc %bb.4, 1 /* CC::ne */, killed $cpsr
   ; CHECK:   tB %bb.5, 14 /* CC::al */, $noreg
   ; CHECK: bb.5.bb13:
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
@@ -8,7 +8,7 @@
 # CHECK:     tBcc %bb.4, 0
 # CHECK:     tB %bb.2
 # CHECK: bb.3.while.body:
-# CHECK:     t2CMPri $lr, 0, 14
+# CHECK:     t2CMPri renamable $lr, 0, 14
 # CHECK:     tBcc %bb.3, 1
 # CHECK:     tB %bb.4
 # CHECK: bb.4.while.end:
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir
@@ -0,0 +1,145 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+lob -run-pass=arm-mve-vpt-opts --verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  @d = local_unnamed_addr global i32 0, align 4
+  @c = local_unnamed_addr global [1 x i32] zeroinitializer, align 4
+
+  define i32 @e() optsize {
+  entry:
+    %.pr = load i32, i32* @d, align 4
+    %cmp13 = icmp sgt i32 %.pr, -1
+    br i1 %cmp13, label %for.cond1.preheader.preheader, label %for.end9
+
+  for.cond1.preheader.preheader:                    ; preds = %entry
+    %0 = add i32 %.pr, 1
+    %1 = call i32 @llvm.start.loop.iterations.i32(i32 %0)
+    br label %for.cond1.preheader
+
+  for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
+    %2 = phi i32 [ %1, %for.cond1.preheader.preheader ], [ %3, %for.cond1.preheader ]
+    call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(24) bitcast ([1 x i32]* @c to i8*), i8 0, i32 24, i1 false)
+    %3 = call i32 @llvm.loop.decrement.reg.i32(i32 %2, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.cond1.preheader, label %for.cond.for.end9_crit_edge
+
+  for.cond.for.end9_crit_edge:                      ; preds = %for.cond1.preheader
+    store i32 -1, i32* @d, align 4
+    br label %for.end9
+
+  for.end9:                                         ; preds = %for.cond.for.end9_crit_edge, %entry
+    ret i32 undef
+  }
+
+  declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
+  declare i32 @llvm.start.loop.iterations.i32(i32)
+  declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
+
+...
+---
+name: e
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+  - { id: 0, class: gprnopc, preferred-register: '' }
+  - { id: 1, class: gpr, preferred-register: '' }
+  - { id: 2, class: gprlr, preferred-register: '' }
+  - { id: 3, class: gpr, preferred-register: '' }
+  - { id: 4, class: rgpr, preferred-register: '' }
+  - { id: 5, class: rgpr, preferred-register: '' }
+  - { id: 6, class: gprlr, preferred-register: '' }
+  - { id: 7, class: rgpr, preferred-register: '' }
+  - { id: 8, class: rgpr, preferred-register: '' }
+  - { id: 9, class: gprlr, preferred-register: '' }
+  - { id: 10, class: gprlr, preferred-register: '' }
+  - { id: 11, class: rgpr, preferred-register: '' }
+  - { id: 12, class: rgpr, preferred-register: '' }
+  - { id: 13, class: gpr, preferred-register: '' }
+liveins: []
+body: |
+  ; CHECK-LABEL: name: e
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.4(0x30000000)
+  ; CHECK:   [[t2MOVi32imm:%[0-9]+]]:rgpr = t2MOVi32imm @d
+  ; CHECK:   [[t2LDRi12_:%[0-9]+]]:gprnopc = t2LDRi12 [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d)
+  ; CHECK:   t2CMPri [[t2LDRi12_]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.4, 4 /* CC::mi */, $cpsr
+  ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
+  ; CHECK: bb.1.for.cond1.preheader.preheader:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[t2LDRi12_]], 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   [[tMOVr:%[0-9]+]]:gprlr = tMOVr killed [[t2ADDri]], 14 /* CC::al */, $noreg
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr = COPY [[tMOVr]]
+  ; CHECK:   [[t2MOVi32imm1:%[0-9]+]]:rgpr = t2MOVi32imm @c
+  ; CHECK:   [[t2MOVi:%[0-9]+]]:rgpr = t2MOVi 24, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK: bb.2.for.cond1.preheader:
+  ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gprlr = PHI [[COPY]], %bb.1, %3, %bb.2
+  ; CHECK:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+  ; CHECK:   $r0 = COPY [[t2MOVi32imm1]]
+  ; CHECK:   $r1 = COPY [[t2MOVi]]
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, &__aeabi_memclr4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp
+  ; CHECK:   ADJCALLSTACKUP 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+  ; CHECK:   [[t2SUBri:%[0-9]+]]:gprlr = t2SUBri [[PHI]], 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr = COPY [[t2SUBri]]
+  ; CHECK:   t2CMPri [[t2SUBri]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.2, 1 /* CC::ne */, $cpsr
+  ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
+  ; CHECK: bb.3.for.cond.for.end9_crit_edge:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   [[t2MOVi1:%[0-9]+]]:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   t2STRi12 killed [[t2MOVi1]], [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (store 4 into @d)
+  ; CHECK: bb.4.for.end9:
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
+  ; CHECK:   $r0 = COPY [[DEF]]
+  ; CHECK:   tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.4(0x30000000)
+
+    %4:rgpr = t2MOVi32imm @d
+    %0:gprnopc = t2LDRi12 %4, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d)
+    t2CMPri %0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2Bcc %bb.4, 4 /* CC::mi */, $cpsr
+    t2B %bb.1, 14 /* CC::al */, $noreg
+
+  bb.1.for.cond1.preheader.preheader:
+    successors: %bb.2(0x80000000)
+
+    %5:rgpr = t2ADDri %0, 1, 14 /* CC::al */, $noreg, $noreg
+    %6:gprlr = t2DoLoopStart killed %5
+    %1:gpr = COPY %6
+    %7:rgpr = t2MOVi32imm @c
+    %8:rgpr = t2MOVi 24, 14 /* CC::al */, $noreg, $noreg
+
+  bb.2.for.cond1.preheader:
+    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+
+    %2:gprlr = PHI %1, %bb.1, %3, %bb.2
+    ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+    $r0 = COPY %7
+    $r1 = COPY %8
+    tBL 14 /* CC::al */, $noreg, &__aeabi_memclr4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+    %9:gprlr = t2LoopDec %2, 1
+    %3:gpr = COPY %9
+    t2LoopEnd %9, %bb.2, implicit-def dead $cpsr
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+  bb.3.for.cond.for.end9_crit_edge:
+    successors: %bb.4(0x80000000)
+
+    %12:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg
+    t2STRi12 killed %12, %4, 0, 14 /* CC::al */, $noreg :: (store 4 into @d)
+
+  bb.4.for.end9:
+    %13:gpr = IMPLICIT_DEF
+    $r0 = COPY %13
+    tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+...
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
@@ -113,7 +113,7 @@
   ; CHECK:   renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2 /* CC::hs */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
   ; CHECK:   early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep4)
   ; CHECK:   renamable $lr = tMOVr killed $lr, 14 /* CC::al */, $noreg
-  ; CHECK:   t2CMPri $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2CMPri renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
   ; CHECK:   tB %bb.2, 14 /* CC::al */, $noreg
   ; CHECK: bb.2.while.end: