diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CodeGenCoverage.h" #include "llvm/Support/LowLevelTypeImpl.h" @@ -355,7 +356,18 @@ /// if returns true: /// for I in all mutated/inserted instructions: /// !isPreISelGenericOpcode(I.getOpcode()) - virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const = 0; + virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) = 0; + + /// Initialize selector state. + void init() { + InstsMarkedForDeletion.clear(); + } + + /// Returns true if the given generic instruction has been marked for deletion + /// and therefore shouldn't be selected. + bool isInstMarkedForDeletion(MachineInstr *MI) const { + return InstsMarkedForDeletion.count(MI); + } protected: using ComplexRendererFns = @@ -371,6 +383,14 @@ MatcherState(unsigned MaxRenderers); }; + // Set of generic instructions which, during selection, have been marked for + // deletion. They're not immediately deleted during selection because the + // InstructionSelect pass has iterators which can't handle being invalidated. 
+ SmallPtrSet<MachineInstr *, 4> InstsMarkedForDeletion; + + void markInstForDeletion(MachineInstr *MI) { + InstsMarkedForDeletion.insert(MI); + } public: template diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h --- a/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/llvm/include/llvm/CodeGen/MachineOperand.h @@ -684,6 +684,11 @@ Contents.RegMask = RegMaskPtr; } + void setPredicate(unsigned Predicate) { + assert(isPredicate() && "Wrong MachineOperand mutator"); + Contents.Pred = Predicate; + } + //===--------------------------------------------------------------------===// // Other methods. //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -106,7 +106,7 @@ // us do things like a dedicated avx512 selector). However, we might want // to also specialize selectors by MachineFunction, which would let us be // aware of optsize/optnone and such. 
- virtual const InstructionSelector *getInstructionSelector() const { + virtual InstructionSelector *getInstructionSelector() const { return nullptr; } diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp --- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -66,7 +66,8 @@ LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); - const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + ISel->init(); CodeGenCoverage CoverageInfo; assert(ISel && "Cannot work without InstructionSelector"); @@ -124,6 +125,12 @@ continue; } + if (ISel->isInstMarkedForDeletion(&MI)) { + LLVM_DEBUG(dbgs() << "marked for deletion. Erasing...\n"); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + continue; + } + if (!ISel->select(MI, CoverageInfo)) { // FIXME: It would be nice to dump all inserted instructions. It's // not obvious how, esp. considering select() can insert after MI. diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -51,7 +51,7 @@ const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -64,17 +64,19 @@ void preISelLower(MachineInstr &I) const; // An early selection function that runs before the selectImpl() call. 
- bool earlySelect(MachineInstr &I) const; + bool earlySelect(MachineInstr &I); bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool earlySelectBr(MachineInstr &I, MachineRegisterInfo &MRI); bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, - MachineRegisterInfo &MRI) const; + bool matchCompareBranch(MachineInstr &I, MachineRegisterInfo &MRI, + unsigned &CBOpc, Register &Val) const; + bool selectCompareBranch(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; @@ -126,8 +128,10 @@ Register Op2, MachineIRBuilder &MIRBuilder) const; MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitIntegerCompareOpt(MachineOperand &LHS, MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; MachineInstr *emitExtractVectorElt(Optional DstReg, @@ -882,11 +886,11 @@ } } -bool AArch64InstructionSelector::selectCompareBranch( - MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { - +bool AArch64InstructionSelector::matchCompareBranch(MachineInstr &I, + MachineRegisterInfo &MRI, + unsigned &CBOpc, + Register &Val) const { const Register CondReg = I.getOperand(0).getReg(); - MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); MachineInstr *CCMI = MRI.getVRegDef(CondReg); if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg()); @@ -911,16 +915,27 @@ return false; const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits(); 
- unsigned CBOpc = 0; + CBOpc = 0; if (CmpWidth <= 32) CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW); else if (CmpWidth == 64) CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX); else return false; + Val = LHS; + return true; +} + +bool AArch64InstructionSelector::selectCompareBranch( + MachineInstr &I, MachineRegisterInfo &MRI) const { + MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); + unsigned CBOpc; + Register Val; + if (!matchCompareBranch(I, MRI, CBOpc, Val)) + return false; BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc)) - .addUse(LHS) + .addUse(Val) .addMBB(DestMBB) .constrainAllUses(TII, TRI, RBI); @@ -1143,7 +1158,98 @@ return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); } -bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { +bool AArch64InstructionSelector::earlySelectBr(MachineInstr &I, + MachineRegisterInfo &MRI) { + assert(I.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR"); + + // The importer will select a standard unconditional branch, but we first + // try to match the following: + // bb1: + // %c(s32) = G_ICMP pred, %a, %b + // %c1(s1) = G_TRUNC %c(s32) + // G_BRCOND %c1, %bb2 + // G_BR %bb3 + // bb2: + // ... + // bb3: + + // The above pattern does not have a fall through to the successor bb2, always + // resulting in a branch no matter which path is taken. Here we try to find + // and replace that pattern with conditional branch to bb3 and otherwise + // fallthrough to bb2. + + MachineBasicBlock *MBB = I.getParent(); + MachineBasicBlock *BrTarget = I.getOperand(0).getMBB(); + MachineBasicBlock::iterator BrIt(I); + if (BrIt == MBB->begin()) + return false; + assert(std::next(BrIt) == MBB->end() && + "expected G_BR to be a terminator"); + + MachineInstr *BrCond = &*std::prev(BrIt); + if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) + return false; + + // Check that the next block is the conditional branch target. 
+ if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) + return false; + + // Look through the trunc to get the icmp. + MachineInstr *TruncMI = MRI.getVRegDef(BrCond->getOperand(0).getReg()); + if (!TruncMI || TruncMI->getOpcode() != TargetOpcode::G_TRUNC) + return false; + MachineInstr *CmpMI = MRI.getVRegDef(TruncMI->getOperand(1).getReg()); + if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP) + return false; + + CmpInst::Predicate InversePred = CmpInst::getInversePredicate( + (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate()); + const AArch64CC::CondCode InverseCC = + changeICMPPredToAArch64CC((CmpInst::Predicate)InversePred); + + MachineIRBuilder MIB(I); + + auto *MF = MBB->getParent(); + // Before we generate our replacement code, first check to see if the brcond + // will be selected to a cbz/cbnz later. If so, modify icmp predicate and + // swap the branch targets, leaving the brcond to be selected later. + if (!MF->getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening)) { + unsigned CBOpc; + Register Val; + if (matchCompareBranch(*BrCond, MRI, CBOpc, Val)) { + // Need to invert our G_ICMP predicate since we're going to swap the + // branch targets around. First we have to check that there are no other + // users of the icmp/trunc except for the condbr. + if (!MRI.hasOneUse(TruncMI->getOperand(0).getReg()) || + !MRI.hasOneUse(CmpMI->getOperand(0).getReg())) + return false; + CmpMI->getOperand(1).setPredicate(InversePred); + BrCond->getOperand(1).setMBB(BrTarget); + I.eraseFromParent(); + return true; + } + } + + // Generate a normal compare. + MachineInstr *Cmp = + emitIntegerCompare(CmpMI->getOperand(2), CmpMI->getOperand(3), MIB); + if (!Cmp) + return false; + + // Create the conditional branch. + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(InverseCC).addMBB(BrTarget); + + // We can't rely on the simple DCE to erase the brcond so we need to tell the + // select pass to delete it. 
We don't do it here to avoid invalidating + // iterators already held by callers. + markInstForDeletion(BrCond); + + // Now delete old br, this block will now fall through to the successor. + I.eraseFromParent(); + return true; +} + +bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1154,13 +1260,15 @@ switch (I.getOpcode()) { case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); + case TargetOpcode::G_BR: + return earlySelectBr(I, MRI); default: return false; } } bool AArch64InstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { + CodeGenCoverage &CoverageInfo) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1256,7 +1364,7 @@ // instructions that do not set flags. bool ProduceNonFlagSettingCondBr = !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); - if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI)) + if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MRI)) return true; if (ProduceNonFlagSettingCondBr) { @@ -1980,8 +2088,8 @@ } MachineIRBuilder MIRBuilder(I); - if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), - MIRBuilder)) + if (!emitIntegerCompareOpt(I.getOperand(2), I.getOperand(3), + I.getOperand(1), MIRBuilder)) return false; emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(), MIRBuilder); @@ -2920,17 +3028,10 @@ } MachineInstr *AArch64InstructionSelector::emitIntegerCompare( - MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, + MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - - // 
Fold the compare into a CMN if possible. - MachineInstr *Cmn = tryOptCMN(LHS, RHS, Predicate, MIRBuilder); - if (Cmn) - return Cmn; - - // Can't fold into a CMN. Just emit a normal compare. unsigned CmpOpc = 0; Register ZReg; @@ -2966,6 +3067,17 @@ return &*CmpMI; } +MachineInstr *AArch64InstructionSelector::emitIntegerCompareOpt( + MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const { + // Fold the compare into a CMN if possible. + MachineInstr *Cmn = tryOptCMN(LHS, RHS, Predicate, MIRBuilder); + if (Cmn) + return Cmn; + // Can't fold into a CMN. Just emit a normal compare. + return emitIntegerCompare(LHS, RHS, MIRBuilder); +} + MachineInstr *AArch64InstructionSelector::emitVectorConcat( Optional Dst, Register Op1, Register Op2, MachineIRBuilder &MIRBuilder) const { @@ -3128,8 +3240,8 @@ if (CondOpc == TargetOpcode::G_ICMP) { CondCode = changeICMPPredToAArch64CC( (CmpInst::Predicate)CondDef->getOperand(1).getPredicate()); - if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), - CondDef->getOperand(1), MIB)) { + if (!emitIntegerCompareOpt(CondDef->getOperand(2), CondDef->getOperand(3), + CondDef->getOperand(1), MIB)) { LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); return false; } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -247,7 +247,7 @@ return &getInstrInfo()->getRegisterInfo(); } const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp 
b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -187,7 +187,7 @@ return CallLoweringInfo.get(); } -const InstructionSelector *AArch64Subtarget::getInstructionSelector() const { +InstructionSelector *AArch64Subtarget::getInstructionSelector() const { return InstSelector.get(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -47,7 +47,7 @@ const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) override; static const char *getName(); private: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1119,7 +1119,7 @@ } bool AMDGPUInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { + CodeGenCoverage &CoverageInfo) { if (I.isPHI()) return selectPHI(I); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -411,7 +411,7 @@ return CallLoweringInfo.get(); } - const InstructionSelector *getInstructionSelector() const override { + InstructionSelector *getInstructionSelector() const override { return InstSelector.get(); } diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp --- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -34,7 +34,7 @@ ARMInstructionSelector(const 
ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -834,7 +834,7 @@ } bool ARMInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { + CodeGenCoverage &CoverageInfo) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -535,7 +535,7 @@ } const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -125,7 +125,7 @@ return CallLoweringInfo.get(); } -const InstructionSelector *ARMSubtarget::getInstructionSelector() const { +InstructionSelector *ARMSubtarget::getInstructionSelector() const { return InstSelector.get(); } diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp --- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp +++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp @@ -33,7 +33,7 @@ MipsInstructionSelector(const MipsTargetMachine &TM, const MipsSubtarget &STI, const MipsRegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) override; static const 
char *getName() { return DEBUG_TYPE; } private: @@ -166,7 +166,7 @@ } bool MipsInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { + CodeGenCoverage &CoverageInfo) { MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); diff --git a/llvm/lib/Target/Mips/MipsSubtarget.h b/llvm/lib/Target/Mips/MipsSubtarget.h --- a/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/llvm/lib/Target/Mips/MipsSubtarget.h @@ -391,7 +391,7 @@ const CallLowering *getCallLowering() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; }; } // End llvm namespace diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp --- a/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -281,6 +281,6 @@ return RegBankInfo.get(); } -const InstructionSelector *MipsSubtarget::getInstructionSelector() const { +InstructionSelector *MipsSubtarget::getInstructionSelector() const { return InstSelector.get(); } diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp --- a/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -60,7 +60,7 @@ X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -94,11 +94,9 @@ MachineFunction &MF) const; bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, - MachineFunction &MF, - CodeGenCoverage &CoverageInfo) 
const; + MachineFunction &MF, CodeGenCoverage &CoverageInfo); bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, - MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const; + MachineFunction &MF, CodeGenCoverage &CoverageInfo); bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, @@ -309,7 +307,7 @@ } bool X86InstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { + CodeGenCoverage &CoverageInfo) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1336,7 +1334,7 @@ bool X86InstructionSelector::selectUnmergeValues( MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const { + CodeGenCoverage &CoverageInfo) { assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) && "unexpected instruction"); @@ -1362,7 +1360,7 @@ bool X86InstructionSelector::selectMergeValues( MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const { + CodeGenCoverage &CoverageInfo) { assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES || I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) && "unexpected instruction"); diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -527,7 +527,7 @@ /// Methods used by Global ISel const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ 
b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -355,7 +355,7 @@ return CallLoweringInfo.get(); } -const InstructionSelector *X86Subtarget::getInstructionSelector() const { +InstructionSelector *X86Subtarget::getInstructionSelector() const { return InstSelector.get(); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-br-brcond.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-br-brcond.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-br-brcond.mir @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# This test checks that we correctly optimize a brcond->br sequence to fallthrough when available +# and eliminate unnecessary csets/unconditional branches. +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-ios5.0.0" + + define i32 @foo(i32 %a, i32 %b) { + entry: + %cmp = icmp sgt i32 %a, %b + br i1 %cmp, label %if.then, label %if.end + + if.then: ; preds = %entry + %add = add nsw i32 %b, %a + %call = tail call i32 @bar(i32 %add) + %add1 = add nsw i32 %call, %b + br label %return + + if.end: ; preds = %entry + %mul = mul nsw i32 %b, %b + %add2 = add nuw nsw i32 %mul, 2 + br label %return + + return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %add1, %if.then ], [ %add2, %if.end ] + ret i32 %retval.0 + } + + declare i32 @bar(i32) + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #0 + + attributes #0 = { nounwind } + +... 
+--- +name: foo +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +frameInfo: + hasCalls: true +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $w0, $w1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: $wzr = SUBSWrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK: Bcc 13, %bb.2, implicit $nzcv + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: %5:gpr32 = nsw ADDWrr [[COPY1]], [[COPY]] + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $w0 = COPY %5 + ; CHECK: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: %7:gpr32 = nsw ADDWrr [[COPY2]], [[COPY1]] + ; CHECK: B %bb.3 + ; CHECK: bb.2.if.end: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[MADDWrrr:%[0-9]+]]:gpr32common = MADDWrrr [[COPY1]], [[COPY1]], $wzr + ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[MADDWrrr]], 2, 0 + ; CHECK: bb.3.return: + ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI %7, %bb.1, [[ADDWri]], %bb.2 + ; CHECK: $w0 = COPY [[PHI]] + ; CHECK: RET_ReallyLR implicit $w0 + bb.1.entry: + liveins: $w0, $w1 + + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %4:gpr(s32) = G_CONSTANT i32 2 + %10:gpr(s32) = G_ICMP intpred(sgt), %0(s32), %1 + %2:gpr(s1) = G_TRUNC %10(s32) + G_BRCOND %2(s1), %bb.2 + G_BR %bb.3 + + bb.2.if.then: + %6:gpr(s32) = nsw G_ADD %1, %0 + ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + $w0 = COPY %6(s32) + BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0 + %7:gpr(s32) = COPY $w0 + ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + %8:gpr(s32) = nsw G_ADD %7, %1 + G_BR %bb.4 + + bb.3.if.end: + %3:gpr(s32) = nsw G_MUL %1, %1 + 
%5:gpr(s32) = nuw nsw G_ADD %3, %4 + + bb.4.return: + %9:gpr(s32) = G_PHI %8(s32), %bb.2, %5(s32), %bb.3 + $w0 = COPY %9(s32) + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir @@ -1,24 +1,29 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | - define void @cbz_s32() { ret void } - define void @cbz_s64() { ret void } define void @cbnz_s32() { ret void } define void @cbnz_s64() { ret void } + define void @cbz_s32() { ret void } + define void @cbz_s64() { ret void } ... +# These tests check for CBZ/CBNZ codegen, but they look inverted w.r.t the +# constant in the icmp. Another optimization inverts the predicate so that the +# block can fallthrough without using an unconditional br. + --- -# CHECK-LABEL: name: cbz_s32 -name: cbz_s32 +name: cbnz_s32 legalized: true regBankSelected: true -# CHECK: body: -# CHECK: bb.0: -# CHECK: %0:gpr32 = COPY $w0 -# CHECK: CBZW %0, %bb.1 -# CHECK: B %bb.0 body: | + ; CHECK-LABEL: name: cbnz_s32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: CBNZW [[COPY]], %bb.0 + ; CHECK: bb.1: bb.0: liveins: $w0 successors: %bb.0, %bb.1 @@ -34,17 +39,17 @@ ... 
--- -# CHECK-LABEL: name: cbz_s64 -name: cbz_s64 +name: cbnz_s64 legalized: true regBankSelected: true -# CHECK: body: -# CHECK: bb.0: -# CHECK: %0:gpr64 = COPY $x0 -# CHECK: CBZX %0, %bb.1 -# CHECK: B %bb.0 body: | + ; CHECK-LABEL: name: cbnz_s64 + ; CHECK: bb.0: + ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: CBNZX [[COPY]], %bb.0 + ; CHECK: bb.1: bb.0: liveins: $x0 successors: %bb.0, %bb.1 @@ -60,17 +65,17 @@ ... --- -# CHECK-LABEL: name: cbnz_s32 -name: cbnz_s32 +name: cbz_s32 legalized: true regBankSelected: true -# CHECK: body: -# CHECK: bb.0: -# CHECK: %0:gpr32 = COPY $w0 -# CHECK: CBNZW %0, %bb.1 -# CHECK: B %bb.0 body: | + ; CHECK-LABEL: name: cbz_s32 + ; CHECK: bb.0: + ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK: CBZW [[COPY]], %bb.0 + ; CHECK: bb.1: bb.0: liveins: $w0 successors: %bb.0, %bb.1 @@ -86,17 +91,17 @@ ... --- -# CHECK-LABEL: name: cbnz_s64 -name: cbnz_s64 +name: cbz_s64 legalized: true regBankSelected: true -# CHECK: body: -# CHECK: bb.0: -# CHECK: %0:gpr64 = COPY $x0 -# CHECK: CBNZX %0, %bb.1 -# CHECK: B %bb.0 body: | + ; CHECK-LABEL: name: cbz_s64 + ; CHECK: bb.0: + ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: CBZX [[COPY]], %bb.0 + ; CHECK: bb.1: bb.0: liveins: $x0 successors: %bb.0, %bb.1