Index: include/llvm/CodeGen/GlobalISel/Combiner.h =================================================================== --- include/llvm/CodeGen/GlobalISel/Combiner.h +++ include/llvm/CodeGen/GlobalISel/Combiner.h @@ -24,14 +24,42 @@ class TargetPassConfig; class MachineFunction; -class CombinerChangeObserver { +class CombinerChangeObserver : public MachineFunction::Delegate { + SmallPtrSet ChangingAllUsesOfReg; + public: + CombinerChangeObserver() = default; virtual ~CombinerChangeObserver() {} - /// An instruction was erased. - virtual void erasedInstr(MachineInstr &MI) = 0; - /// An instruction was created and inseerted into the function. - virtual void createdInstr(MachineInstr &MI) = 0; + /// An instruction is about to be erased. + virtual void erasingInstr(const MachineInstr &MI) = 0; + + /// An instruction has been created and inserted into the function. + /// Note that the instruction might not be a fully fledged instruction at this + /// point and won't be if the MachineFunction::Delegate is calling it. This is + /// because the delegate only sees the construction of the MachineInstr before + /// operands have been added. + virtual void createdInstr(const MachineInstr &MI) = 0; + + /// An existing instruction is about to be changed. + virtual void changingInstr(const MachineInstr &MI) = 0; + + /// An existing instruction has been changed. + virtual void changedInstr(const MachineInstr &MI) = 0; + + /// All the instructions using the given register are being changed. + /// For convenience, finishedChangingAllUsesOfReg() will report the completion + /// of the changes. + void changingAllUsesOfReg(const MachineRegisterInfo &MRI, unsigned Reg); + /// All instructions reported as changing have finished being changed. + virtual void finishedChangingAllUsesOfReg(); + + void MF_HandleInsertion(const MachineInstr &MI) override { + createdInstr(MI); + } + void MF_HandleRemoval(const MachineInstr &MI) override { + erasingInstr(MI); + } }; class Combiner { Index: include/llvm/CodeGen/GlobalISel/CombinerHelper.h =================================================================== --- include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -30,12 +30,16 @@ MachineRegisterInfo &MRI; CombinerChangeObserver &Observer; - void eraseInstr(MachineInstr &MI); - void scheduleForVisit(MachineInstr &MI); - public: CombinerHelper(CombinerChangeObserver &Observer, MachineIRBuilder &B); + /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes + void replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, unsigned ToReg) const; + + /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes + void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromReg, + unsigned ToReg) const; + /// If \p MI is COPY, try to combine it. /// Returns true if MI changed. bool tryCombineCopy(MachineInstr &MI); Index: include/llvm/CodeGen/GlobalISel/CombinerInfo.h =================================================================== --- include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -43,6 +43,17 @@ /// illegal ops that are created. bool LegalizeIllegalOps; // TODO: Make use of this. const LegalizerInfo *LInfo; + + /// Attempt to combine instructions using MI as the root. + /// + /// Use Observer to report the creation, modification, and erasure of + /// instructions. CombinerChangeObserver will automatically report certain + /// kinds of operations. These operations are: + /// * Instructions that are newly inserted into the MachineFunction + /// * Instructions that are erased from the MachineFunction. + /// + /// However, it is important to report instruction modification and this is + /// not automatic. virtual bool combine(CombinerChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const = 0; }; Index: include/llvm/CodeGen/GlobalISel/GISelWorkList.h =================================================================== --- include/llvm/CodeGen/GlobalISel/GISelWorkList.h +++ include/llvm/CodeGen/GlobalISel/GISelWorkList.h @@ -24,26 +24,39 @@ // FIXME: Does it make sense to factor out common code with the instcombinerWorkList? template class GISelWorkList { - SmallVector Worklist; - DenseMap WorklistMap; + MachineFunction *MF; + SmallVector Worklist; + DenseMap WorklistMap; public: - GISelWorkList() = default; + GISelWorkList(MachineFunction *MF) : MF(MF) {} bool empty() const { return WorklistMap.empty(); } unsigned size() const { return WorklistMap.size(); } - /// Add - Add the specified instruction to the worklist if it isn't already - /// in it. + /// Add the specified instruction to the worklist if it isn't already in it. void insert(MachineInstr *I) { if (WorklistMap.try_emplace(I, Worklist.size()).second) { Worklist.push_back(I); } } - /// Remove - remove I from the worklist if it exists. - void remove(MachineInstr *I) { + void insert(const MachineInstr *I) { + // Confirm we'd be able to find the non-const pointer we want to schedule if + // we wanted to. We have the right to schedule work that may modify any + // instruction in MF. + assert(I->getParent() && "Expected parent BB"); + assert(I->getParent()->getParent() && "Expected parent function"); + assert((!MF || I->getParent()->getParent() == MF) && + "Expected parent function to be current function or not given"); + // But don't actually do the search since we can derive it from the const + // pointer. + insert(const_cast(I)); + } + + /// Remove I from the worklist if it exists. + void remove(const MachineInstr *I) { auto It = WorklistMap.find(I); if (It == WorklistMap.end()) return; // Not in worklist. Index: lib/CodeGen/GlobalISel/Combiner.cpp =================================================================== --- lib/CodeGen/GlobalISel/Combiner.cpp +++ lib/CodeGen/GlobalISel/Combiner.cpp @@ -36,23 +36,63 @@ /// their CombinerChangeObserver subclass. class WorkListMaintainer : public CombinerChangeObserver { using WorkListTy = GISelWorkList<512>; + MachineFunction &MF; WorkListTy &WorkList; + /// The instructions that have been created but we want to report once they + /// have their operands. This is only maintained if debug output is requested. + SmallPtrSet CreatedInstrs; public: - WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {} - virtual ~WorkListMaintainer() {} + WorkListMaintainer(MachineFunction &MF, WorkListTy &WorkList) + : CombinerChangeObserver(), MF(MF), WorkList(WorkList) { + MF.setDelegate(this); + } + virtual ~WorkListMaintainer() { + MF.resetDelegate(this); + } - void erasedInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Erased: "; MI.print(dbgs()); dbgs() << "\n"); + void erasingInstr(const MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << "Erased: "; MI.print(dbgs())); WorkList.remove(&MI); } - void createdInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Created: "; MI.print(dbgs()); dbgs() << "\n"); + void createdInstr(const MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << "Creating: "; MI.print(dbgs())); + WorkList.insert(&MI); + LLVM_DEBUG(CreatedInstrs.insert(&MI)); + } + void changingInstr(const MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << "Changing: "; MI.print(dbgs())); WorkList.insert(&MI); } + void changedInstr(const MachineInstr &MI) override { + LLVM_DEBUG(dbgs() << "Changed: "; MI.print(dbgs())); + WorkList.insert(&MI); + } + + void reportFullyCreatedInstrs() { + LLVM_DEBUG(for (const auto *MI + : CreatedInstrs) { + dbgs() << "Created: "; + MI->print(dbgs()); + }); + LLVM_DEBUG(CreatedInstrs.clear()); + } }; } +void CombinerChangeObserver::changingAllUsesOfReg( + const MachineRegisterInfo &MRI, unsigned Reg) { + for (auto &ChangingMI : MRI.use_instructions(Reg)) { + changingInstr(ChangingMI); + ChangingAllUsesOfReg.insert(&ChangingMI); + } +} + +void CombinerChangeObserver::finishedChangingAllUsesOfReg() { + for (auto *ChangedMI : ChangingAllUsesOfReg) + changedInstr(*ChangedMI); +} + Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC) : CInfo(Info), TPC(TPC) { (void)this->TPC; // FIXME: Remove when used. @@ -80,8 +120,8 @@ // insert with list bottom up, so while we pop_back_val, we'll traverse top // down RPOT. Changed = false; - GISelWorkList<512> WorkList; - WorkListMaintainer Observer(WorkList); + GISelWorkList<512> WorkList(&MF); + WorkListMaintainer Observer(MF, WorkList); for (MachineBasicBlock *MBB : post_order(&MF)) { if (MBB->empty()) continue; @@ -100,8 +140,9 @@ // Main Loop. Process the instructions here. while (!WorkList.empty()) { MachineInstr *CurrInst = WorkList.pop_back_val(); - LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";); + LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;); Changed |= CInfo.combine(Observer, *CurrInst, Builder); + Observer.reportFullyCreatedInstrs(); } MFChanged |= Changed; } while (Changed); Index: lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#define DEBUG_TYPE "gi-combine" +#define DEBUG_TYPE "gi-combiner" using namespace llvm; @@ -22,11 +22,27 @@ MachineIRBuilder &B) : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {} -void CombinerHelper::eraseInstr(MachineInstr &MI) { - Observer.erasedInstr(MI); +void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, + unsigned ToReg) const { + Observer.changingAllUsesOfReg(MRI, FromReg); + + if (MRI.constrainRegAttrs(ToReg, FromReg)) + MRI.replaceRegWith(FromReg, ToReg); + else + Builder.buildCopy(ToReg, FromReg); + + Observer.finishedChangingAllUsesOfReg(); } -void CombinerHelper::scheduleForVisit(MachineInstr &MI) { - Observer.createdInstr(MI); + +void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI, + MachineOperand &FromReg, + unsigned ToReg) const { + assert(FromReg.getParent() && "Expected an operand in an MI"); + Observer.changingInstr(*FromReg.getParent()); + + FromReg.setReg(ToReg); + + Observer.changedInstr(*FromReg.getParent()); } bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { @@ -40,7 +56,7 @@ // a(sx) = COPY b(sx) -> Replace all uses of a with b. if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) { MI.eraseFromParent(); - MRI.replaceRegWith(DstReg, SrcReg); + replaceRegWith(MRI, DstReg, SrcReg); return true; } return false; @@ -193,8 +209,11 @@ // type since by definition the result of an extend is larger. assert(Preferred.Ty != LoadValueTy && "Extending to same type?"); + LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI); + // Rewrite the load to the chosen extending load. unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg(); + Observer.changingInstr(MI); MI.setDesc( Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT ? TargetOpcode::G_SEXTLOAD @@ -213,7 +232,7 @@ if (UseMI->getOpcode() == Preferred.ExtendOpcode || UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { unsigned UseDstReg = UseMI->getOperand(0).getReg(); - unsigned UseSrcReg = UseMI->getOperand(1).getReg(); + MachineOperand &UseSrcMO = UseMI->getOperand(1); const LLT &UseDstTy = MRI.getType(UseDstReg); if (UseDstReg != ChosenDstReg) { if (Preferred.Ty == UseDstTy) { @@ -226,7 +245,7 @@ // rewrites to: // %2:_(s32) = G_SEXTLOAD ... // ... = ... %2(s32) - MRI.replaceRegWith(UseDstReg, ChosenDstReg); + replaceRegWith(MRI, UseDstReg, ChosenDstReg); ScheduleForErase.push_back(UseMO.getParent()); } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) { // If the preferred size is smaller, then keep the extend but extend @@ -239,7 +258,7 @@ // %2:_(s32) = G_SEXTLOAD ... // %3:_(s64) = G_ANYEXT %2:_(s32) // ... = ... %3(s64) - MRI.replaceRegWith(UseSrcReg, ChosenDstReg); + replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg); } else { // If the preferred size is large, then insert a truncate. For // example: @@ -286,7 +305,9 @@ MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB); if (PreviouslyEmitted) { + Observer.changingInstr(*UseMO->getParent()); UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg()); + Observer.changedInstr(*UseMO->getParent()); continue; } @@ -294,14 +315,14 @@ unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg()); MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg); EmittedInsns[InsertIntoBB] = NewMI; + Observer.changingInstr(*UseMO->getParent()); UseMO->setReg(NewDstReg); - Observer.createdInstr(*NewMI); + Observer.changedInstr(*UseMO->getParent()); } - for (auto &EraseMI : ScheduleForErase) { + for (auto &EraseMI : ScheduleForErase) EraseMI->eraseFromParent(); - Observer.erasedInstr(*EraseMI); - } MI.getOperand(0).setReg(ChosenDstReg); + Observer.changedInstr(MI); return true; } Index: lib/CodeGen/GlobalISel/Legalizer.cpp =================================================================== --- lib/CodeGen/GlobalISel/Legalizer.cpp +++ lib/CodeGen/GlobalISel/Legalizer.cpp @@ -83,8 +83,8 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); // Populate Insts - GISelWorkList<256> InstList; - GISelWorkList<128> ArtifactList; + GISelWorkList<256> InstList(&MF); + GISelWorkList<128> ArtifactList(&MF); ReversePostOrderTraversal RPOT(&MF); // Perform legalization bottom up so we can DCE as we legalize. // Traverse BB in RPOT and within each basic block, add insts top down, Index: test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir +++ test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir @@ -1,4 +1,7 @@ # RUN: llc -O0 -run-pass=aarch64-prelegalizer-combiner -global-isel %s -o - | FileCheck %s +# RUN: llc -O0 -run-pass=aarch64-prelegalizer-combiner -global-isel %s -o - \ +# RUN: -debug-only=aarch64-prelegalizer-combiner,gi-combiner 2>&1 >/dev/null \ +# RUN: | FileCheck %s --check-prefix=CHECK-WORKLIST --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -102,6 +105,23 @@ ; CHECK: $w1 = COPY [[T6]](s32) $w0 = COPY %3 $w1 = COPY %9 + +# Check that we report the correct modifications to the observer. This acts as +# a test of the debug output and a test. +# +# CHECK-WORKLIST-LABEL: Generic MI Combiner for: multiple_copies +# CHECK-WORKLIST: Try combining [[IN0:%[0-9]+]]:_(s8) = G_LOAD [[IN1:%[0-9]+]]:_(p0) :: (load 1 from %ir.addr) +# CHECK-WORKLIST: Preferred use is: [[IN2:%[0-9]+]]:_(s32) = G_SEXT [[IN0]]:_(s8) +# CHECK-WORKLIST-DAG: Changing: [[IN0]]:_(s8) = G_LOAD [[IN1]]:_(p0) :: (load 1 from %ir.addr) +# CHECK-WORKLIST-DAG: Changing: [[IN3:%[0-9]+]]:_(s8) = G_ADD [[IN0]]:_, [[IN4:%[0-9]+]]:_ +# CHECK-WORKLIST-DAG: Changed: [[IN3]]:_(s8) = G_ADD [[NEW1:%[0-9]+]]:_, [[IN4]]:_ +# CHECK-WORKLIST-DAG: Changing: [[IN5:%[0-9]+]]:_(s8) = G_SUB [[IN0]]:_, [[IN6:%[0-9]+]]:_ +# CHECK-WORKLIST-DAG: Changed: [[IN5]]:_(s8) = G_SUB [[NEW2:%[0-9]+]]:_, [[IN6]]:_ +# CHECK-WORKLIST-DAG: Erased: [[IN2]]:_(s32) = G_SEXT [[IN0]]:_(s8) +# CHECK-WORKLIST-DAG: Changed: [[IN2]]:_(s32) = G_SEXTLOAD [[IN1]]:_(p0) :: (load 1 from %ir.addr) +# CHECK-WORKLIST-DAG: Created: [[NEW1]]:_(s8) = G_TRUNC [[IN2]]:_(s32) +# CHECK-WORKLIST-DAG: Created: [[NEW2]]:_(s8) = G_TRUNC [[IN2]]:_(s32) +# CHECK-WORKLIST: Try combining ... --- @@ -157,37 +177,52 @@ %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %2:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr) - %3:_(s32) = G_SEXT %2 - %4:_(s32) = G_CONSTANT i32 1 - %5:_(s1) = G_ICMP intpred(ne), %1:_(s32), %4:_ - G_BRCOND %5:_(s1), %bb.1 + %3:_(s32) = G_CONSTANT i32 1 + %4:_(s1) = G_ICMP intpred(ne), %1:_(s32), %3:_ + G_BRCOND %4:_(s1), %bb.1 G_BR %bb.2.else bb.1.if: ; CHECK: bb.1.if: successors: %bb.3(0x80000000) - %10:_(s8) = G_CONSTANT i8 1 + %5:_(s8) = G_CONSTANT i8 1 ; CHECK: [[T1:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32) - %6:_(s8) = G_ADD %2, %10 + %6:_(s8) = G_ADD %2, %5 ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_ADD [[T1]], {{.*}} G_BR %bb.3.exit bb.2.else: ; CHECK: bb.2.else: successors: %bb.3(0x80000000) - %11:_(s8) = G_CONSTANT i8 1 + %7:_(s8) = G_CONSTANT i8 1 ; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32) - %7:_(s8) = G_SUB %2, %11 + %8:_(s8) = G_SUB %2, %7 ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}} G_BR %bb.3.exit bb.3.exit: ; CHECK: bb.3.exit: - %8:_(s8) = G_PHI %6:_(s8), %bb.1, %7:_(s8), %bb.2 + %9:_(s8) = G_PHI %6:_(s8), %bb.1, %8:_(s8), %bb.2 ; CHECK: [[T5:%[0-9]+]]:_(s8) = G_PHI [[T2]](s8), %bb.1, [[T4]](s8) - %9:_(s32) = G_ZEXT %8 + %10:_(s32) = G_SEXT %2 + %11:_(s32) = G_ZEXT %9 ; CHECK: [[T6:%[0-9]+]]:_(s32) = G_ZEXT [[T5]](s8) ; CHECK: $w0 = COPY [[T0]](s32) ; CHECK: $w1 = COPY [[T6]](s32) - $w0 = COPY %3 - $w1 = COPY %9 + $w0 = COPY %10 + $w1 = COPY %11 +# CHECK-WORKLIST-LABEL: Generic MI Combiner for: sink_to_phi_nondominating +# CHECK-WORKLIST: Try combining [[IN0:%[0-9]+]]:_(s8) = G_LOAD [[IN1:%[0-9]+]]:_(p0) :: (load 1 from %ir.addr) +# CHECK-WORKLIST: Preferred use is: [[IN2:%[0-9]+]]:_(s32) = G_SEXT [[IN0]]:_(s8) +# CHECK-WORKLIST-DAG: Changing: [[IN0]]:_(s8) = G_LOAD [[IN1]]:_(p0) :: (load 1 from %ir.addr) +# CHECK-WORKLIST-DAG: Creating: G_TRUNC +# CHECK-WORKLIST-DAG: Changing: [[IN3:%[0-9]+]]:_(s8) = G_ADD [[IN0]]:_, [[IN4:%[0-9]+]]:_ +# CHECK-WORKLIST-DAG: Changed: [[IN3]]:_(s8) = G_ADD [[OUT1:%[0-9]+]]:_, [[IN4]]:_ +# CHECK-WORKLIST-DAG: Creating: G_TRUNC +# CHECK-WORKLIST-DAG: Changing: [[IN5:%[0-9]+]]:_(s8) = G_SUB [[IN0]]:_, [[IN6:%[0-9]+]]:_ +# CHECK-WORKLIST-DAG: Changed: [[IN5]]:_(s8) = G_SUB [[OUT2:%[0-9]+]]:_, [[IN6]]:_ +# CHECK-WORKLIST-DAG: Erased: [[IN2]]:_(s32) = G_SEXT [[IN0]]:_(s8) +# CHECK-WORKLIST-DAG: Changed: [[IN2]]:_(s32) = G_SEXTLOAD [[IN1]]:_(p0) :: (load 1 from %ir.addr) +# CHECK-WORKLIST-DAG: Created: [[OUT1]]:_(s8) = G_TRUNC [[IN2]]:_(s32) +# CHECK-WORKLIST-DAG: Created: [[OUT2]]:_(s8) = G_TRUNC [[IN2]]:_(s32) +# CHECK-WORKLIST: Try combining ... ---