Index: llvm/trunk/include/llvm/CodeGen/Passes.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/Passes.h +++ llvm/trunk/include/llvm/CodeGen/Passes.h @@ -470,6 +470,9 @@ /// DeadMachineInstructionElim - This pass removes dead machine instructions. extern char &DeadMachineInstructionElimID; + /// This pass adds dead/undef flags after analyzing subregister lanes. + extern char &DetectDeadLanesID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -110,6 +110,7 @@ void initializeDeadMachineInstructionElimPass(PassRegistry&); void initializeDelinearizationPass(PassRegistry &); void initializeDependenceAnalysisPass(PassRegistry&); +void initializeDetectDeadLanesPass(PassRegistry&); void initializeDivergenceAnalysisPass(PassRegistry&); void initializeDomOnlyPrinterPass(PassRegistry&); void initializeDomOnlyViewerPass(PassRegistry&); Index: llvm/trunk/lib/CodeGen/CMakeLists.txt =================================================================== --- llvm/trunk/lib/CodeGen/CMakeLists.txt +++ llvm/trunk/lib/CodeGen/CMakeLists.txt @@ -12,6 +12,7 @@ CodeGenPrepare.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp + DetectDeadLanes.cpp DFAPacketizer.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp Index: llvm/trunk/lib/CodeGen/CodeGen.cpp =================================================================== --- llvm/trunk/lib/CodeGen/CodeGen.cpp +++ llvm/trunk/lib/CodeGen/CodeGen.cpp @@ -24,6 +24,7 @@ initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeDetectDeadLanesPass(Registry); 
initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandISelPseudosPass(Registry); Index: llvm/trunk/lib/CodeGen/DetectDeadLanes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/DetectDeadLanes.cpp +++ llvm/trunk/lib/CodeGen/DetectDeadLanes.cpp @@ -0,0 +1,530 @@ +//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Analysis that tracks defined/used subregister lanes across COPY instructions +/// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE, +/// INSERT_SUBREG, EXTRACT_SUBREG). +/// The information is used to detect dead definitions and the usage of +/// (completely) undefined values and mark the operands as such. +/// This pass is necessary because the dead/undef status is not obvious anymore +/// when subregisters are involved. +/// +/// Example: +/// %vreg0 = some definition +/// %vreg1 = IMPLICIT_DEF +/// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1 +/// %vreg3 = EXTRACT_SUBREG %vreg2, sub1 +/// = use %vreg3 +/// The %vreg0 definition is dead and %vreg3 contains an undefined value. 
+#include <deque> +#include <vector>
+ void transferDefinedLanesStep(const MachineOperand &Use, + LaneBitmask DefinedLanes); + + /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum + /// of COPY-like instruction, determine which lanes are defined at the output + /// operand \p Def. + LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum, + LaneBitmask DefinedLanes); + + LaneBitmask determineInitialDefinedLanes(unsigned Reg); + LaneBitmask determineInitialUsedLanes(unsigned Reg); + + const MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + + void PutInWorklist(unsigned RegIdx) { + if (WorklistMembers.test(RegIdx)) + return; + WorklistMembers.set(RegIdx); + Worklist.push_back(RegIdx); + } + + VRegInfo *VRegInfos; + /// Worklist containing virtreg indexes. + std::deque Worklist; + BitVector WorklistMembers; + /// This bitvector is set for each vreg index where the vreg is defined + /// by an instruction where lowersToCopies()==true. + BitVector DefinedByCopy; +}; + +} // end anonymous namespace + +char DetectDeadLanes::ID = 0; +char &llvm::DetectDeadLanesID = DetectDeadLanes::ID; + +INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes", + false, false); + +/// Returns true if \p MI will get lowered to a series of COPY instructions. +/// We call this a COPY-like instruction. +static bool lowersToCopies(const MachineInstr &MI) { + // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(), + // isExtractSubRegLike(), isInsertSubregLike() in the future even though they + // are not lowered to a COPY. 
+ switch (MI.getOpcode()) { + case TargetOpcode::COPY: + case TargetOpcode::PHI: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::EXTRACT_SUBREG: + return true; + } + return false; +} + +static bool isCrossCopy(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + const TargetRegisterClass *DstRC, + const MachineOperand &MO) { + assert(lowersToCopies(MI)); + unsigned SrcReg = MO.getReg(); + const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); + if (DstRC == SrcRC) + return false; + + unsigned SrcSubIdx = MO.getSubReg(); + + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned DstSubIdx = 0; + switch (MI.getOpcode()) { + case TargetOpcode::INSERT_SUBREG: + if (MI.getOperandNo(&MO) == 2) + DstSubIdx = MI.getOperand(3).getImm(); + break; + case TargetOpcode::REG_SEQUENCE: { + unsigned OpNum = MI.getOperandNo(&MO); + DstSubIdx = MI.getOperand(OpNum+1).getImm(); + break; + } + case TargetOpcode::EXTRACT_SUBREG: { + unsigned SubReg = MI.getOperand(2).getImm(); + SrcSubIdx = TRI.composeSubRegIndices(SubReg, SrcSubIdx); + } + } + + unsigned PreA, PreB; // Unused. 
+ if (SrcSubIdx && DstSubIdx) + return !TRI.getCommonSuperRegClass(SrcRC, SrcSubIdx, DstRC, DstSubIdx, PreA, + PreB); + if (SrcSubIdx) + return !TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSubIdx); + if (DstSubIdx) + return !TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSubIdx); + return !TRI.getCommonSubClass(SrcRC, DstRC); +} + +void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, + LaneBitmask UsedLanes) { + if (!MO.readsReg()) + return; + unsigned MOReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(MOReg)) + return; + + unsigned MOSubReg = MO.getSubReg(); + if (MOSubReg != 0) + UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes); + UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg); + + unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg); + VRegInfo &MORegInfo = VRegInfos[MORegIdx]; + LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes; + // Any change at all? + if ((UsedLanes & ~PrevUsedLanes) == 0) + return; + + // Set UsedLanes and remember instruction for further propagation. + MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes; + if (DefinedByCopy.test(MORegIdx)) + PutInWorklist(MORegIdx); +} + +void DetectDeadLanes::transferUsedLanesStep(const MachineOperand &Def, + LaneBitmask UsedLanes) { + const MachineInstr &MI = *Def.getParent(); + switch (MI.getOpcode()) { + case TargetOpcode::COPY: + case TargetOpcode::PHI: + for (const MachineOperand &MO : MI.uses()) { + if (MO.isReg() && MO.isUse()) + addUsedLanesOnOperand(MO, UsedLanes); + } + break; + case TargetOpcode::REG_SEQUENCE: { + // Note: This loop makes the conservative assumption that subregister + // indices do not overlap or that we do not know how the overlap is + // resolved when lowering to copies. 
+ for (unsigned I = 1, N = MI.getNumOperands(); I < N; I += 2) { + const MachineOperand &MO = MI.getOperand(I); + unsigned SubIdx = MI.getOperand(I + 1).getImm(); + LaneBitmask MOUsedLanes = + TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); + + addUsedLanesOnOperand(MO, MOUsedLanes); + } + break; + } + case TargetOpcode::INSERT_SUBREG: { + const MachineOperand &MO2 = MI.getOperand(2); + unsigned SubIdx = MI.getOperand(3).getImm(); + LaneBitmask MO2UsedLanes = + TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); + addUsedLanesOnOperand(MO2, MO2UsedLanes); + + const MachineOperand &MO1 = MI.getOperand(1); + unsigned DefReg = Def.getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LaneBitmask MO1UsedLanes; + if (RC->CoveredBySubRegs) + MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx); + else + MO1UsedLanes = RC->LaneMask; + addUsedLanesOnOperand(MO1, MO1UsedLanes); + break; + } + case TargetOpcode::EXTRACT_SUBREG: { + const MachineOperand &MO = MI.getOperand(1); + unsigned SubIdx = MI.getOperand(2).getImm(); + LaneBitmask MOUsedLanes = + TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes); + addUsedLanesOnOperand(MO, MOUsedLanes); + break; + } + default: + llvm_unreachable("function must be called with COPY-like instruction"); + } +} + +void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, + LaneBitmask DefinedLanes) { + if (!Use.readsReg()) + return; + // Check whether the operand writes a vreg and is part of a COPY-like + // instruction. + const MachineInstr &MI = *Use.getParent(); + if (MI.getDesc().getNumDefs() != 1) + return; + // FIXME: PATCHPOINT instructions announce a Def that does not always exist, + // they really need to be modeled differently! 
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT) + return; + const MachineOperand &Def = *MI.defs().begin(); + unsigned DefReg = Def.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + return; + unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + if (!DefinedByCopy.test(DefRegIdx)) + return; + + unsigned OpNum = MI.getOperandNo(&Use); + DefinedLanes = + TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes); + DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes); + + VRegInfo &RegInfo = VRegInfos[DefRegIdx]; + LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes; + // Any change at all? + if ((DefinedLanes & ~PrevDefinedLanes) == 0) + return; + + RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes; + PutInWorklist(DefRegIdx); +} + +LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def, + unsigned OpNum, + LaneBitmask DefinedLanes) { + const MachineInstr &MI = *Def.getParent(); + // Translate DefinedLanes if necessary. + switch (MI.getOpcode()) { + case TargetOpcode::REG_SEQUENCE: { + unsigned SubIdx = MI.getOperand(OpNum + 1).getImm(); + DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes); + DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx); + break; + } + case TargetOpcode::INSERT_SUBREG: { + unsigned SubIdx = MI.getOperand(3).getImm(); + if (OpNum == 2) { + DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes); + DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx); + } else { + assert(OpNum == 1 && "INSERT_SUBREG must have two operands"); + // Ignore lanes defined by operand 2. 
+ DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx); + } + break; + } + case TargetOpcode::EXTRACT_SUBREG: { + unsigned SubIdx = MI.getOperand(2).getImm(); + assert(OpNum == 1 && "EXTRACT_SUBREG must have one register operand only"); + DefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes); + break; + } + case TargetOpcode::COPY: + case TargetOpcode::PHI: + break; + default: + llvm_unreachable("function must be called with COPY-like instruction"); + } + + unsigned SubIdx = Def.getSubReg(); + DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes); + DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg()); + return DefinedLanes; +} + +LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { + // Live-In or unused registers have no definition but are considered fully + // defined. + if (!MRI->hasOneDef(Reg)) + return ~0u; + + const MachineOperand &Def = *MRI->def_begin(Reg); + const MachineInstr &DefMI = *Def.getParent(); + if (lowersToCopies(DefMI)) { + // Start optimisatically with no used or defined lanes for copy + // instructions. The following dataflow analysis will add more bits. + unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + DefinedByCopy.set(RegIdx); + PutInWorklist(RegIdx); + + if (Def.isDead()) + return 0; + + // COPY/PHI can copy across unrelated register classes (example: float/int) + // with incompatible subregister structure. Do not include these in the + // dataflow analysis since we cannot transfer lanemasks in a meaningful way. + const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); + + // Determine initially DefinedLanes. 
+ LaneBitmask DefinedLanes = 0; + for (const MachineOperand &MO : DefMI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + + LaneBitmask MODefinedLanes; + if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + MODefinedLanes = ~0u; + } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) { + MODefinedLanes = ~0u; + } else { + assert(TargetRegisterInfo::isVirtualRegister(MOReg)); + if (MRI->hasOneDef(MOReg)) { + const MachineOperand &MODef = *MRI->def_begin(MOReg); + const MachineInstr &MODefMI = *MODef.getParent(); + // Bits from copy-like operations will be added later. + if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef()) + continue; + } + unsigned MOSubReg = MO.getSubReg(); + MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg); + MODefinedLanes = TRI->reverseComposeSubRegIndexLaneMask( + MOSubReg, MODefinedLanes); + } + + unsigned OpNum = DefMI.getOperandNo(&MO); + DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes); + } + return DefinedLanes; + } + if (DefMI.isImplicitDef() || Def.isDead()) + return 0; + + unsigned SubReg = Def.getSubReg(); + return SubReg != 0 ? TRI->getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); +} + +LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { + LaneBitmask UsedLanes = 0; + for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + if (!MO.readsReg()) + continue; + + const MachineInstr &UseMI = *MO.getParent(); + if (UseMI.isKill()) + continue; + + unsigned SubReg = MO.getSubReg(); + if (lowersToCopies(UseMI)) { + assert(UseMI.getDesc().getNumDefs() == 1); + const MachineOperand &Def = *UseMI.defs().begin(); + unsigned DefReg = Def.getReg(); + // The used lanes of COPY-like instruction operands are determined by the + // following dataflow analysis. + if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + // But ignore copies across incompatible register classes. 
+ bool CrossCopy = false; + if (lowersToCopies(UseMI)) { + const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); + CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO); + } + + if (!CrossCopy) + continue; + } + } + + // Shortcut: All lanes are used. + if (SubReg == 0) + return MRI->getMaxLaneMaskForVReg(Reg); + + UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg); + } + return UsedLanes; +} + +bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) { + // Don't bother if we won't track subregister liveness later. This pass is + // required for correctness if subregister liveness is enabled because the + // register coalescer cannot deal with hidden dead defs. However without + // subregister liveness enabled, the expected benefits of this pass are small + // so we safe the compile time. + if (!MF.getSubtarget().enableSubRegLiveness()) { + DEBUG(dbgs() << "Skipping Detect dead lanes pass\n"); + return false; + } + + MRI = &MF.getRegInfo(); + TRI = MRI->getTargetRegisterInfo(); + + unsigned NumVirtRegs = MRI->getNumVirtRegs(); + VRegInfos = new VRegInfo[NumVirtRegs]; + WorklistMembers.resize(NumVirtRegs); + DefinedByCopy.resize(NumVirtRegs); + + // First pass: Populate defs/uses of vregs with initial values + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + + // Determine used/defined lanes and add copy instructions to worklist. + VRegInfo &Info = VRegInfos[RegIdx]; + Info.DefinedLanes = determineInitialDefinedLanes(Reg); + Info.UsedLanes = determineInitialUsedLanes(Reg); + } + + // Iterate as long as defined lanes/used lanes keep changing. + while (!Worklist.empty()) { + unsigned RegIdx = Worklist.front(); + Worklist.pop_front(); + WorklistMembers.reset(RegIdx); + VRegInfo &Info = VRegInfos[RegIdx]; + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + + // Transfer UsedLanes to operands of DefMI (backwards dataflow). 
+ MachineOperand &Def = *MRI->def_begin(Reg); + transferUsedLanesStep(Def, Info.UsedLanes); + // Transfer DefinedLanes to users of Reg (forward dataflow). + for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) + transferDefinedLanesStep(MO, Info.DefinedLanes); + } + + DEBUG( + dbgs() << "Defined/Used lanes:\n"; + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + const VRegInfo &Info = VRegInfos[RegIdx]; + dbgs() << PrintReg(Reg, nullptr) + << " Used: " << PrintLaneMask(Info.UsedLanes) + << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; + } + dbgs() << "\n"; + ); + + // Mark operands as dead/unused. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + unsigned SubReg = MO.getSubReg(); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); + unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + const VRegInfo &RegInfo = VRegInfos[RegIdx]; + if (RegInfo.UsedLanes == 0 && MO.isDef() && !MO.isDead()) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); + MO.setIsDead(); + } + if (((RegInfo.UsedLanes & Mask) == 0 || + (RegInfo.DefinedLanes & Mask) == 0) && MO.readsReg()) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " << MI); + MO.setIsUndef(); + } + } + } + } + + DefinedByCopy.clear(); + WorklistMembers.clear(); + delete[] VRegInfos; + return true; +} Index: llvm/trunk/lib/CodeGen/Passes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/Passes.cpp +++ llvm/trunk/lib/CodeGen/Passes.cpp @@ -736,6 +736,8 @@ /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. 
void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addPass(&DetectDeadLanesID, false); + addPass(&ProcessImplicitDefsID, false); // LiveVariables currently requires pure SSA form. Index: llvm/trunk/test/CodeGen/AMDGPU/detect-dead-lanes.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/detect-dead-lanes.mir +++ llvm/trunk/test/CodeGen/AMDGPU/detect-dead-lanes.mir @@ -0,0 +1,408 @@ +# RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o /dev/null %s 2>&1 | FileCheck %s +--- | + define void @test0() { ret void } + define void @test1() { ret void } + define void @test2() { ret void } + define void @test3() { ret void } + define void @test4() { ret void } + define void @loop0() { ret void } + define void @loop1() { ret void } + define void @loop2() { ret void } +... +--- +# Combined use/def transfer check, the basics. +# CHECK-LABEL: name: test0 +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %3:sub0 +# CHECK: S_NOP 0, implicit %3:sub1 +# CHECK: S_NOP 0, implicit undef %3:sub2 +# CHECK: %4 = COPY %3:sub0_sub1 +# CHECK: %5 = COPY %3:sub2_sub3 +# CHECK: S_NOP 0, implicit %4:sub0 +# CHECK: S_NOP 0, implicit %4:sub1 +# CHECK: S_NOP 0, implicit undef %5:sub0 +name: test0 +isSSA: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sreg_32 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub3 + S_NOP 0, implicit %3:sub0 + S_NOP 0, implicit %3:sub1 + S_NOP 0, implicit %3:sub2 + %4 = COPY %3:sub0_sub1 + %5 = COPY %3:sub2_sub3 + S_NOP 0, implicit %4:sub0 + S_NOP 0, implicit 
%4:sub1 + S_NOP 0, implicit %5:sub0 +... +--- +# Check defined lanes transfer; Includes checking for some special cases like +# undef operands or IMPLICIT_DEF definitions. +# CHECK-LABEL: name: test1 +# CHECK: %0 = REG_SEQUENCE %sgpr0, {{[0-9]+}}, %sgpr0, {{[0-9]+}} +# CHECK: %1 = INSERT_SUBREG %0, %sgpr1, {{[0-9]+}} +# CHECK: %2 = INSERT_SUBREG %0:sub2_sub3, %sgpr42, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %1:sub0 +# CHECK: S_NOP 0, implicit undef %1:sub1 +# CHECK: S_NOP 0, implicit %1:sub2 +# CHECK: S_NOP 0, implicit %1:sub3 +# CHECK: S_NOP 0, implicit %2:sub0 +# CHECK: S_NOP 0, implicit undef %2:sub1 + +# CHECK: %3 = IMPLICIT_DEF +# CHECK: %4 = INSERT_SUBREG %0, undef %3, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %4:sub0 +# CHECK: S_NOP 0, implicit undef %4:sub1 +# CHECK: S_NOP 0, implicit %4:sub2 +# CHECK: S_NOP 0, implicit undef %4:sub3 + +# CHECK: %5 = EXTRACT_SUBREG %0, {{[0-9]+}} +# CHECK: %6 = EXTRACT_SUBREG %5, {{[0-9]+}} +# CHECK: %7 = EXTRACT_SUBREG %5, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %5 +# CHECK: S_NOP 0, implicit %6 +# CHECK: S_NOP 0, implicit undef %7 + +# CHECK: %8 = IMPLICIT_DEF +# CHECK: %9 = EXTRACT_SUBREG undef %8, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %9 + +# CHECK: %10 = EXTRACT_SUBREG undef %0, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %10 +name: test1 +isSSA: true +registers: + - { id: 0, class: sreg_128 } + - { id: 1, class: sreg_128 } + - { id: 2, class: sreg_64 } + - { id: 3, class: sreg_32 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_64 } + - { id: 6, class: sreg_32 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_64 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } +body: | + bb.0: + %0 = REG_SEQUENCE %sgpr0, %subreg.sub0, %sgpr0, %subreg.sub2 + %1 = INSERT_SUBREG %0, %sgpr1, %subreg.sub3 + %2 = INSERT_SUBREG %0:sub2_sub3, %sgpr42, %subreg.sub0 + S_NOP 0, implicit %1:sub0 + S_NOP 0, implicit %1:sub1 + S_NOP 0, implicit %1:sub2 + S_NOP 0, implicit %1:sub3 + S_NOP 0, implicit %2:sub0 + 
S_NOP 0, implicit %2:sub1 + + %3 = IMPLICIT_DEF + %4 = INSERT_SUBREG %0, %3, %subreg.sub0 + S_NOP 0, implicit %4:sub0 + S_NOP 0, implicit %4:sub1 + S_NOP 0, implicit %4:sub2 + S_NOP 0, implicit %4:sub3 + + %5 = EXTRACT_SUBREG %0, %subreg.sub0_sub1 + %6 = EXTRACT_SUBREG %5, %subreg.sub0 + %7 = EXTRACT_SUBREG %5, %subreg.sub1 + S_NOP 0, implicit %5 + S_NOP 0, implicit %6 + S_NOP 0, implicit %7 + + %8 = IMPLICIT_DEF + %9 = EXTRACT_SUBREG %8, %subreg.sub1 + S_NOP 0, implicit %9 + + %10 = EXTRACT_SUBREG undef %0, %subreg.sub2_sub3 + S_NOP 0, implicit %10 +... +--- +# Check used lanes transfer; Includes checking for some special cases like +# undef operands. +# CHECK-LABEL: name: test2 +# CHECK: S_NOP 0, implicit-def dead %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def %2 +# CHECK: %3 = REG_SEQUENCE undef %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %3:sub1 +# CHECK: S_NOP 0, implicit %3:sub3 + +# CHECK: S_NOP 0, implicit-def %4 +# CHECK: S_NOP 0, implicit-def dead %5 +# CHECK: %6 = REG_SEQUENCE %4, {{[0-9]+}}, undef %5, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %6 + +# CHECK: S_NOP 0, implicit-def dead %7 +# CHECK: S_NOP 0, implicit-def %8 +# CHECK: %9 = INSERT_SUBREG undef %7, %8, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %9:sub2 + +# CHECK: S_NOP 0, implicit-def %10 +# CHECK: S_NOP 0, implicit-def dead %11 +# CHECK: %12 = INSERT_SUBREG %10, undef %11, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %12:sub3 + +# CHECK: S_NOP 0, implicit-def %13 +# CHECK: S_NOP 0, implicit-def dead %14 +# CHECK: %15 = REG_SEQUENCE %13, {{[0-9]+}}, undef %14, {{[0-9]+}} +# CHECK: %16 = EXTRACT_SUBREG %15, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %16:sub1 + +name: test2 +isSSA: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sreg_64 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_32 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_128 } + - { id: 8, class: 
sreg_64 } + - { id: 9, class: sreg_128 } + - { id: 10, class: sreg_128 } + - { id: 11, class: sreg_64 } + - { id: 12, class: sreg_128 } + - { id: 13, class: sreg_64 } + - { id: 14, class: sreg_64 } + - { id: 15, class: sreg_128 } + - { id: 16, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2_sub3 + S_NOP 0, implicit %3:sub1 + S_NOP 0, implicit %3:sub3 + + S_NOP 0, implicit-def %4 + S_NOP 0, implicit-def %5 + %6 = REG_SEQUENCE %4, %subreg.sub0, undef %5, %subreg.sub1 + S_NOP 0, implicit %6 + + S_NOP 0, implicit-def %7 + S_NOP 0, implicit-def %8 + %9 = INSERT_SUBREG %7, %8, %subreg.sub2_sub3 + S_NOP 0, implicit %9:sub2 + + S_NOP 0, implicit-def %10 + S_NOP 0, implicit-def %11 + %12 = INSERT_SUBREG %10, %11, %subreg.sub0_sub1 + S_NOP 0, implicit %12:sub3 + + S_NOP 0, implicit-def %13 + S_NOP 0, implicit-def %14 + %15 = REG_SEQUENCE %13, %subreg.sub0_sub1, %14, %subreg.sub2_sub3 + %16 = EXTRACT_SUBREG %15, %subreg.sub0_sub1 + S_NOP 0, implicit %16:sub1 +... +--- +# Check that copies to physregs use all lanes, copies from physregs define all +# lanes. So we should not get a dead/undef flag here. +# CHECK-LABEL: name: test3 +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: %vcc = COPY %0 +# CHECK: %1 = COPY %vcc +# CHECK: S_NOP 0, implicit %1 +name: test3 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + %vcc = COPY %0 + + %1 = COPY %vcc + S_NOP 0, implicit %1 +... +--- +# Check that implicit-def/kill do not count as def/uses. 
+# CHECK-LABEL: name: test4 +# CHECK: S_NOP 0, implicit-def dead %0 +# CHECK: KILL undef %0 +# CHECK: %1 = IMPLICIT_DEF +# CHECK: S_NOP 0, implicit undef %1 +name: test4 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + KILL %0 + + %1 = IMPLICIT_DEF + S_NOP 0, implicit %1 +... +--- +# Check "optimistic" dataflow fixpoint in phi-loops. +# CHECK-LABEL: name: loop0 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def dead %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, undef %1, {{[0-9]+}}, undef %2, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %4 = PHI %3, %bb.0, %5, %bb.1 + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %4:sub0 +# CHECK: S_NOP 0, implicit undef %4:sub3 +name: loop0 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sreg_32 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %4 = PHI %3, %bb.0, %5, %bb.1 + + ; let's swiffle some lanes around for fun... + %5 = REG_SEQUENCE %4:sub0, %subreg.sub0, %4:sub2, %subreg.sub1, %4:sub1, %subreg.sub2, %4:sub3, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %4:sub0 + S_NOP 0, implicit %4:sub3 +... +--- +# Check a loop that needs to be traversed multiple times to reach the fixpoint +# for the used lanes. The example reads sub3 lane at the end, however with each +# loop iteration we should get 1 more lane marked as we cycles the sublanes +# along. Sublanes sub0, sub1 and sub3 are rotate in the loop so only sub2 +# should be dead. 
+# CHECK-LABEL: name: loop1 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: S_NOP 0, implicit-def %3 +# CHECK: %4 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}, %3, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %5 = PHI %4, %bb.0, %6, %bb.1 + +# CHECK: %6 = REG_SEQUENCE %5:sub1, {{[0-9]+}}, %5:sub3, {{[0-9]+}}, undef %5:sub2, {{[0-9]+}}, %5:sub0, {{[0-9]+}} + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %6:sub3 +name: loop1 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sreg_32 } + - { id: 3, class: sreg_32 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_128 } + - { id: 6, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def dead %2 + S_NOP 0, implicit-def %3 + %4 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %5 = PHI %4, %bb.0, %6, %bb.1 + + ; rotate lanes, but skip sub2 lane... + %6 = REG_SEQUENCE %5:sub1, %subreg.sub0, %5:sub3, %subreg.sub1, %5:sub2, %subreg.sub2, %5:sub0, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %6:sub3 +... +--- +# Similar to loop1 test, but check for fixpoint of defined lanes. +# Lanes are rotate between sub0, sub2, sub3 so only sub1 should be dead/undef. 
+# CHECK-LABEL: name: loop2 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: %1 = REG_SEQUENCE %0, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %2 = PHI %1, %bb.0, %3, %bb.1 + +# CHECK: %3 = REG_SEQUENCE %2:sub3, {{[0-9]+}}, undef %2:sub1, {{[0-9]+}}, %2:sub0, {{[0-9]+}}, %2:sub2, {{[0-9]+}} + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %2:sub0 +# CHECK: S_NOP 0, implicit undef %2:sub1 +# CHECK: S_NOP 0, implicit %2:sub2 +# CHECK: S_NOP 0, implicit %2:sub3 +name: loop2 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_128 } + - { id: 2, class: sreg_128 } + - { id: 3, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + %1 = REG_SEQUENCE %0, %subreg.sub0 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %2 = PHI %1, %bb.0, %3, %bb.1 + + ; rotate subreg lanes, skipping sub1 + %3 = REG_SEQUENCE %2:sub3, %subreg.sub0, %2:sub1, %subreg.sub1, %2:sub0, %subreg.sub2, %2:sub2, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %2:sub0 + S_NOP 0, implicit undef %2:sub1 + S_NOP 0, implicit %2:sub2 + S_NOP 0, implicit %2:sub3 +...