Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -461,6 +461,9 @@ /// DeadMachineInstructionElim - This pass removes dead machine instructions. extern char &DeadMachineInstructionElimID; + /// This pass adds dead/undef flags after analyzing subregister lanes. + extern char &DetectDeadLanesID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -110,6 +110,7 @@ void initializeDeadMachineInstructionElimPass(PassRegistry&); void initializeDelinearizationPass(PassRegistry &); void initializeDependenceAnalysisPass(PassRegistry&); +void initializeDetectDeadLanesPass(PassRegistry&); void initializeDivergenceAnalysisPass(PassRegistry&); void initializeDomOnlyPrinterPass(PassRegistry&); void initializeDomOnlyViewerPass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -12,6 +12,7 @@ CodeGenPrepare.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp + DetectDeadLanes.cpp DFAPacketizer.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -24,6 +24,7 @@ initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandISelPseudosPass(Registry); Index: 
lib/CodeGen/DetectDeadLanes.cpp =================================================================== --- /dev/null +++ lib/CodeGen/DetectDeadLanes.cpp @@ -0,0 +1,400 @@ +//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Analysis that tracks defined/used subregister lanes across COPY instructions +/// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE, +/// INSERT_SUBREG, EXTRACT_SUBREG). +/// The information is used to detect dead definitions and the usage of +/// (completely) undefined values and mark the operands as such. +/// This pass is necessary because the dead/undef status is not obvious anymore +/// when subregisters are involved. +/// +/// Example: +/// %vreg0 = some definition +/// %vreg1 = IMPLICIT_DEF +/// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1 +/// %vreg3 = EXTRACT_SUBREG %vreg2, sub1 +/// use %vreg3 +/// The %vreg0 definition is dead and %vreg3 contains an undefined value. +// +//===----------------------------------------------------------------------===// + +#include <deque> +#include <vector> + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "detect-dead-lanes" + +namespace { + /// Contains a bitmask of which lanes of a given virtual register are + /// defined and which ones are actually used. 
+ struct VRegInfo { + LaneBitmask UsedLanes; + LaneBitmask DefinedLanes; + }; + + class DetectDeadLanes : public MachineFunctionPass { + bool runOnMachineFunction(MachineFunction &MF) override; + + public: + static char ID; + DetectDeadLanes() : MachineFunctionPass(ID) { + } + + private: + void addToWorklist(unsigned VRegIndex) { + WorkList.push_back(VRegIndex); + InWorkList.set(VRegIndex); + } + void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes); + void addDefinedLanesOnReg(unsigned Reg, LaneBitmask DefinedLanes); + + /// Transfer UsedLanes to the operands of \p MI. + void transferUsedLanesStep(const MachineOperand &Def, + LaneBitmask UsedLanes); + /// Given a use register operand \p Use and a mask of defined lanes, check + /// if the operand belongs to a lowersToCopies() instruction, transfer the + /// mask to the def and put the instruction into the worklist. + void transferDefinedLanesStep(const MachineOperand &Use, + LaneBitmask DefinedLanes); + /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum + /// of an instruction with lowersToCopies()==true, determine which lanes + /// are defined at the output operand \p Def. + LaneBitmask transferDefinedLanes(const MachineOperand &Def, + unsigned OpNum, LaneBitmask DefinedLanes); + + const MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + + std::vector<VRegInfo> VRegInfos; + /// Worklist containing registers to process. + std::deque<unsigned> WorkList; + BitVector InWorkList; + /// This bitvector is set for each vreg index where the vreg is defined + /// by an instruction where lowersToCopies()==true. 
+ BitVector DefinedByCopy; + }; +} +char DetectDeadLanes::ID = 0; +char &llvm::DetectDeadLanesID = DetectDeadLanes::ID; + +INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", + "Detect dead lanes", false, false); + +static bool lowersToCopies(const MachineInstr &MI) { + // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(), + // isExtractSubRegLike(), isInsertSubregLike() in the future even though they + // are not lowered to a COPY. + return MI.isCopy() || MI.isPHI() || MI.isInsertSubreg() || + MI.isRegSequence() || MI.isExtractSubreg(); +} + +void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, + LaneBitmask UsedLanes) { + if (!MO.readsReg()) + return; + unsigned MOReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(MOReg)) + return; + + unsigned MOSubReg = MO.getSubReg(); + if (MOSubReg != 0) + UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes); + UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg); + + unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg); + VRegInfo &MORegInfo = VRegInfos[MORegIdx]; + LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes; + // Any change at all? + if ((UsedLanes & ~PrevUsedLanes) == 0) + return; + + // Set UsedLanes and remember instruction for further propagation. 
+ MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes; + if (DefinedByCopy[MORegIdx] && !InWorkList[MORegIdx]) + addToWorklist(MORegIdx); +} + +void DetectDeadLanes::transferUsedLanesStep(const MachineOperand &Def, + LaneBitmask UsedLanes) { + const MachineInstr &MI = *Def.getParent(); + if (MI.isCopy() || MI.isPHI()) { + for (const MachineOperand &MO : MI.uses()) { + if (MO.isReg() && MO.isUse()) + addUsedLanesOnOperand(MO, UsedLanes); + } + } else if (MI.isRegSequence()) { + unsigned I = MI.getNumOperands()-2; + for (;;) { + const MachineOperand &MO = MI.getOperand(I); + unsigned SubIdx = MI.getOperand(I+1).getImm(); + LaneBitmask MOUsedLanes + = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); + + addUsedLanesOnOperand(MO, MOUsedLanes); + + LaneBitmask SubIdxMask = TRI->getSubRegIndexLaneMask(SubIdx); + UsedLanes &= ~SubIdxMask; + if (I == 1) + break; + I -= 2; + } + } else if (MI.isInsertSubreg()) { + const MachineOperand &MO2 = MI.getOperand(2); + unsigned SubIdx = MI.getOperand(3).getImm(); + LaneBitmask MO2UsedLanes + = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); + addUsedLanesOnOperand(MO2, MO2UsedLanes); + + const MachineOperand &MO1 = MI.getOperand(1); + LaneBitmask MO1UsedLanes + = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx); + addUsedLanesOnOperand(MO1, MO1UsedLanes); + } else { + assert(MI.isExtractSubreg()); + const MachineOperand &MO = MI.getOperand(1); + unsigned SubIdx = MI.getOperand(2).getImm(); + LaneBitmask MOUsedLanes + = TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes); + addUsedLanesOnOperand(MO, MOUsedLanes); + } +} + +void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, + LaneBitmask DefinedLanes) { + if (!Use.readsReg()) + return; + const MachineInstr &MI = *Use.getParent(); + if (MI.getDesc().getNumDefs() != 1) + return; + const MachineOperand &Def = *MI.defs().begin(); + unsigned DefReg = Def.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + return; + unsigned DefRegIdx = 
TargetRegisterInfo::virtReg2Index(DefReg); + if (!DefinedByCopy[DefRegIdx]) + return; + + unsigned OpNum = MI.getOperandNo(&Use); + DefinedLanes + = TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes); + DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes); + + VRegInfo &RegInfo = VRegInfos[DefRegIdx]; + LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes; + // Any change at all? + if ((DefinedLanes & ~PrevDefinedLanes) == 0) + return; + + RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes; + if (!InWorkList[DefRegIdx]) + addToWorklist(DefRegIdx); +} + +LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def, + unsigned OpNum, + LaneBitmask DefinedLanes) { + const MachineInstr &MI = *Def.getParent(); + // Translate DefinedLanes if necessary. + if (MI.isRegSequence()) { + unsigned SubIdx = MI.getOperand(OpNum+1).getImm(); + DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes); + DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx); + } else if (MI.isInsertSubreg()) { + unsigned SubIdx = MI.getOperand(3).getImm(); + if (OpNum == 2) { + DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes); + DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx); + } else { + assert(OpNum == 1); + // Ignore lanes defined by operand 2. + DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx); + } + } else if (MI.isExtractSubreg()) { + unsigned SubIdx = MI.getOperand(2).getImm(); + assert(OpNum == 1); + DefinedLanes + = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes); + } else { + assert(MI.isCopy() || MI.isPHI()); + } + + unsigned SubIdx = Def.getSubReg(); + DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes); + DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg()); + return DefinedLanes; +} + +bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) { + // Don't bother if we won't track subregister liveness later. 
+ if (!MF.getSubtarget().enableSubRegLiveness()) { + DEBUG(dbgs() << "Skipping Detect dead lanes pass\n"); + return false; + } + + MRI = &MF.getRegInfo(); + TRI = MRI->getTargetRegisterInfo(); + + unsigned NumVirtRegs = MRI->getNumVirtRegs(); + VRegInfos.resize(NumVirtRegs); + InWorkList.resize(NumVirtRegs); + DefinedByCopy.resize(NumVirtRegs); + + // First pass: Populate defs/uses of vregs with initial values + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + VRegInfo &Info = VRegInfos[RegIdx]; + // Ignore special vregs without a definition and vregs where we won't track + // subregister liveness anyway. + if (!MRI->hasOneDef(Reg)) { + Info.UsedLanes = ~0u; + Info.DefinedLanes = ~0u; + continue; + } + + const MachineOperand &Def = *MRI->def_begin(Reg); + const MachineInstr &DefMI = *Def.getParent(); + LaneBitmask DefinedLanes; + if (lowersToCopies(DefMI)) { + // Start optimistically with no used or defined lanes for copy + // instructions. The following dataflow analysis will add more bits. + DefinedLanes = 0; + DefinedByCopy.set(RegIdx); + + // Determine initial DefinedLanes. + for (const MachineOperand &MO : DefMI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned MOReg = MO.getReg(); + LaneBitmask MODefinedLanes; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) { + if (MRI->hasOneDef(MOReg)) { + const MachineOperand &MODef = *MRI->def_begin(MOReg); + const MachineInstr &MODefMI = *MODef.getParent(); + // Bits from copy-like operations will be added later. 
+ if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef()) + continue; + } + MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg); + MODefinedLanes + = TRI->reverseComposeSubRegIndexLaneMask(MO.getSubReg(), + MODefinedLanes); + } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + MODefinedLanes = ~0u; + } else { + continue; + } + + unsigned OpNum = DefMI.getOperandNo(&MO); + DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes); + } + + addToWorklist(RegIdx); + } else if (DefMI.isImplicitDef()) { + DefinedLanes = 0; + } else { + unsigned SubReg = Def.getSubReg(); + DefinedLanes = SubReg != 0 ? TRI->getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); + } + + LaneBitmask UsedLanes = 0; + for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + if (MO.isUndef() || MO.isInternalRead()) + continue; + + const MachineInstr &UseMI = *MO.getParent(); + if (lowersToCopies(UseMI) || UseMI.isKill()) + continue; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0) { + UsedLanes = MRI->getMaxLaneMaskForVReg(Reg); + // Shortcut: All lanes are already set. + break; + } + UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg); + } + Info.UsedLanes = UsedLanes; + Info.DefinedLanes = DefinedLanes; + } + + // Iterate as long as defined lanes/used lanes keep changing. + while (!WorkList.empty()) { + unsigned RegIdx = WorkList.front(); + WorkList.pop_front(); + assert(InWorkList[RegIdx]); + InWorkList.reset(RegIdx); + VRegInfo &Info = VRegInfos[RegIdx]; + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + + // Transfer UsedLanes to operands of DefMI (backwards dataflow). + MachineOperand &Def = *MRI->def_begin(Reg); + transferUsedLanesStep(Def, Info.UsedLanes); + // Transfer DefinedLanes to users of Reg (forward dataflow). 
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + transferDefinedLanesStep(MO, Info.DefinedLanes); + } + } + + DEBUG( + dbgs() << "Defined/Used lanes:\n"; + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + const VRegInfo &Info = VRegInfos[RegIdx]; + dbgs() << PrintReg(Reg, nullptr) + << " Used: " << PrintLaneMask(Info.UsedLanes) + << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; + } + dbgs() << "\n"; + ); + + // Mark operands as dead/unused. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + unsigned SubReg = MO.getSubReg(); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); + unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + const VRegInfo &RegInfo = VRegInfos[RegIdx]; + if (MO.isDef() && RegInfo.UsedLanes == 0) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); + MO.setIsDead(); + } else if (MO.readsReg() && (RegInfo.DefinedLanes & Mask) == 0) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " << MI); + MO.setIsUndef(); + } + } + } + } + + InWorkList.clear(); + DefinedByCopy.clear(); + VRegInfos.clear(); + return true; +} Index: lib/CodeGen/Passes.cpp =================================================================== --- lib/CodeGen/Passes.cpp +++ lib/CodeGen/Passes.cpp @@ -734,6 +734,8 @@ /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addPass(&DetectDeadLanesID, false); + addPass(&ProcessImplicitDefsID, false); // LiveVariables currently requires pure SSA form. 
Index: test/CodeGen/AMDGPU/detect-dead-lanes.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/detect-dead-lanes.mir @@ -0,0 +1,376 @@ +# RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o /dev/null %s 2>&1 | FileCheck %s +--- | + define void @test0() { ret void } + define void @test1() { ret void } + define void @test2() { ret void } + define void @loop0() { ret void } + define void @loop1() { ret void } + define void @loop2() { ret void } +... +--- +# Combined use/def transfer check, the basics. +# CHECK-LABEL: name: test0 +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %3:sub0 +# CHECK: S_NOP 0, implicit %3:sub1 +# CHECK: S_NOP 0, implicit undef %3:sub2 +# CHECK: %4 = COPY %3:sub0_sub1 +# CHECK: %5 = COPY %3:sub2_sub3 +# CHECK: S_NOP 0, implicit %4:sub0 +# CHECK: S_NOP 0, implicit %4:sub1 +# CHECK: S_NOP 0, implicit undef %5:sub0 +name: test0 +isSSA: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sreg_32 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub3 + S_NOP 0, implicit %3:sub0 + S_NOP 0, implicit %3:sub1 + S_NOP 0, implicit %3:sub2 + %4 = COPY %3:sub0_sub1 + %5 = COPY %3:sub2_sub3 + S_NOP 0, implicit %4:sub0 + S_NOP 0, implicit %4:sub1 + S_NOP 0, implicit %5:sub0 +... +--- +# Check defined lanes transfer; Includes checking for some special cases like +# undef operands or IMPLICIT_DEF definitions. 
+# CHECK-LABEL: name: test1 +# CHECK: %0 = REG_SEQUENCE %sgpr0, {{[0-9]+}}, %sgpr0, {{[0-9]+}} +# CHECK: %1 = INSERT_SUBREG %0, %sgpr1, {{[0-9]+}} +# CHECK: %2 = INSERT_SUBREG %0:sub2_sub3, %sgpr42, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %1:sub0 +# CHECK: S_NOP 0, implicit undef %1:sub1 +# CHECK: S_NOP 0, implicit %1:sub2 +# CHECK: S_NOP 0, implicit %1:sub3 +# CHECK: S_NOP 0, implicit %2:sub0 +# CHECK: S_NOP 0, implicit undef %2:sub1 + +# CHECK: %3 = IMPLICIT_DEF +# CHECK: %4 = INSERT_SUBREG %0, undef %3, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %4:sub0 +# CHECK: S_NOP 0, implicit undef %4:sub1 +# CHECK: S_NOP 0, implicit %4:sub2 +# CHECK: S_NOP 0, implicit undef %4:sub3 + +# CHECK: %5 = EXTRACT_SUBREG %0, {{[0-9]+}} +# CHECK: %6 = EXTRACT_SUBREG %5, {{[0-9]+}} +# CHECK: %7 = EXTRACT_SUBREG %5, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %5 +# CHECK: S_NOP 0, implicit %6 +# CHECK: S_NOP 0, implicit undef %7 + +# CHECK: %8 = IMPLICIT_DEF +# CHECK: %9 = EXTRACT_SUBREG undef %8, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %9 + +# CHECK: %10 = EXTRACT_SUBREG undef %0, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %10 +name: test1 +isSSA: true +registers: + - { id: 0, class: sreg_128 } + - { id: 1, class: sreg_128 } + - { id: 2, class: sreg_64 } + - { id: 3, class: sreg_32 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_64 } + - { id: 6, class: sreg_32 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_64 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } +body: | + bb.0: + %0 = REG_SEQUENCE %sgpr0, %subreg.sub0, %sgpr0, %subreg.sub2 + %1 = INSERT_SUBREG %0, %sgpr1, %subreg.sub3 + %2 = INSERT_SUBREG %0:sub2_sub3, %sgpr42, %subreg.sub0 + S_NOP 0, implicit %1:sub0 + S_NOP 0, implicit %1:sub1 + S_NOP 0, implicit %1:sub2 + S_NOP 0, implicit %1:sub3 + S_NOP 0, implicit %2:sub0 + S_NOP 0, implicit %2:sub1 + + %3 = IMPLICIT_DEF + %4 = INSERT_SUBREG %0, %3, %subreg.sub0 + S_NOP 0, implicit %4:sub0 + S_NOP 0, implicit %4:sub1 + S_NOP 0, implicit 
%4:sub2 + S_NOP 0, implicit %4:sub3 + + %5 = EXTRACT_SUBREG %0, %subreg.sub0_sub1 + %6 = EXTRACT_SUBREG %5, %subreg.sub0 + %7 = EXTRACT_SUBREG %5, %subreg.sub1 + S_NOP 0, implicit %5 + S_NOP 0, implicit %6 + S_NOP 0, implicit %7 + + %8 = IMPLICIT_DEF + %9 = EXTRACT_SUBREG %8, %subreg.sub1 + S_NOP 0, implicit %9 + + %10 = EXTRACT_SUBREG undef %0, %subreg.sub2_sub3 + S_NOP 0, implicit %10 +... +--- +# Check used lanes transfer; Includes checking for some special cases like +# undef operands. +# CHECK-LABEL: name: test2 +# CHECK: S_NOP 0, implicit-def dead %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def %2 +# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %3:sub1 +# CHECK: S_NOP 0, implicit %3:sub3 + +# CHECK: S_NOP 0, implicit-def dead %4 +# CHECK: S_NOP 0, implicit-def %5 +# CHECK: %6 = REG_SEQUENCE %4, {{[0-9]+}}, %5, {{[0-9]+}}, %5, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %6 + +# CHECK: S_NOP 0, implicit-def %7 +# CHECK: S_NOP 0, implicit-def dead %8 +# CHECK: %9 = REG_SEQUENCE %7, {{[0-9]+}}, undef %8, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %9 + +# CHECK: S_NOP 0, implicit-def dead %10 +# CHECK: S_NOP 0, implicit-def %11 +# CHECK: %12 = INSERT_SUBREG %10, %11, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %12:sub2 + +# CHECK: S_NOP 0, implicit-def %13 +# CHECK: S_NOP 0, implicit-def dead %14 +# CHECK: %15 = INSERT_SUBREG %13, %14, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %15:sub3 + +# CHECK: S_NOP 0, implicit-def %16 +# CHECK: S_NOP 0, implicit-def dead %17 +# CHECK: %18 = REG_SEQUENCE %16, {{[0-9]+}}, %17, {{[0-9]+}} +# CHECK: %19 = EXTRACT_SUBREG %18, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %19:sub1 + +name: test2 +isSSA: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sreg_64 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_32 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: 
sreg_32 } + - { id: 9, class: sreg_64 } + - { id: 10, class: sreg_128 } + - { id: 11, class: sreg_64 } + - { id: 12, class: sreg_128 } + - { id: 13, class: sreg_128 } + - { id: 14, class: sreg_64 } + - { id: 15, class: sreg_128 } + - { id: 16, class: sreg_64 } + - { id: 17, class: sreg_64 } + - { id: 18, class: sreg_128 } + - { id: 19, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2_sub3 + S_NOP 0, implicit %3:sub1 + S_NOP 0, implicit %3:sub3 + + S_NOP 0, implicit-def %4 + S_NOP 0, implicit-def %5 + %6 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1, %5, %subreg.sub0 + S_NOP 0, implicit %6 + + S_NOP 0, implicit-def %7 + S_NOP 0, implicit-def %8 + %9 = REG_SEQUENCE %7, %subreg.sub0, undef %8, %subreg.sub1 + S_NOP 0, implicit %9 + + S_NOP 0, implicit-def %10 + S_NOP 0, implicit-def %11 + %12 = INSERT_SUBREG %10, %11, %subreg.sub2_sub3 + S_NOP 0, implicit %12:sub2 + + S_NOP 0, implicit-def %13 + S_NOP 0, implicit-def %14 + %15 = INSERT_SUBREG %13, %14, %subreg.sub0_sub1 + S_NOP 0, implicit %15:sub3 + + S_NOP 0, implicit-def %16 + S_NOP 0, implicit-def %17 + %18 = REG_SEQUENCE %16, %subreg.sub0_sub1, %17, %subreg.sub2_sub3 + %19 = EXTRACT_SUBREG %18, %subreg.sub0_sub1 + S_NOP 0, implicit %19:sub1 +... +--- +# Check "optimistic" dataflow fixpoint in phi-loops. 
+# CHECK-LABEL: name: loop0 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def dead %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %4 = PHI %3, %bb.0, %5, %bb.1 + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %4:sub0 +# CHECK: S_NOP 0, implicit undef %4:sub3 +name: loop0 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sreg_32 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %4 = PHI %3, %bb.0, %5, %bb.1 + + ; let's swizzle some lanes around for fun... + %5 = REG_SEQUENCE %4:sub0, %subreg.sub0, %4:sub2, %subreg.sub1, %4:sub1, %subreg.sub2, %4:sub3, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %4:sub0 + S_NOP 0, implicit %4:sub3 +... +--- +# Check a loop that needs to be traversed multiple times to reach the fixpoint +# for the used lanes. The example reads sub3 lane at the end, however with each +# loop iteration we should get 1 more lane marked as we cycle the sublanes +# along. Sublanes sub0, sub1 and sub3 are rotated in the loop so only sub2 +# should be dead. 
+# CHECK-LABEL: name: loop1 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: S_NOP 0, implicit-def %3 +# CHECK: %4 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}}, %3, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %5 = PHI %4, %bb.0, %6, %bb.1 + +# CHECK: %6 = REG_SEQUENCE %5:sub1, {{[0-9]+}}, %5:sub3, {{[0-9]+}}, %5:sub2, {{[0-9]+}}, %5:sub0, {{[0-9]+}} + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %6:sub3 +name: loop1 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sreg_32 } + - { id: 3, class: sreg_32 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_128 } + - { id: 6, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def dead %2 + S_NOP 0, implicit-def %3 + %4 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %5 = PHI %4, %bb.0, %6, %bb.1 + + ; rotate lanes, but skip sub2 lane... + %6 = REG_SEQUENCE %5:sub1, %subreg.sub0, %5:sub3, %subreg.sub1, %5:sub2, %subreg.sub2, %5:sub0, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %6:sub3 +... +--- +# Similar to loop1 test, but check for fixpoint of defined lanes. +# Lanes are rotated between sub0, sub2, sub3 so only sub1 should be dead/undef. 
+# CHECK-LABEL: name: loop2 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: %1 = REG_SEQUENCE %0, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %2 = PHI %1, %bb.0, %3, %bb.1 + +# CHECK: %3 = REG_SEQUENCE %2:sub3, {{[0-9]+}}, undef %2:sub1, {{[0-9]+}}, %2:sub0, {{[0-9]+}}, %2:sub2, {{[0-9]+}} + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %2:sub0 +# CHECK: S_NOP 0, implicit undef %2:sub1 +# CHECK: S_NOP 0, implicit %2:sub2 +# CHECK: S_NOP 0, implicit %2:sub3 +name: loop2 +isSSA: true +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32 } + - { id: 1, class: sreg_128 } + - { id: 2, class: sreg_128 } + - { id: 3, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + %1 = REG_SEQUENCE %0, %subreg.sub0 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %2 = PHI %1, %bb.0, %3, %bb.1 + + ; rotate subreg lanes, skipping sub1 + %3 = REG_SEQUENCE %2:sub3, %subreg.sub0, %2:sub1, %subreg.sub1, %2:sub0, %subreg.sub2, %2:sub2, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %2:sub0 + S_NOP 0, implicit undef %2:sub1 + S_NOP 0, implicit %2:sub2 + S_NOP 0, implicit %2:sub3 +...