Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -79,6 +79,12 @@
   int checkInlineAsmHazards(MachineInstr *IA);
   int checkAnyInstHazards(MachineInstr *MI);
   int checkReadM0Hazards(MachineInstr *SMovRel);
+  int checkNSAtoVMEMHazard(MachineInstr *MI);
+  bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
+  bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
+  bool fixVcmpxExecWARHazard(MachineInstr *MI);
+  bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
+
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -133,6 +134,12 @@
       && checkVMEMHazards(MI) > 0)
     return NoopHazard;
 
+  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
+    return NoopHazard;
+
+  if (ST.hasNoDataDepHazard())
+    return NoHazard;
+
   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
     return NoopHazard;
 
@@ -181,6 +188,12 @@
   IsHazardRecognizerMode = true;
   CurrCycleInstr = MI;
   unsigned W = PreEmitNoopsCommon(MI);
+
+  fixVMEMtoScalarWriteHazards(MI);
+  fixSMEMtoVectorWriteHazards(MI);
+  fixVcmpxExecWARHazard(MI);
+  fixLdsBranchVmemWARHazard(MI);
+
   CurrCycleInstr = nullptr;
   return W;
 }
@@ -191,12 +204,18 @@
   if (SIInstrInfo::isSMRD(*MI))
     return std::max(WaitStates, checkSMRDHazards(MI));
 
-  if (SIInstrInfo::isVALU(*MI))
-    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
-
   if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
     WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
 
+  if (ST.hasNSAtoVMEMBug())
+    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
+
+  if (ST.hasNoDataDepHazard())
+    return WaitStates;
+
+  if (SIInstrInfo::isVALU(*MI))
+    WaitStates = std::max(WaitStates, checkVALUHazards(MI));
+
   if (SIInstrInfo::isDPP(*MI))
     WaitStates = std::max(WaitStates, checkDPPHazards(MI));
 
@@ -775,3 +794,243 @@
   return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                    SMovRelWaitStates);
 }
+
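+// The fix* routines below run from PreEmitNoops, i.e. in hazard recognizer
+// mode, and mitigate a hazard by inserting an instruction rather than by
+// reporting wait states. Each one pairs an IsHazardFn, which identifies a
+// hazardous instruction while scanning backwards from MI across block
+// boundaries, with an IsExpiredFn, which identifies an instruction that
+// already resolves the hazard and therefore ends the search. When every
+// path reaches an expiring instruction first, ::getWaitStatesSince returns
+// std::numeric_limits<int>::max() and no fix is needed.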
+bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
+  if (!ST.hasVMEMtoScalarWriteHazard())
+    return false;
+
+  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
+    return false;
+
+  if (MI->getNumDefs() == 0)
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
+    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
+        !SIInstrInfo::isFLAT(*I))
+      return false;
+
+    for (const MachineOperand &Def : MI->defs()) {
+      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
+      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
+        continue;
+      return true;
+    }
+    return false;
+  };
+
+  auto IsExpiredFn = [] (MachineInstr *MI, int) {
+    return MI && (SIInstrInfo::isVALU(*MI) ||
+                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
+                   !MI->getOperand(0).getImm()));
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+  return true;
+}
+
+bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
+  if (!ST.hasSMEMtoVectorWriteHazard())
+    return false;
+
+  if (!SIInstrInfo::isVALU(*MI))
+    return false;
+
+  unsigned SDSTName;
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_READLANE_B32:
+  case AMDGPU::V_READFIRSTLANE_B32:
+    SDSTName = AMDGPU::OpName::vdst;
+    break;
+  default:
+    SDSTName = AMDGPU::OpName::sdst;
+    break;
+  }
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
+  if (!SDST) {
+    for (auto MO : MI->implicit_operands()) {
+      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
+        SDST = &MO;
+        break;
+      }
+    }
+  }
+
+  if (!SDST)
+    return false;
+
+  const unsigned SDSTReg = SDST->getReg();
+  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
+    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
+  };
+
+  // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent
+  // between any at-risk SMEM and any SALU dependent on the SMEM results.
+  auto IsExpiredFn = [TII] (MachineInstr *MI, int) {
+    if (MI) {
+      if (TII->isSALU(*MI)) {
+        if (TII->isSOPP(*MI))
+          return false;
+        switch (MI->getOpcode()) {
+        case AMDGPU::S_SETVSKIP:
+        case AMDGPU::S_VERSION:
+        case AMDGPU::S_WAITCNT_VSCNT:
+        case AMDGPU::S_WAITCNT_VMCNT:
+        case AMDGPU::S_WAITCNT_EXPCNT:
+        case AMDGPU::S_WAITCNT_LGKMCNT:
+          return false;
+        default:
+          return true;
+        }
+      }
+    }
+    return false;
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
+      .addImm(0);
+  return true;
+}
+
+bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
+  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
+    return false;
+
+  auto IsHazardFn = [TRI] (MachineInstr *I) {
+    if (SIInstrInfo::isVALU(*I))
+      return false;
+    return I->readsRegister(AMDGPU::EXEC, TRI);
+  };
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
+    if (!MI)
+      return false;
+    if (SIInstrInfo::isVALU(*MI)) {
+      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
+        return true;
+      for (auto MO : MI->implicit_operands())
+        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
+          return true;
+    }
+    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
+      return true;
+    return false;
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(0xfffe);
+  return true;
+}
+
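+// The LDS-branch-VMEM WAR hazard only exists between an LDS access and a
+// VMEM access (or vice versa) separated by a branch. IsHazardInst classifies
+// the two kinds (1 = DS, 2 = VMEM or segment-specific FLAT). The outer
+// search below scans backwards for a branch; from each branch found, a
+// nested search then looks for an access of the other kind that is not
+// already separated from it by a same-kind access or by an
+// "s_waitcnt_vscnt null, 0".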
+bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
+  if (!ST.hasLdsBranchVmemWARHazard())
+    return false;
+
+  auto IsHazardInst = [] (const MachineInstr *MI) {
+    if (SIInstrInfo::isDS(*MI))
+      return 1;
+    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
+      return 2;
+    return 0;
+  };
+
+  auto InstType = IsHazardInst(MI);
+  if (!InstType)
+    return false;
+
+  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
+    return I && (IsHazardInst(I) ||
+                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+                  !I->getOperand(1).getImm()));
+  };
+
+  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
+    if (!I->isBranch())
+      return false;
+
+    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
+      auto InstType2 = IsHazardInst(I);
+      return InstType2 && InstType != InstType2;
+    };
+
+    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
+      if (!I)
+        return false;
+
+      auto InstType2 = IsHazardInst(I);
+      if (InstType == InstType2)
+        return true;
+
+      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+             !I->getOperand(1).getImm();
+    };
+
+    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
+           std::numeric_limits<int>::max();
+  };
+
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_VSCNT))
+      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+      .addImm(0);
+
+  return true;
+}
+
+int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
+  int NSAtoVMEMWaitStates = 1;
+
+  if (!ST.hasNSAtoVMEMBug())
+    return 0;
+
+  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
+    return 0;
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
+  if (!Offset || (Offset->getImm() & 6) == 0)
+    return 0;
+
+  auto IsHazardFn = [TII] (MachineInstr *I) {
+    if (!SIInstrInfo::isMIMG(*I))
+      return false;
+    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
+    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
+           TII->getInstSizeInBytes(*I) >= 16;
+  };
+
+  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
+}
Index: llvm/trunk/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
@@ -0,0 +1,276 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: hazard_lds_branch_buf
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: hazard_lds_branch_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_buf_branch_lds
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: DS_READ_B32
+---
+name: hazard_buf_branch_lds
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_lds
+# GCN: bb.1:
+# GCN-NEXT: DS_READ_B32
+---
+name: no_hazard_lds_branch_lds
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_buf_branch_buf
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_buf_branch_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_buf_fallthrough
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_branch_buf_fallthrough
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_buf_samebb
+# GCN: DS_READ_B32
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_branch_buf_samebb
+body: |
+  bb.0:
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_buf_loop
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: DS_READ_B32
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: hazard_lds_branch_buf_loop
+body: |
+  bb.0:
+    successors: %bb.0
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.0
+...
+
+# GCN-LABEL: name: single_hazard_lds_branch_buf
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: single_hazard_lds_branch_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_lds_buf
+# GCN: bb.1:
+# GCN-NEXT: DS_READ_B32
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_branch_lds_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_buf_branch_buf
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_buf_branch_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_vscnt_1_buf
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: hazard_lds_branch_vscnt_1_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_WAITCNT_VSCNT undef $sgpr_null, 1
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_vscnt_0_buf
+# GCN: bb.1:
+# GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_branch_vscnt_0_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_WAITCNT_VSCNT undef $sgpr_null, 0
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_vscnt_s0_buf
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: hazard_lds_branch_vscnt_s0_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_WAITCNT_VSCNT undef $sgpr0, 0
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_vscnt_0_branch_buf
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_vscnt_0_branch_buf
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_WAITCNT_VSCNT undef $sgpr_null, 0
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_global
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: GLOBAL_LOAD_DWORD
+---
+name: hazard_lds_branch_global
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_scratch
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: SCRATCH_LOAD_DWORD
+---
+name: hazard_lds_branch_scratch
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    S_ENDPGM 0
+...
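+
+# Plain FLAT is classified as neither LDS nor VMEM by this fix (only
+# segment-specific FLAT, i.e. global and scratch accesses, counts as VMEM),
+# so no s_waitcnt_vscnt is inserted before the FLAT load below.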
+
+# GCN-LABEL: name: no_hazard_lds_branch_flat
+# GCN: bb.1:
+# GCN-NEXT: FLAT_LOAD_DWORD
+---
+name: no_hazard_lds_branch_flat
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $vgpr1 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    S_ENDPGM 0
+...
Index: llvm/trunk/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
@@ -0,0 +1,61 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: hazard_image_sample_d_buf_off6
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: hazard_image_sample_d_buf_off6
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
+...
+
+# GCN-LABEL: name: no_hazard_image_sample_d_buf_off1
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: no_hazard_image_sample_d_buf_off1
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, 0, implicit $exec
+...
+
+# GCN-LABEL: name: no_hazard_image_sample_d_buf_far
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: V_NOP_e32
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: no_hazard_image_sample_d_buf_far
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    V_NOP_e32 implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
+...
+
+# Non-NSA
+# GCN-LABEL: name: no_hazard_image_sample_v4_v2_buf_off6
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: no_hazard_image_sample_v4_v2_buf_off6
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
+...
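+
+# The hazard needs an NSA-encoded MIMG instruction whose encoded size is at
+# least 16 bytes (checkNSAtoVMEMHazard requires getInstSizeInBytes(*I) >= 16)
+# immediately before a MUBUF/MTBUF whose immediate offset has bit 1 or bit 2
+# set, i.e. (offset & 6) != 0.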
+
+# Less than 4 dwords
+# GCN-LABEL: name: no_hazard_image_sample_v4_v3_buf_off6
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: no_hazard_image_sample_v4_v3_buf_off6
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
+...
Index: llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
@@ -0,0 +1,193 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: hazard_smem_war
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_no_hazard
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_ADD_U32
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_no_hazard
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    $sgpr3 = S_ADD_U32 $sgpr4, $sgpr5, implicit-def $scc
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_related_clause
+# GCN: S_LOAD_DWORD_IMM
+# GCN: S_WAITCNT
+# GCN: S_ADD_U32
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_related_clause
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_WAITCNT 0
+    $sgpr3 = S_ADD_U32 $sgpr2, $sgpr4, implicit-def $scc
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_branch
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_branch
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
+    successors: %bb.1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_BRANCH %bb.1
+
+  bb.1:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_cbranch
+# GCN: S_AND_B64
+# GCN: S_LOAD_DWORD_IMM
+# GCN: S_CBRANCH_VCCZ
+# GCN-NOT: $sgpr_null = S_MOV_B32 0
+# GCN: V_CMP_EQ_F32
+# GCN: S_ENDPGM 0
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_cbranch
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    successors: %bb.1, %bb.2
+    $vcc = S_AND_B64 $sgpr4_sgpr5, $sgpr4_sgpr5, implicit-def $scc
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
+
+  bb.1:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+
+  bb.2:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
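+
+# The search back from a V_CMP writing SGPRs is only expired by a qualifying
+# SALU instruction; branches (SOPP) and VALU defs of unrelated SGPRs do not
+# expire it, so in the test below the hazard carries through bb.2 and the
+# s_mov_b32 null, 0 is still inserted in bb.3.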
+
+# GCN-LABEL: name: hazard_smem_war_cbranch_carry
+# GCN: S_AND_B64
+# GCN: S_LOAD_DWORD_IMM
+# GCN: S_CBRANCH_VCCZ
+# GCN-NOT: $sgpr_null = S_MOV_B32 0
+# GCN: V_CMP_EQ_F32
+# GCN-NEXT: S_ENDPGM 0
+# GCN-NOT: $sgpr_null = S_MOV_B32 0
+# GCN: V_CMP_EQ_F32
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_cbranch_carry
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    successors: %bb.1, %bb.2
+    $vcc = S_AND_B64 $sgpr4_sgpr5, $sgpr4_sgpr5, implicit-def $scc
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
+
+  bb.1:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+
+  bb.2:
+    successors: %bb.3
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+
+  bb.3:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_backedge
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+# GCN: S_LOAD_DWORD_IMM
+---
+name: hazard_smem_war_backedge
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
+    successors: %bb.1
+    $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+
+  bb.1:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    S_BRANCH %bb.0
+...
+
+# GCN-LABEL: name: hazard_smem_war_impdef
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_impdef
+body: |
+  bb.0:
+    liveins: $vcc, $vgpr0
+    $sgpr0 = S_LOAD_DWORD_IMM $vcc, 0, 0, 0
+    V_CMP_EQ_F32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_readlane
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_READLANE_B32
+---
+name: hazard_smem_war_readlane
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr3, $vgpr0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    $sgpr0 = V_READLANE_B32 $vgpr0, $sgpr3
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_readfirstlane
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_READFIRSTLANE_B32
+---
+name: hazard_smem_war_readfirstlane
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+    $sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+    S_ENDPGM 0
+...
Index: llvm/trunk/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir
@@ -0,0 +1,164 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-skips,post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: hazard_vcmpx_smov_exec_lo
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: S_WAITCNT_DEPCTR 65534
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: hazard_vcmpx_smov_exec_lo
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0 = S_MOV_B32 $exec_lo
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
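+
+# The hazard is any non-VALU read of exec after a VALU writes exec (here the
+# V_CMPX that si-insert-skips produces for SI_KILL); a 64-bit S_MOV_B64 of
+# the full exec register below triggers it just like the 32-bit copy of
+# exec_lo above.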
+
+# GCN-LABEL: name: hazard_vcmpx_smov_exec
+# GCN: $sgpr0_sgpr1 = S_MOV_B64 $exec
+# GCN-NEXT: S_WAITCNT_DEPCTR 65534
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: hazard_vcmpx_smov_exec
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0_sgpr1 = S_MOV_B64 $exec
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_vmov_exec_lo
+# GCN: $vgpr0 = V_MOV_B32_e32 $exec_lo, implicit $exec
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_vmov_exec_lo
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 $exec_lo, implicit $exec
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_valu_impuse_exec
+# GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_valu_impuse_exec
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_imp
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: $vgpr0 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_imp
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0 = S_MOV_B32 $exec_lo
+    $vgpr0 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_exp
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_exp
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0 = S_MOV_B32 $exec_lo
+    $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_depctr_fffe
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: S_WAITCNT_DEPCTR 65534
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_smov_exec_lo_depctr_fffe
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0 = S_MOV_B32 $exec_lo
+    S_WAITCNT_DEPCTR 65534
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
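+
+# An existing S_WAITCNT_DEPCTR only satisfies the recognizer when every mask
+# bit other than bit 0 is set, i.e. (imm & 0xfffe) == 0xfffe: 65535 (0xffff)
+# below passes that check, while 61438 (0xeffe) does not and still gets the
+# fix.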
+
+# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_depctr_ffff
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: S_WAITCNT_DEPCTR 65535
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_smov_exec_lo_depctr_ffff
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0 = S_MOV_B32 $exec_lo
+    S_WAITCNT_DEPCTR 65535
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_vcmpx_smov_exec_lo_depctr_effe
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN: S_WAITCNT_DEPCTR 65534
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: hazard_vcmpx_smov_exec_lo_depctr_effe
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0 = S_MOV_B32 $exec_lo
+    S_WAITCNT_DEPCTR 61438
+    SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_ENDPGM 0
+...
Index: llvm/trunk/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
@@ -0,0 +1,210 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: vmem_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_smem_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_LOAD_DWORD_IMM
+---
+name: vmem_smem_write_sgpr
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+...
+# GCN-LABEL: name: vmem_snop_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_NOP
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_snop_write_sgpr
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_NOP 0
+    $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_valu_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: V_ADD_F32
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_valu_write_sgpr
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_swait0_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_WAITCNT
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_swait0_write_sgpr
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_WAITCNT 0
+    $sgpr0 = S_MOV_B32 0
+...
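+# Only s_waitcnt 0 (or an intervening VALU) expires the VMEM-to-scalar-write
+# search; the nonzero s_waitcnt below does not, so a v_nop is still inserted.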
+# GCN-LABEL: name: vmem_swait_any_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_WAITCNT
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_swait_any_write_sgpr
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_WAITCNT 1
+    $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_write_exec_impread
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_MOV_B64
+---
+name: vmem_write_exec_impread
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    $exec = S_MOV_B64 7
+...
+# GCN-LABEL: name: vmem_write_exec_expread
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B64
+---
+name: vmem_write_exec_expread
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $exec_lo, 0, 0, 0, 0, 0, implicit $exec
+    $exec = S_MOV_B64 7
+...
+# GCN-LABEL: name: ds_write_m0
+# GCN: DS_READ_B32
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: ds_write_m0
+body: |
+  bb.0:
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $m0 = S_MOV_B32 7
+...
+# GCN-LABEL: name: vmem_write_sgpr_fall_through
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr_fall_through
+body: |
+  bb.0:
+    successors: %bb.1
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+
+  bb.1:
+    $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_write_sgpr_branch
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_BRANCH
+# GCN: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr_branch
+body: |
+  bb.0:
+    successors: %bb.1
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_write_sgpr_branch_around
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_BRANCH
+# GCN: bb.2:
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr_branch_around
+body: |
+  bb.0:
+    successors: %bb.2
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+    S_WAITCNT 0
+
+  bb.2:
+    $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_write_sgpr_branch_backedge
+# GCN: $vgpr0 = IMPLICIT_DEF
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr_branch_backedge
+body: |
+  bb.0:
+    successors: %bb.1
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $sgpr4 = IMPLICIT_DEF
+    $vgpr0 = IMPLICIT_DEF
+    $sgpr0 = S_MOV_B32 0
+
+  bb.1:
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+    S_BRANCH %bb.0
+...