diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -704,6 +704,12 @@
   "Has VOPD dual issue wave32 instructions"
 >;
 
+def FeatureVALUMaskWriteHazard : SubtargetFeature<"valu-mask-write-hazard",
+  "HasVALUMaskWriteHazard",
+  "true",
+  "VALU mask read followed by SALU write to same SGPR can cause incorrect SALU execution"
+>;
+
 //===------------------------------------------------------------===//
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
@@ -922,7 +928,8 @@
    FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
    FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
    FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
-   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
+   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
+   FeatureVALUMaskWriteHazard
   ]
 >;
 
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -106,6 +106,7 @@
   bool fixVALUTransUseHazard(MachineInstr *MI);
   bool fixWMMAHazards(MachineInstr *MI);
   bool fixShift64HighRegBug(MachineInstr *MI);
+  bool fixVALUMaskWriteHazard(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1102,6 +1102,7 @@
   fixVALUTransUseHazard(MI);
   fixWMMAHazards(MI);
   fixShift64HighRegBug(MI);
+  fixVALUMaskWriteHazard(MI);
 }
 
 bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -2706,3 +2707,119 @@
 
   return false;
 }
+
+/// Mitigate the VALU mask write hazard for the SALU instruction \p MI.
+///
+/// The hazard is a VALU read of an SGPR as a mask followed by an SALU write
+/// to an overlapping SGPR. It is only handled in wave64 mode on subtargets
+/// that report hasVALUMaskWriteHazard(), and only when the sdst operand of
+/// \p MI is a register other than EXEC, EXEC_LO, EXEC_HI or M0. If such a
+/// hazard is found, a V_XOR3_B32_e64 reading SGPR0 is inserted before \p MI.
+///
+/// \returns true if a mitigating instruction was inserted, false otherwise.
+bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
+  if (!ST.isWave64())
+    return false;
+  if (!ST.hasVALUMaskWriteHazard())
+    return false;
+  if (!SIInstrInfo::isSALU(*MI))
+    return false;
+
+  const MachineOperand *SDSTOp = TII.getNamedOperand(*MI, AMDGPU::OpName::sdst);
+  if (!SDSTOp || !SDSTOp->isReg())
+    return false;
+
+  const Register HazardReg = SDSTOp->getReg();
+  // Writes to EXEC or M0 are never treated as this hazard.
+  if (HazardReg == AMDGPU::EXEC ||
+      HazardReg == AMDGPU::EXEC_LO ||
+      HazardReg == AMDGPU::EXEC_HI ||
+      HazardReg == AMDGPU::M0)
+    return false;
+
+  // Returns true for the VALU opcodes below when they read HazardReg (or a
+  // register overlapping it) as their mask source.
+  auto IsHazardFn = [HazardReg, this](const MachineInstr &I) {
+    switch (I.getOpcode()) {
+    case AMDGPU::V_ADDC_U32_e32:
+    case AMDGPU::V_CNDMASK_B16_e32:
+    case AMDGPU::V_CNDMASK_B32_e32:
+    case AMDGPU::V_DIV_FMAS_F32_e64:
+    case AMDGPU::V_DIV_FMAS_F64_e64:
+    case AMDGPU::V_SUBB_U32_e32:
+    case AMDGPU::V_SUBBREV_U32_e32:
+      // These implicitly read VCC as mask source.
+      return HazardReg == AMDGPU::VCC ||
+             HazardReg == AMDGPU::VCC_LO ||
+             HazardReg == AMDGPU::VCC_HI;
+    case AMDGPU::V_ADDC_U32_e64:
+    case AMDGPU::V_CNDMASK_B16_e64:
+    case AMDGPU::V_CNDMASK_B32_e64:
+    case AMDGPU::V_SUBB_U32_e64:
+    case AMDGPU::V_SUBBREV_U32_e64: {
+      // Only check mask register overlaps.
+      const MachineOperand *SSRCOp = TII.getNamedOperand(I, AMDGPU::OpName::src2);
+      assert(SSRCOp);
+      return TRI.regsOverlap(SSRCOp->getReg(), HazardReg);
+    }
+    default:
+      return false;
+    }
+  };
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  auto IsExpiredFn = [&MRI, this](const MachineInstr &I, int) {
+    // VALU access to any SGPR or literal constant other than HazardReg
+    // mitigates hazard. No need to check HazardReg here as this will
+    // only be called when !IsHazardFn.
+    // NOTE(review): a VALU that reads HazardReg as an ordinary (non-mask)
+    // SGPR source also counts as mitigating here - confirm this is intended.
+    if (!SIInstrInfo::isVALU(I))
+      return false;
+    for (int OpNo = 0, End = I.getNumOperands(); OpNo < End; ++OpNo) {
+      const MachineOperand &Op = I.getOperand(OpNo);
+      if (Op.isReg()) {
+        Register OpReg = Op.getReg();
+        // Only consider uses
+        if (!Op.isUse())
+          continue;
+        // Ignore EXEC
+        if (OpReg == AMDGPU::EXEC ||
+            OpReg == AMDGPU::EXEC_LO ||
+            OpReg == AMDGPU::EXEC_HI)
+          continue;
+        // Ignore all implicit uses except VCC
+        if (Op.isImplicit()) {
+          if (OpReg == AMDGPU::VCC ||
+              OpReg == AMDGPU::VCC_LO ||
+              OpReg == AMDGPU::VCC_HI)
+            return true;
+          continue;
+        }
+        if (TRI.isSGPRReg(MRI, OpReg))
+          return true;
+      } else {
+        const MCInstrDesc &InstDesc = I.getDesc();
+        const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
+        if (TII.isLiteralConstant(Op, OpInfo))
+          return true;
+      }
+    }
+    return false;
+  };
+
+  // Check for hazard; the maximum int value is treated as "none found".
+  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+      std::numeric_limits<int>::max())
+    return false;
+
+  // Add VALU which reads an SGPR before SGPR write to mitigate hazard.
+  // FIXME: we could select VGPR/SGPR used to avoid unintended dependency delays
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII.get(AMDGPU::V_XOR3_B32_e64), AMDGPU::VGPR0)
+      .addReg(AMDGPU::VGPR0)
+      .addReg(AMDGPU::SGPR0)
+      .addReg(AMDGPU::SGPR0);
+
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -193,6 +193,8 @@
   bool HasImageGather4D16Bug = false;
   bool HasVOPDInsts = false;
 
+  bool HasVALUMaskWriteHazard = false;
+
   // Dummy feature to use for assembler in tablegen.
   bool FeatureDisable = false;
 
@@ -1050,6 +1052,8 @@
 
   bool hasVALUTransUseHazard() const { return getGeneration() >= GFX11; }
 
+  bool hasVALUMaskWriteHazard() const { return HasVALUMaskWriteHazard; }
+
   /// Return if operations acting on VGPR tuples require even alignment.
   bool needsAlignedVGPRs() const { return GFX90AInsts; }
 
diff --git a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
@@ -0,0 +1,381 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: mask_hazard_getpc1
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_getpc1
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    $sgpr0_sgpr1 = S_GETPC_B64
+    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    S_ENDPGM 0
+...
+# SALU write inside a BUNDLE: V_XOR3_B32_e64 is expected inside the bundle.
+---
+name: mask_hazard_getpc2
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_getpc2
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 {
+    ; GCN-NEXT:   $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+    ; GCN-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 12, implicit-def $scc, implicit $scc
+    ; GCN-NEXT: }
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    BUNDLE implicit-def $sgpr0_sgpr1 {
+      $sgpr0_sgpr1 = S_GETPC_B64
+      $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+      $sgpr1 = S_ADDC_U32 $sgpr1, 12, implicit-def $scc, implicit $scc
+    }
+    S_ENDPGM 0
+...
+# V_CNDMASK_B32_e64 mask in $sgpr2_sgpr3, then S_CSELECT_B64 writes that pair: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_vcc1
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_vcc1
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_CNDMASK_B32_e32 reads implicit $vcc, then S_CSELECT_B64 writes $vcc: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_vcc2
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_vcc2
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_ADDC_U32_e64 reads $sgpr2_sgpr3, then S_CSELECT_B64 writes that pair: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_addc1
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_addc1
+    ; GCN: $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
+    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_ADDC_U32_e32 reads implicit $vcc, then S_CSELECT_B64 writes $vcc: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_addc2
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_addc2
+    ; GCN: $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_SUBB_U32_e64 reads $sgpr2_sgpr3, then S_CSELECT_B64 writes that pair: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_subb1
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subb1
+    ; GCN: $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
+    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_SUBB_U32_e32 reads implicit $vcc, then S_CSELECT_B64 writes $vcc: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_subb2
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subb2
+    ; GCN: $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_SUBBREV_U32_e64 reads $sgpr2_sgpr3, then S_CSELECT_B64 writes that pair: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_subbrev1
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subbrev1
+    ; GCN: $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec
+    $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_SUBBREV_U32_e32 reads implicit $vcc, then S_CSELECT_B64 writes $vcc: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_subbrev2
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subbrev2
+    ; GCN: $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_DIV_FMAS_F32_e64 reads implicit $vcc, then S_CSELECT_B64 writes $vcc: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_div_fmas_f32
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_div_fmas_f32
+    ; GCN: $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+# V_DIV_FMAS_F64_e64 reads implicit $vcc, then S_CSELECT_B64 writes $vcc: V_XOR3_B32_e64 expected before the write.
+---
+name: mask_hazard_div_fmas_f64
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_div_fmas_f64
+    ; GCN: $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec
+    $vcc = S_CSELECT_B64 -1, 0, implicit $scc
+    S_ENDPGM 0
+...
+
+# Check low word overlap: S_MOV_B32 writes $sgpr2 after a mask read of $sgpr2_sgpr3.
+---
+name: mask_hazard_subreg1
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subreg1
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    $sgpr2 = S_MOV_B32 0
+    S_ENDPGM 0
+...
+
+# Check high word overlap: S_MOV_B32 writes $sgpr3 after a mask read of $sgpr2_sgpr3.
+---
+name: mask_hazard_subreg2
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subreg2
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    $sgpr3 = S_MOV_B32 0
+    S_ENDPGM 0
+...
+
+# Check multiple subreg overlap: a single V_XOR3_B32_e64 is expected, before the first of the two writes.
+---
+name: mask_hazard_subreg3
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subreg3
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    $sgpr2 = S_MOV_B32 0
+    $sgpr3 = S_MOV_B32 0
+    S_ENDPGM 0
+...
+
+# Check vcc_lo overlap: S_MOV_B32 writes $vcc_lo after an implicit $vcc mask read.
+---
+name: mask_hazard_subreg4
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subreg4
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $vcc_lo = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_lo
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    $vcc_lo = S_MOV_B32 0
+    $sgpr2 = S_MOV_B32 $vcc_lo
+    S_ENDPGM 0
+...
+
+# Check vcc_hi overlap: S_MOV_B32 writes $vcc_hi after an implicit $vcc mask read.
+---
+name: mask_hazard_subreg5
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_subreg5
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $vcc_hi = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_hi
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    $vcc_hi = S_MOV_B32 0
+    $sgpr2 = S_MOV_B32 $vcc_hi
+    S_ENDPGM 0
+...
+
+# S_WAITCNT does not mitigate hazard: V_XOR3_B32_e64 is still expected, after the S_WAITCNT and before the SALU write.
+---
+name: mask_hazard_waitcnt
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_waitcnt
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: S_WAITCNT 0
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    S_WAITCNT 0
+    $sgpr0_sgpr1 = S_GETPC_B64
+    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    S_ENDPGM 0
+...
+
+# Check implicit $exec: intervening VALUs whose only SGPR use is implicit $exec do not mitigate the hazard.
+---
+name: mask_hazard_gap1
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_gap1
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0_sgpr1 = S_GETPC_B64
+    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    S_ENDPGM 0
+...
+
+# Check implicit $mode: an intervening VALU with an implicit $mode use does not mitigate the hazard.
+---
+name: mask_hazard_gap2
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_gap2
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode
+    $sgpr0_sgpr1 = S_GETPC_B64
+    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    S_ENDPGM 0
+...
+
+# Check explicit $exec: an intervening VALU with an explicit $exec_lo operand does not mitigate the hazard.
+---
+name: mask_hazard_gap3
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_gap3
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2
+    ; GCN-NEXT: $vgpr0 = V_XOR3_B32_e64 $vgpr0, $sgpr0, $sgpr0, implicit $exec
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2
+    $sgpr0_sgpr1 = S_GETPC_B64
+    $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    S_ENDPGM 0
+...
+
+# Different SGPR write: the SALU writes $sgpr0 while the mask read used $sgpr2_sgpr3, so no V_XOR3_B32_e64 is expected.
+---
+name: mask_hazard_no_hazard1
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_no_hazard1
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    S_ENDPGM 0
+...
+
+# Different SGPR write with mask read overlap: the SALU only reads $vcc and writes $sgpr0_sgpr1, so no V_XOR3_B32_e64 is expected.
+---
+name: mask_hazard_no_hazard2
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_no_hazard2
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $vcc
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    $sgpr0_sgpr1 = S_MOV_B64 $vcc
+    S_ENDPGM 0
+...
+
+# Overlapping VGPR write: the following instruction is a VALU write to $vgpr2, not an SALU write, so no V_XOR3_B32_e64 is expected.
+---
+name: mask_hazard_no_hazard3
+body: |
+  bb.0:
+    ; GCN-LABEL: name: mask_hazard_no_hazard3
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    S_ENDPGM 0
+...