Page MenuHomePhabricator

D62058.diff
No OneTemporary

File Metadata

Created
Sun, Aug 25, 8:18 AM

D62058.diff

Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -901,6 +901,7 @@
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
if (!SDST) {
for (const auto &MO : MI->implicit_operands()) {
@@ -919,22 +920,37 @@
return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
};
- // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent
- // between any at risk SMEM and any SALU dependent on the SMEM results.
- auto IsExpiredFn = [TII] (MachineInstr *MI, int) {
+ auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
if (MI) {
if (TII->isSALU(*MI)) {
- if (TII->isSOPP(*MI))
- return false;
switch (MI->getOpcode()) {
case AMDGPU::S_SETVSKIP:
case AMDGPU::S_VERSION:
case AMDGPU::S_WAITCNT_VSCNT:
case AMDGPU::S_WAITCNT_VMCNT:
case AMDGPU::S_WAITCNT_EXPCNT:
- case AMDGPU::S_WAITCNT_LGKMCNT:
+ // These instructions cannot mitigate the hazard.
return false;
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ // Reducing lgkmcnt count to 0 always mitigates the hazard.
+ return (MI->getOperand(1).getImm() == 0) &&
+ (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+ case AMDGPU::S_WAITCNT: {
+ const int64_t Imm = MI->getOperand(0).getImm();
+ AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
+ return (Decoded.LgkmCnt == 0);
+ }
default:
+ // SOPP instructions cannot mitigate the hazard.
+ if (TII->isSOPP(*MI))
+ return false;
+ // At this point the SALU can be assumed to mitigate the hazard
+ // because either:
+ // (a) it is independent of the at risk SMEM (breaking chain),
+ // or
+ // (b) it is dependent on the SMEM, in which case an appropriate
+ // s_waitcnt lgkmcnt _must_ exist between it and the at risk
+ // SMEM instruction.
return true;
}
}
Index: llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
@@ -29,13 +29,13 @@
S_ENDPGM 0
...
-# GCN-LABEL: name: hazard_smem_war_related_clause
+# GCN-LABEL: name: hazard_smem_war_dependent_salu
# GCN: S_LOAD_DWORD_IMM
# GCN-NEXT: S_WAITCNT
# GCN-NEXT: S_ADD_U32
# GCN-NEXT: V_CMP_EQ_F32
---
-name: hazard_smem_war_related_clause
+name: hazard_smem_war_dependent_salu
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
@@ -46,19 +46,128 @@
S_ENDPGM 0
...
-# GCN-LABEL: name: hazard_smem_war_related_clause_vmcnt
+# GCN-LABEL: name: hazard_smem_war_independent_salu
# GCN: S_LOAD_DWORD_IMM
-# GCN-NEXT: S_WAITCNT 3952{{$}}
+# GCN-NEXT: S_WAITCNT
# GCN-NEXT: S_ADD_U32
# GCN-NEXT: V_CMP_EQ_F32
---
-name: hazard_smem_war_related_clause_vmcnt
+name: hazard_smem_war_independent_salu
body: |
bb.0:
- liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
+ liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_WAITCNT 0
+ $sgpr3 = S_ADD_U32 $sgpr5, $sgpr4, implicit-def $scc
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_smem
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_LOAD_DWORD_IMM
+# GCN-NEXT: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_smem
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr6, $sgpr7, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr5 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_waitcnt_0
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_waitcnt_0
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_WAITCNT 0
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_vmcnt_0
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT 3952{{$}}
+# GCN-NEXT: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_vmcnt_0
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
S_WAITCNT 3952
- $sgpr3 = S_ADD_U32 $sgpr2, $sgpr4, implicit-def $scc
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_expcnt_0
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT 53007{{$}}
+# GCN-NEXT: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_expcnt_0
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_WAITCNT 53007
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_lgkmcnt_0
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT 49279{{$}}
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_lgkmcnt_0
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_WAITCNT 49279
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_waitcnt_lgkmcnt_0
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT_LGKMCNT
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_waitcnt_lgkmcnt_0
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_WAITCNT_LGKMCNT $sgpr_null, 0
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_only_waitcnt_lgkmcnt_1
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_WAITCNT_LGKMCNT
+# GCN-NEXT: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_only_waitcnt_lgkmcnt_1
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_WAITCNT_LGKMCNT $sgpr_null, 1
$sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
S_ENDPGM 0
...

Event Timeline