diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -81,6 +81,37 @@
   }
 }
 
+/// Remove the barrier dependencies that \p SU has on export predecessors.
+///
+/// When \p SU is not itself an export, every non-export barrier predecessor
+/// of a removed export is re-attached to \p SU as a barrier edge, so the
+/// ordering against those predecessors is kept.
+static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
+  SmallVector<SDep, 2> ToAdd, ToRemove;
+
+  for (const SDep &Pred : SU.Preds) {
+    SUnit *PredSU = Pred.getSUnit();
+    if (Pred.isBarrier() && isExport(*PredSU)) {
+      ToRemove.push_back(Pred);
+      if (isExport(SU))
+        continue;
+
+      // If we remove a barrier we need to copy dependencies
+      // from the predecessor to maintain order.
+      for (const SDep &ExportPred : PredSU->Preds) {
+        SUnit *ExportPredSU = ExportPred.getSUnit();
+        if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
+          ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
+      }
+    }
+  }
+
+  for (const SDep &Pred : ToRemove)
+    SU.removePred(Pred);
+  for (const SDep &Pred : ToAdd)
+    DAG->addEdge(&SU, Pred);
+}
+
 void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
 
@@ -97,15 +128,7 @@
       if (isPositionExport(TII, &SU))
         PosCount++;
     }
-
-    SmallVector<SDep, 2> ToRemove;
-    for (const SDep &Pred : SU.Preds) {
-      SUnit *PredSU = Pred.getSUnit();
-      if (Pred.isBarrier() && isExport(*PredSU))
-        ToRemove.push_back(Pred);
-    }
-    for (SDep Pred : ToRemove)
-      SU.removePred(Pred);
+    removeExportDependencies(DAG, SU);
   }
 
   // Apply clustering if there are multiple exports