# Review notes (commentary placed ahead of the first `diff --git` header; it is
# not part of any hunk).
#
# 1. llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (two hunks)
#    The addPass() calls for SIInsertHardClausesID (still guarded by
#    getOptLevel() > CodeGenOpt::None), SIRemoveShortExecBranchesID,
#    SIInsertSkipsPassID and SIPreEmitPeepholeID are deleted from their old
#    position after addPass(&PostRAHazardRecognizerID) and re-added right after
#    addPass(createSIModeRegisterPass()), i.e. ahead of the comment block that
#    introduces the stand-alone hazard recognizer. In the second hunk the only
#    addPass() left after addPass(&PostRAHazardRecognizerID) is
#    addPass(&BranchRelaxationPassID).
#
# 2. llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir (new file, 24 lines)
#    Runs llc with -mcpu=gfx908 and -start-before=si-pre-emit-peephole, then
#    uses FileCheck (prefix GCN) to require an s_nop directly between
#    v_accvgpr_read_b32 and flat_load_dword in the emitted assembly.
#
# NOTE(review): in this copy of the patch the original line breaks have been
# collapsed into spaces; they must be restored before the patch can be applied.
# NOTE(review): the context text "emit S_NOP when possible" looks as if an
# operand placeholder was dropped (presumably "S_NOP <N>") - confirm against
# the base file, since context lines have to match it exactly.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1033,6 +1033,12 @@ addPass(createSIShrinkInstructionsPass()); addPass(createSIModeRegisterPass()); + if (getOptLevel() > CodeGenOpt::None) + addPass(&SIInsertHardClausesID); + + addPass(&SIRemoveShortExecBranchesID); + addPass(&SIInsertSkipsPassID); + addPass(&SIPreEmitPeepholeID); // The hazard recognizer that runs as part of the post-ra scheduler does not // guarantee to be able handle all hazards correctly. This is because if there // are multiple scheduling regions in a basic block, the regions are scheduled @@ -1045,12 +1051,6 @@ // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would // be better for it to emit S_NOP when possible. addPass(&PostRAHazardRecognizerID); - if (getOptLevel() > CodeGenOpt::None) - addPass(&SIInsertHardClausesID); - - addPass(&SIRemoveShortExecBranchesID); - addPass(&SIInsertSkipsPassID); - addPass(&SIPreEmitPeepholeID); addPass(&BranchRelaxationPassID); } diff --git a/llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir b/llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hazard-pass-ordering.mir @@ -0,0 +1,24 @@ +# RUN: llc -march=amdgcn -mcpu=gfx908 -start-before=si-pre-emit-peephole %s -o - | FileCheck -check-prefix=GCN %s + +# Verify that the dedicated hazard recognizer pass is run after late peephole +# optimizations. New hazards can be introduced if instructions are removed by +# passes that are run before the final hazard recognizer. 
+ +--- +# GCN-LABEL: {{^}}mai_hazard_pass_ordering_optimize_vcc_branch: +# GCN: v_accvgpr_read_b32 +# GCN-NEXT: s_nop +# GCN-NEXT: flat_load_dword +name: mai_hazard_pass_ordering_optimize_vcc_branch +body: | + bb.0: + $vgpr0 = V_MOV_B32_e32 1, implicit $exec + $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + $sgpr8_sgpr9 = S_MOV_B64 -1 + $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc + + bb.1: + S_ENDPGM 0 +...