diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1167,7 +1167,11 @@ if (OptExecMaskPreRA) insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID); - insertPass(&MachineSchedulerID, &SIFormMemoryClausesID); + + // This is not an essential optimization and it has a noticeable impact on + // compilation time, so we only enable it from O2. + if (TM->getOptLevel() > CodeGenOpt::Less) + insertPass(&MachineSchedulerID, &SIFormMemoryClausesID); // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -351,7 +351,6 @@ ; GCN-O1-NEXT: Live Register Matrix ; GCN-O1-NEXT: SI Pre-allocate WWM Registers ; GCN-O1-NEXT: SI optimize exec mask operations pre-RA -; GCN-O1-NEXT: SI Form memory clauses ; GCN-O1-NEXT: Machine Natural Loop Construction ; GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Debug Variable Analysis @@ -635,7 +634,6 @@ ; GCN-O1-OPTS-NEXT: Live Register Matrix ; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers ; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA -; GCN-O1-OPTS-NEXT: SI Form memory clauses ; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Debug Variable Analysis