diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1167,7 +1167,11 @@
 
   if (OptExecMaskPreRA)
     insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
-  insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
+
+  // This is not an essential optimization and it has a noticeable impact on
+  // compilation time, so we only enable it at -O2 and above.
+  if (TM->getOptLevel() > CodeGenOpt::Less)
+    insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
 
   // This must be run immediately after phi elimination and before
   // TwoAddressInstructions, otherwise the processing of the tied operand of
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -351,7 +351,6 @@
 ; GCN-O1-NEXT:         Live Register Matrix
 ; GCN-O1-NEXT:         SI Pre-allocate WWM Registers
 ; GCN-O1-NEXT:         SI optimize exec mask operations pre-RA
-; GCN-O1-NEXT:         SI Form memory clauses
 ; GCN-O1-NEXT:         Machine Natural Loop Construction
 ; GCN-O1-NEXT:         Machine Block Frequency Analysis
 ; GCN-O1-NEXT:         Debug Variable Analysis
@@ -635,7 +634,6 @@
 ; GCN-O1-OPTS-NEXT:         Live Register Matrix
 ; GCN-O1-OPTS-NEXT:         SI Pre-allocate WWM Registers
 ; GCN-O1-OPTS-NEXT:         SI optimize exec mask operations pre-RA
-; GCN-O1-OPTS-NEXT:         SI Form memory clauses
 ; GCN-O1-OPTS-NEXT:         Machine Natural Loop Construction
 ; GCN-O1-OPTS-NEXT:         Machine Block Frequency Analysis
 ; GCN-O1-OPTS-NEXT:         Debug Variable Analysis