Index: lib/CodeGen/PeepholeOptimizer.cpp
===================================================================
--- lib/CodeGen/PeepholeOptimizer.cpp
+++ lib/CodeGen/PeepholeOptimizer.cpp
@@ -1236,14 +1236,13 @@
 
       // If there exists an instruction which belongs to the following
       // categories, we will discard the load candidates.
+      if (MI->mayStore() || MI->isCall() || MI->hasUnmodeledSideEffects())
+        FoldAsLoadDefCandidates.clear();
+
       if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
           MI->isKill() || MI->isInlineAsm() ||
-          MI->hasUnmodeledSideEffects()) {
-        FoldAsLoadDefCandidates.clear();
+          MI->hasUnmodeledSideEffects())
         continue;
-      }
-      if (MI->mayStore() || MI->isCall())
-        FoldAsLoadDefCandidates.clear();
 
       if ((isUncoalescableCopy(*MI) &&
            optimizeUncoalescableCopy(MI, LocalMIs)) ||
Index: test/CodeGen/X86/avx-cvt.ll
===================================================================
--- test/CodeGen/X86/avx-cvt.ll
+++ test/CodeGen/X86/avx-cvt.ll
@@ -113,8 +113,7 @@
 define void @fpext() nounwind uwtable {
 ; CHECK-LABEL: fpext:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vcvtss2sd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    retq
   %f = alloca float, align 4
Index: test/CodeGen/X86/shift-bmi2.ll
===================================================================
--- test/CodeGen/X86/shift-bmi2.ll
+++ test/CodeGen/X86/shift-bmi2.ll
@@ -30,11 +30,10 @@
   %x = load i32, i32* %p
   %shl = shl i32 %x, %shamt
 ; BMI2: shl32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: shl32p
-; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -75,7 +74,7 @@
   %x = load i64, i64* %p
   %shl = shl i64 %x, %shamt
 ; BMI264: shl64p
-; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }
@@ -107,11 +106,10 @@
   %x = load i32, i32* %p
   %shl = lshr i32 %x, %shamt
 ; BMI2: lshr32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: lshr32p
-; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -130,7 +128,7 @@
   %x = load i64, i64* %p
   %shl = lshr i64 %x, %shamt
 ; BMI264: lshr64p
-; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }
@@ -153,10 +151,10 @@ %x = load i32, i32* %p
   %shl = ashr i32 %x, %shamt
 ; BMI2: ashr32p
 ; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI2: ret
 ; BMI264: ashr32p
-; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i32 %shl
 }
@@ -175,7 +173,7 @@
   %x = load i64, i64* %p
   %shl = ashr i64 %x, %shamt
 ; BMI264: ashr64p
-; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
 ; BMI264: ret
   ret i64 %shl
 }