Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9529,6 +9529,13 @@
   const TargetOptions &Options = DAG.getTarget().Options;
   const SDNodeFlags Flags = N->getFlags();
 
+  // fold (fadd x, undef) -> undef
+  if (N0.isUndef())
+    return N0;
+
+  if (N1.isUndef())
+    return N1;
+
   // fold vector ops
   if (VT.isVector())
     if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -9764,6 +9771,13 @@
   const TargetOptions &Options = DAG.getTarget().Options;
   const SDNodeFlags Flags = N->getFlags();
 
+  // fold (fmul x, undef) -> undef
+  if (N0.isUndef())
+    return N0;
+
+  if (N1.isUndef())
+    return N1;
+
   // fold vector ops
   if (VT.isVector()) {
     // This just handles C1 * C2 for vectors. Other vector folds are below.
Index: test/CodeGen/X86/2012-04-26-sdglue.ll
===================================================================
--- test/CodeGen/X86/2012-04-26-sdglue.ll
+++ test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -7,25 +7,14 @@
 define void @func() nounwind ssp {
 ; CHECK-LABEL: func:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups 0, %xmm0
-; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
-; CHECK-NEXT: vbroadcastss 32, %xmm3
-; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; CHECK-NEXT: vmulps %ymm0, %ymm2, %ymm2
-; CHECK-NEXT: vmulps %ymm0, %ymm0, %ymm0
-; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
-; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
-; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
-; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
-; CHECK-NEXT: vhaddps %ymm0, %ymm0, %ymm0
-; CHECK-NEXT: vsubps %ymm0, %ymm0, %ymm0
-; CHECK-NEXT: vhaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: vhaddps %ymm0, %ymm0, %ymm1
+; CHECK-NEXT: vsubps %ymm0, %ymm1, %ymm1
+; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT: vmovaps %ymm0, (%rax)
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
   %tmp = load <4 x float>, <4 x float>* null, align 1
   %tmp14 = getelementptr <4 x float>, <4 x float>* null, i32 2
   %tmp15 = load <4 x float>, <4 x float>* %tmp14, align 1
Index: test/CodeGen/X86/pr23103.ll
===================================================================
--- test/CodeGen/X86/pr23103.ll
+++ test/CodeGen/X86/pr23103.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx < %s | FileCheck %s
 
 ; When commuting a VADDSDrr instruction, verify that the 'IsUndef' flag is
@@ -8,11 +9,14 @@
 
 define <1 x double> @pr23103(<1 x double>* align 8 %Vp) {
 ; CHECK-LABEL: pr23103:
-; CHECK: vmovsd (%rdi), %xmm0
-; CHECK-NEXT: vmovsd %xmm0, {{.*}}(%rsp) {{.*#+}} 8-byte Spill
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .Lcfi0:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT: callq foo
-; CHECK-NEXT: vaddsd {{.*}}(%rsp), %xmm0, %xmm0 {{.*#+}} 8-byte Folded Reload
-; CHECK: retq
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
 entry:
   %V = load <1 x double>, <1 x double>* %Vp, align 8
   %call = call zeroext i1 @foo(<1 x double> %V)
Index: test/CodeGen/X86/sse3-avx-addsub-2.ll
===================================================================
--- test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -412,14 +412,12 @@
 ; SSE-NEXT: movaps %xmm1, %xmm4
 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT: subss %xmm4, %xmm3
-; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; SSE-NEXT: addss %xmm0, %xmm4
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE-NEXT: addss %xmm0, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,2,1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
@@ -429,13 +427,10 @@
 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
 ; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
 ; AVX-NEXT: vsubss %xmm4, %xmm3, %xmm3
-; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX-NEXT: vaddss %xmm0, %xmm4, %xmm4
 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm4[0],xmm2[2,3]
-; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm3[0],xmm2[3]
 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
 ; AVX-NEXT: retq
   %1 = extractelement <4 x float> %A, i32 0