Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -11217,6 +11217,8 @@
             (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
   def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
             (VMOVDDUPZ128rm addr:$src)>;
+  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+            (VMOVDDUPZ128rm addr:$src)>;
 
   def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                      (v2f64 VR128X:$src0)),
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -4669,12 +4669,16 @@
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
 }
 
 let Predicates = [UseSSE3] in {
   // No need for aligned memory as this only loads 64-bits.
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (MOVDDUPrm addr:$src)>;
+  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+            (MOVDDUPrm addr:$src)>;
 }
 
 //===---------------------------------------------------------------------===//
@@ -8034,6 +8038,8 @@
             (VMOVDDUPrr VR128:$src)>;
   def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
             (VMOVDDUPrm addr:$src)>;
+  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+            (VMOVDDUPrm addr:$src)>;
 }
 
 let Predicates = [HasAVX1Only] in {
Index: test/CodeGen/X86/build-vector-128.ll
===================================================================
--- test/CodeGen/X86/build-vector-128.ll
+++ test/CodeGen/X86/build-vector-128.ll
@@ -556,8 +556,7 @@
 ;
 ; AVX2-32-LABEL: PR37502:
 ; AVX2-32:       # %bb.0:
-; AVX2-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX2-32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX2-32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX2-32-NEXT:    retl
 ;
 ; AVX2-64-LABEL: PR37502:
Index: test/CodeGen/X86/movddup-load-fold.ll
===================================================================
--- test/CodeGen/X86/movddup-load-fold.ll
+++ test/CodeGen/X86/movddup-load-fold.ll
@@ -9,14 +9,12 @@
 define <4 x float> @movddup_load_fold(float %x, float %y) {
 ; SSE-LABEL: movddup_load_fold:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 ; SSE-NEXT:    retl
 ;
 ; AVX-LABEL: movddup_load_fold:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX-NEXT:    retl
   %i0 = insertelement <4 x float> zeroinitializer, float %x, i32 0
   %i1 = insertelement <4 x float> %i0, float %y, i32 1
Index: test/CodeGen/X86/vector-shuffle-combining-xop.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -332,8 +332,7 @@
 ; X86AVX2-LABEL: buildvector_v4f32_0404:
 ; X86AVX2:       # %bb.0:
 ; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; X86AVX2-NEXT:    vmovapd %xmm0, (%eax)
 ; X86AVX2-NEXT:    retl
 ;