Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1037,6 +1037,11 @@
     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                  unsigned Index) const override;
 
+    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
+                                      unsigned AddrSpace) const override {
+      return true;
+    }
+
     /// Intel processors have a unified instruction and data cache
     const char * getClearCacheBuiltinName() const override {
       return nullptr; // nothing to do, move along.
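Note: storeOfVectorConstantIsCheap() is the TargetLowering hook that DAGCombiner consults when merging consecutive stores of non-zero constants into a single vector store (all-zero stores are always eligible); the default implementation returns false. Overriding it to return true opts x86 into that merging whenever the resulting vector type is legal. A minimal LLVM IR sketch of the pattern this unlocks, mirroring the big_nonzero_16_bytes test updated below (the function name is illustrative, not from the patch):

; Four adjacent i32 constant stores. With the override above, DAGCombiner
; may merge them into a single 16-byte store of the vector constant
; [1,2,3,4], which on AVX targets lowers to a constant-pool load plus
; one vmovups, as the updated merge-store-constants.ll checks show.
define void @store_four_consts(i32* %a) {
  store i32 1, i32* %a, align 4
  %p1 = getelementptr inbounds i32, i32* %a, i64 1
  store i32 2, i32* %p1, align 4
  %p2 = getelementptr inbounds i32, i32* %a, i64 2
  store i32 3, i32* %p2, align 4
  %p3 = getelementptr inbounds i32, i32* %a, i64 3
  store i32 4, i32* %p3, align 4
  ret void
}
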
Index: test/CodeGen/X86/avx512-regcall-Mask.ll
===================================================================
--- test/CodeGen/X86/avx512-regcall-Mask.ll
+++ test/CodeGen/X86/avx512-regcall-Mask.ll
@@ -96,34 +96,21 @@
 }
 
 ; X32-LABEL: caller_argv64i1:
-; X32: movl $2, %eax
-; X32: movl $1, %ecx
-; X32: movl $2, %edx
-; X32: movl $1, %edi
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: pushl ${{1|2}}
-; X32: call{{.*}} _test_argv64i1
-
+; X32: pushl %edi
+; X32: subl $88, %esp
+; X32: vmovaps __xmm@00000001000000020000000100000002, %xmm0 # xmm0 = [2,1,2,1]
+; X32: vmovups %xmm0, 64(%esp)
+; X32: vmovaps LCPI1_1, %zmm0 # zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
+; X32: vmovups %zmm0, (%esp)
+; X32: movl $1, 84(%esp)
+; X32: movl $2, 80(%esp)
+; X32: movl $2, %eax
+; X32: movl $1, %ecx
+; X32: movl $2, %edx
+; X32: movl $1, %edi
+; X32: vzeroupper
+; X32: calll _test_argv64i1
+
 ; WIN64-LABEL: caller_argv64i1:
 ; WIN64: movabsq $4294967298, %rax
 ; WIN64: movq %rax, (%rsp)
Index: test/CodeGen/X86/fold-vector-sext-crash2.ll
===================================================================
--- test/CodeGen/X86/fold-vector-sext-crash2.ll
+++ test/CodeGen/X86/fold-vector-sext-crash2.ll
@@ -28,13 +28,13 @@
 ;
 ; X64-LABEL: test_sext1:
 ; X64:       # BB#0:
+; X64-NEXT:    pcmpeqd %xmm0, %xmm0
+; X64-NEXT:    movdqa %xmm0, 48(%rdi)
+; X64-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551517,18446744073709551615]
+; X64-NEXT:    movaps %xmm0, 32(%rdi)
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
-; X64-NEXT:    movq $-1, 56(%rdi)
-; X64-NEXT:    movq $-1, 48(%rdi)
-; X64-NEXT:    movq $-1, 40(%rdi)
-; X64-NEXT:    movq $-99, 32(%rdi)
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Se = sext <2 x i8> to <2 x i256>
@@ -66,13 +66,13 @@
 ;
 ; X64-LABEL: test_sext2:
 ; X64:       # BB#0:
+; X64-NEXT:    pcmpeqd %xmm0, %xmm0
+; X64-NEXT:    movdqa %xmm0, 48(%rdi)
+; X64-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709549617,18446744073709551615]
+; X64-NEXT:    movaps %xmm0, 32(%rdi)
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
-; X64-NEXT:    movq $-1, 56(%rdi)
-; X64-NEXT:    movq $-1, 48(%rdi)
-; X64-NEXT:    movq $-1, 40(%rdi)
-; X64-NEXT:    movq $-1999, 32(%rdi) # imm = 0xF831
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Se = sext <2 x i128> to <2 x i256>
@@ -106,10 +106,11 @@
 ; X64:       # BB#0:
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 48(%rdi)
+; X64-NEXT:    movl $254, %eax
+; X64-NEXT:    movq %rax, %xmm1
+; X64-NEXT:    movdqa %xmm1, 32(%rdi)
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
-; X64-NEXT:    movq $0, 40(%rdi)
-; X64-NEXT:    movq $254, 32(%rdi)
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Se = zext <2 x i8> to <2 x i256>
@@ -143,10 +144,10 @@
 ; X64:       # BB#0:
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, 48(%rdi)
+; X64-NEXT:    movaps {{.*#+}} xmm1 = [18446744073709551614,18446744073709551615]
+; X64-NEXT:    movaps %xmm1, 32(%rdi)
 ; X64-NEXT:    movaps %xmm0, 16(%rdi)
 ; X64-NEXT:    movaps %xmm0, (%rdi)
-; X64-NEXT:    movq $-1, 40(%rdi)
-; X64-NEXT:    movq $-2, 32(%rdi)
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    retq
   %Se = zext <2 x i128> to <2 x i256>
Index: test/CodeGen/X86/merge-store-constants.ll
===================================================================
--- test/CodeGen/X86/merge-store-constants.ll
+++ test/CodeGen/X86/merge-store-constants.ll
@@ -6,18 +6,14 @@
 ; X32-LABEL: big_nonzero_16_bytes:
 ; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $1, (%eax)
-; X32-NEXT:    movl $2, 4(%eax)
-; X32-NEXT:    movl $3, 8(%eax)
-; X32-NEXT:    movl $4, 12(%eax)
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,3,4]
+; X32-NEXT:    vmovups %xmm0, (%eax)
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: big_nonzero_16_bytes:
 ; X64:       # BB#0:
-; X64-NEXT:    movabsq $8589934593, %rax # imm = 0x200000001
-; X64-NEXT:    movq %rax, (%rdi)
-; X64-NEXT:    movabsq $17179869187, %rax # imm = 0x400000003
-; X64-NEXT:    movq %rax, 8(%rdi)
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,3,4]
+; X64-NEXT:    vmovups %xmm0, (%rdi)
 ; X64-NEXT:    retq
   %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
@@ -36,23 +32,16 @@
 ; X32-LABEL: big_nonzero_32_bytes_splat:
 ; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $42, (%eax)
-; X32-NEXT:    movl $42, 4(%eax)
-; X32-NEXT:    movl $42, 8(%eax)
-; X32-NEXT:    movl $42, 12(%eax)
-; X32-NEXT:    movl $42, 16(%eax)
-; X32-NEXT:    movl $42, 20(%eax)
-; X32-NEXT:    movl $42, 24(%eax)
-; X32-NEXT:    movl $42, 28(%eax)
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
+; X32-NEXT:    vmovups %ymm0, (%eax)
+; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: big_nonzero_32_bytes_splat:
 ; X64:       # BB#0:
-; X64-NEXT:    movabsq $180388626474, %rax # imm = 0x2A0000002A
-; X64-NEXT:    movq %rax, (%rdi)
-; X64-NEXT:    movq %rax, 8(%rdi)
-; X64-NEXT:    movq %rax, 16(%rdi)
-; X64-NEXT:    movq %rax, 24(%rdi)
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
+; X64-NEXT:    vmovups %ymm0, (%rdi)
+; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
   %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 2
@@ -79,37 +68,29 @@
 ; X32-LABEL: big_nonzero_63_bytes:
 ; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $0, 4(%eax)
-; X32-NEXT:    movl $1, (%eax)
-; X32-NEXT:    movl $0, 12(%eax)
-; X32-NEXT:    movl $2, 8(%eax)
-; X32-NEXT:    movl $0, 20(%eax)
-; X32-NEXT:    movl $3, 16(%eax)
-; X32-NEXT:    movl $0, 28(%eax)
-; X32-NEXT:    movl $4, 24(%eax)
-; X32-NEXT:    movl $0, 36(%eax)
-; X32-NEXT:    movl $5, 32(%eax)
-; X32-NEXT:    movl $0, 44(%eax)
-; X32-NEXT:    movl $6, 40(%eax)
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [1,0,2,0,3,0,4,0]
+; X32-NEXT:    vmovups %ymm0, (%eax)
+; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [5,0,6,0]
+; X32-NEXT:    vmovups %xmm0, 32(%eax)
 ; X32-NEXT:    movl $0, 52(%eax)
 ; X32-NEXT:    movl $7, 48(%eax)
 ; X32-NEXT:    movl $8, 56(%eax)
 ; X32-NEXT:    movw $9, 60(%eax)
 ; X32-NEXT:    movb $10, 62(%eax)
+; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: big_nonzero_63_bytes:
 ; X64:       # BB#0:
-; X64-NEXT:    movq $1, (%rdi)
-; X64-NEXT:    movq $2, 8(%rdi)
-; X64-NEXT:    movq $3, 16(%rdi)
-; X64-NEXT:    movq $4, 24(%rdi)
-; X64-NEXT:    movq $5, 32(%rdi)
-; X64-NEXT:    movq $6, 40(%rdi)
+; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,3,4]
+; X64-NEXT:    vmovups %ymm0, (%rdi)
+; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [5,6]
+; X64-NEXT:    vmovups %xmm0, 32(%rdi)
 ; X64-NEXT:    movq $7, 48(%rdi)
 ; X64-NEXT:    movl $8, 56(%rdi)
 ; X64-NEXT:    movw $9, 60(%rdi)
 ; X64-NEXT:    movb $10, 62(%rdi)
+; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
   %a8 = bitcast i8* %a to i64*
   %arrayidx8 = getelementptr inbounds i64, i64* %a8, i64 1
Index: test/CodeGen/X86/mod128.ll
===================================================================
--- test/CodeGen/X86/mod128.ll
+++ test/CodeGen/X86/mod128.ll
@@ -9,14 +9,13 @@
   ; X86-64: callq __modti3
   ; X86-64-NOT: movd %xmm0, %rax
 
-  ; WIN64-NOT: movl $3, %r8d
-  ; WIN64-NOT: xorl %r9d, %r9d
-  ; WIN64-DAG: movq %rdx, 56(%rsp)
-  ; WIN64-DAG: movq %rcx, 48(%rsp)
-  ; WIN64-DAG: leaq 48(%rsp), %rcx
-  ; WIN64-DAG: leaq 32(%rsp), %rdx
-  ; WIN64-DAG: movq $0, 40(%rsp)
-  ; WIN64-DAG: movq $3, 32(%rsp)
+  ; WIN64-DAG: movq %rdx, 40(%rsp)
+  ; WIN64-DAG: movq %rcx, 32(%rsp)
+  ; WIN64-DAG: movl $3, %eax
+  ; WIN64-DAG: movq %rax, %xmm0
+  ; WIN64-DAG: movdqa %xmm0, 48(%rsp)
+  ; WIN64-DAG: leaq 48(%rsp), %rdx
+  ; WIN64-DAG: leaq 32(%rsp), %rcx
 
   ; WIN64: callq __modti3
   ; WIN64: movq %xmm0, %rax
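Note on the test fallout above: materializing merged constants in xmm/ymm registers has two visible side effects in the updated checks. First, functions that now touch 256-bit ymm state emit vzeroupper before returning or calling, to avoid AVX-to-SSE transition stalls. Second, on Win64 the 128-bit divisor passed indirectly to __modti3 is now written to its stack slot as one 16-byte movdqa instead of two scalar movq stores. The body of mod128.ll is not part of this diff; a hedged sketch of the shape it presumably exercises:

; Hypothetical reconstruction (the test source is not shown in this diff):
; a signed i128 remainder by the constant 3 lowers to a __modti3 libcall;
; on Win64 both i128 arguments are passed indirectly, and per the WIN64
; checks the constant 3 is now built in %xmm0 and spilled with one movdqa.
define i64 @mod128(i128 %x) nounwind {
  %r = srem i128 %x, 3
  %t = trunc i128 %r to i64
  ret i64 %t
}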