diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2056,6 +2056,19 @@
                               Depth + 1))
       return true;
 
+    // If all bits of the extracted element are known, simplify it to a
+    // constant.
+    if (Known2.isConstant()) {
+      APInt KnownVal = Known2.getConstant().zextOrTrunc(BitWidth);
+      if (VT.isInteger())
+        return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownVal, dl, VT));
+      if (VT.isFloatingPoint())
+        return TLO.CombineTo(
+            Op,
+            TLO.DAG.getConstantFP(
+                APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), KnownVal), dl, VT));
+    }
+
     // Attempt to avoid multi-use ops if we don't need anything from them.
     if (!DemandedSrcBits.isAllOnesValue() ||
         !DemandedSrcElts.isAllOnesValue()) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
--- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
@@ -24,17 +24,8 @@
 define void @test2(float * %p1, i32 %v1) {
 ; CHECK-LABEL: test2:
 ; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: sub sp, sp, #16 ; =16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: movi.16b v0, #63
-; CHECK-NEXT: and x8, x1, #0x3
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: bfi x9, x8, #2, #2
-; CHECK-NEXT: ldr s0, [x9]
-; CHECK-NEXT: str s0, [x0]
-; CHECK-NEXT: add sp, sp, #16 ; =16
+; CHECK-NEXT: mov w8, #1061109567
+; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 entry:
   %v2 = extractelement <3 x float> , i32 %v1
diff --git a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
--- a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll
@@ -30,10 +30,10 @@
 define [1 x <4 x float>] @test2() {
 ; CHECK-LABEL: .p2align 4 ; -- Begin function test2
 ; CHECK-NEXT: lCPI1_0:
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x00000000 ; float 0
-; CHECK-NEXT: .long 0x3f800000 ; float 1
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0x80000000 ; float -0
+; CHECK-NEXT: .long 0xbf800000 ; float -1
 ; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions
 ; CHECK-NEXT: .globl _test2
 ; CHECK-NEXT: .p2align 2
@@ -43,17 +43,7 @@
 ; CHECK-NEXT: Lloh2:
 ; CHECK-NEXT: adrp x8, lCPI1_0@PAGE
 ; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF]
-; CHECK-NEXT: mov s2, v1[1]
-; CHECK-NEXT: fneg s0, s1
-; CHECK-NEXT: mov s3, v1[2]
-; CHECK-NEXT: fneg s2, s2
-; CHECK-NEXT: mov s1, v1[3]
-; CHECK-NEXT: fneg s3, s3
-; CHECK-NEXT: mov.s v0[1], v2[0]
-; CHECK-NEXT: mov.s v0[2], v3[0]
-; CHECK-NEXT: fneg s1, s1
-; CHECK-NEXT: mov.s v0[3], v1[0]
+; CHECK-NEXT: ldr q0, [x8, lCPI1_0@PAGEOFF]
 ; CHECK-NEXT: ret
 ;
   ret [1 x <4 x float>] [<4 x float>
diff --git a/llvm/test/CodeGen/ARM/func-argpassing-endian.ll b/llvm/test/CodeGen/ARM/func-argpassing-endian.ll
--- a/llvm/test/CodeGen/ARM/func-argpassing-endian.ll
+++ b/llvm/test/CodeGen/ARM/func-argpassing-endian.ll
@@ -102,31 +102,33 @@
 define <4 x i32> @return_v4i32() {
 ; CHECK-LE-LABEL: return_v4i32:
 ; CHECK-LE: @ %bb.0:
-; CHECK-LE-NEXT: adr r0, .LCPI6_0
-; CHECK-LE-NEXT: vld1.64 {d16, d17}, [r0:128]
+; CHECK-LE-NEXT: vldr d16, .LCPI6_0
+; CHECK-LE-NEXT: vldr d17, .LCPI6_1
 ; CHECK-LE-NEXT: vmov r0, r1, d16
 ; CHECK-LE-NEXT: vmov r2, r3, d17
 ; CHECK-LE-NEXT: bx lr
-; CHECK-LE-NEXT: .p2align 4
+; CHECK-LE-NEXT: .p2align 3
 ; CHECK-LE-NEXT: @ %bb.1:
 ; CHECK-LE-NEXT: .LCPI6_0:
 ; CHECK-LE-NEXT: .long 42 @ double 9.1245819032257467E-313
 ; CHECK-LE-NEXT: .long 43
+; CHECK-LE-NEXT: .LCPI6_1:
 ; CHECK-LE-NEXT: .long 44 @ double 9.5489810615176143E-313
 ; CHECK-LE-NEXT: .long 45
 ;
 ; CHECK-BE-LABEL: return_v4i32:
 ; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: adr r0, .LCPI6_0
-; CHECK-BE-NEXT: vld1.64 {d16, d17}, [r0:128]
+; CHECK-BE-NEXT: vldr d16, .LCPI6_0
+; CHECK-BE-NEXT: vldr d17, .LCPI6_1
 ; CHECK-BE-NEXT: vmov r1, r0, d16
 ; CHECK-BE-NEXT: vmov r3, r2, d17
 ; CHECK-BE-NEXT: bx lr
-; CHECK-BE-NEXT: .p2align 4
+; CHECK-BE-NEXT: .p2align 3
 ; CHECK-BE-NEXT: @ %bb.1:
 ; CHECK-BE-NEXT: .LCPI6_0:
 ; CHECK-BE-NEXT: .long 42 @ double 8.912382324178626E-313
 ; CHECK-BE-NEXT: .long 43
+; CHECK-BE-NEXT: .LCPI6_1:
 ; CHECK-BE-NEXT: .long 44 @ double 9.3367814824704935E-313
 ; CHECK-BE-NEXT: .long 45
   ret < 4 x i32> < i32 42, i32 43, i32 44, i32 45 >
diff --git a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
--- a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -49,14 +49,12 @@
 define void @zero_test() {
 ; X32-LABEL: zero_test:
 ; X32: # %bb.0: # %entry
-; X32-NEXT: xorps %xmm0, %xmm0
-; X32-NEXT: movlps %xmm0, (%eax)
+; X32-NEXT: movl $0, (%eax)
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: zero_test:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: xorps %xmm0, %xmm0
-; X64-NEXT: movlps %xmm0, (%rax)
+; X64-NEXT: movq $0, (%rax)
 ; X64-NEXT: retq
 entry:
   %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
--- a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -29,8 +29,8 @@
 ; X86-LABEL: store_64:
 ; X86: # %bb.0: # %BB
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: movlps %xmm0, (%eax)
+; X86-NEXT: movl $0, 4(%eax)
+; X86-NEXT: movl $0, (%eax)
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: store_64:
diff --git a/llvm/test/CodeGen/X86/fold-load-vec.ll b/llvm/test/CodeGen/X86/fold-load-vec.ll
--- a/llvm/test/CodeGen/X86/fold-load-vec.ll
+++ b/llvm/test/CodeGen/X86/fold-load-vec.ll
@@ -10,8 +10,8 @@
 ; CHECK-NEXT: subq $24, %rsp
 ; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq $0, (%rsp)
 ; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movlps %xmm0, (%rsp)
 ; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; CHECK-NEXT: movlps %xmm0, (%rsp)
 ; CHECK-NEXT: movlps %xmm0, (%rsi)
diff --git a/llvm/test/CodeGen/X86/nontemporal-3.ll b/llvm/test/CodeGen/X86/nontemporal-3.ll
--- a/llvm/test/CodeGen/X86/nontemporal-3.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-3.ll
@@ -195,33 +195,14 @@
 }
 
 define void @test_zero_v8f32_align1(<8 x float>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v8f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: movntiq %rax, 24(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v8f32_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v8f32_align1:
 ; AVX: # %bb.0:
@@ -245,32 +226,14 @@
 }
 
 define void @test_zero_v4i64_align1(<4 x i64>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v4i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v4i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v4i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v4i64_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v4i64_align1:
 ; AVX: # %bb.0:
@@ -294,32 +257,14 @@
 }
 
 define void @test_zero_v8i32_align1(<8 x i32>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v8i32_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v8i32_align1:
 ; AVX: # %bb.0:
@@ -343,32 +288,14 @@
 }
 
 define void @test_zero_v16i16_align1(<16 x i16>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v16i16_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v16i16_align1:
 ; AVX: # %bb.0:
@@ -392,32 +319,14 @@
 }
 
 define void @test_zero_v32i8_align1(<32 x i8>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v32i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v32i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v32i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v32i8_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v32i8_align1:
 ; AVX: # %bb.0:
@@ -636,45 +545,18 @@
 }
 
 define void @test_zero_v16f32_align1(<16 x float>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v16f32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16f32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorl %eax, %eax
-; SSE4A-NEXT: movntiq %rax, 24(%rdi)
-; SSE4A-NEXT: movntiq %rax, 8(%rdi)
-; SSE4A-NEXT: movntiq %rax, 56(%rdi)
-; SSE4A-NEXT: movntiq %rax, 40(%rdi)
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16f32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v16f32_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 56(%rdi)
+; SSE-NEXT: movntiq %rax, 48(%rdi)
+; SSE-NEXT: movntiq %rax, 40(%rdi)
+; SSE-NEXT: movntiq %rax, 32(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v16f32_align1:
 ; AVX: # %bb.0:
@@ -706,44 +588,18 @@
 }
 
 define void @test_zero_v8i64_align1(<8 x i64>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v8i64_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v8i64_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v8i64_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v8i64_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 56(%rdi)
+; SSE-NEXT: movntiq %rax, 48(%rdi)
+; SSE-NEXT: movntiq %rax, 40(%rdi)
+; SSE-NEXT: movntiq %rax, 32(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v8i64_align1:
 ; AVX: # %bb.0:
@@ -775,44 +631,18 @@
 }
 
 define void @test_zero_v16i32_align1(<16 x i32>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v16i32_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v16i32_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v16i32_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v16i32_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 56(%rdi)
+; SSE-NEXT: movntiq %rax, 48(%rdi)
+; SSE-NEXT: movntiq %rax, 40(%rdi)
+; SSE-NEXT: movntiq %rax, 32(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v16i32_align1:
 ; AVX: # %bb.0:
@@ -844,44 +674,18 @@
 }
 
 define void @test_zero_v32i16_align1(<32 x i16>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v32i16_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v32i16_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v32i16_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v32i16_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 56(%rdi)
+; SSE-NEXT: movntiq %rax, 48(%rdi)
+; SSE-NEXT: movntiq %rax, 40(%rdi)
+; SSE-NEXT: movntiq %rax, 32(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v32i16_align1:
 ; AVX: # %bb.0:
@@ -913,44 +717,18 @@
 }
 
 define void @test_zero_v64i8_align1(<64 x i8>* %dst) nounwind {
-; SSE2-LABEL: test_zero_v64i8_align1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: movntiq %rax, 24(%rdi)
-; SSE2-NEXT: movntiq %rax, 16(%rdi)
-; SSE2-NEXT: movntiq %rax, 8(%rdi)
-; SSE2-NEXT: movntiq %rax, (%rdi)
-; SSE2-NEXT: movntiq %rax, 56(%rdi)
-; SSE2-NEXT: movntiq %rax, 48(%rdi)
-; SSE2-NEXT: movntiq %rax, 40(%rdi)
-; SSE2-NEXT: movntiq %rax, 32(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE4A-LABEL: test_zero_v64i8_align1:
-; SSE4A: # %bb.0:
-; SSE4A-NEXT: xorps %xmm0, %xmm0
-; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, (%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
-; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
-; SSE4A-NEXT: retq
-;
-; SSE41-LABEL: test_zero_v64i8_align1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: movntiq %rax, 24(%rdi)
-; SSE41-NEXT: movntiq %rax, 16(%rdi)
-; SSE41-NEXT: movntiq %rax, 8(%rdi)
-; SSE41-NEXT: movntiq %rax, (%rdi)
-; SSE41-NEXT: movntiq %rax, 56(%rdi)
-; SSE41-NEXT: movntiq %rax, 48(%rdi)
-; SSE41-NEXT: movntiq %rax, 40(%rdi)
-; SSE41-NEXT: movntiq %rax, 32(%rdi)
-; SSE41-NEXT: retq
+; SSE-LABEL: test_zero_v64i8_align1:
+; SSE: # %bb.0:
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: movntiq %rax, 24(%rdi)
+; SSE-NEXT: movntiq %rax, 16(%rdi)
+; SSE-NEXT: movntiq %rax, 8(%rdi)
+; SSE-NEXT: movntiq %rax, (%rdi)
+; SSE-NEXT: movntiq %rax, 56(%rdi)
+; SSE-NEXT: movntiq %rax, 48(%rdi)
+; SSE-NEXT: movntiq %rax, 40(%rdi)
+; SSE-NEXT: movntiq %rax, 32(%rdi)
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test_zero_v64i8_align1:
 ; AVX: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/pr41619.ll b/llvm/test/CodeGen/X86/pr41619.ll
--- a/llvm/test/CodeGen/X86/pr41619.ll
+++ b/llvm/test/CodeGen/X86/pr41619.ll
@@ -7,10 +7,9 @@
 ; CHECK: ## %bb.0: ## %bb
 ; CHECK-NEXT: vmovq %xmm0, %rax
 ; CHECK-NEXT: vmovd %eax, %xmm0
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; CHECK-NEXT: vmovq %xmm0, %rax
 ; CHECK-NEXT: movl %eax, (%rax)
-; CHECK-NEXT: vmovlps %xmm1, (%rax)
+; CHECK-NEXT: movq $0, (%rax)
 ; CHECK-NEXT: retq
 bb:
   %tmp = bitcast double %arg to i64
diff --git a/llvm/test/CodeGen/X86/vec_zero_cse.ll b/llvm/test/CodeGen/X86/vec_zero_cse.ll
--- a/llvm/test/CodeGen/X86/vec_zero_cse.ll
+++ b/llvm/test/CodeGen/X86/vec_zero_cse.ll
@@ -15,8 +15,8 @@
 ; X32: # %bb.0:
 ; X32-NEXT: movl $0, M1+4
 ; X32-NEXT: movl $0, M1
-; X32-NEXT: xorps %xmm0, %xmm0
-; X32-NEXT: movlps %xmm0, M2
+; X32-NEXT: movl $0, M2+4
+; X32-NEXT: movl $0, M2
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test1:
@@ -34,8 +34,8 @@
 ; X32: # %bb.0:
 ; X32-NEXT: movl $-1, M1+4
 ; X32-NEXT: movl $-1, M1
-; X32-NEXT: pcmpeqd %xmm0, %xmm0
-; X32-NEXT: movq %xmm0, M2
+; X32-NEXT: movl $-1, M2+4
+; X32-NEXT: movl $-1, M2
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test2:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3061,52 +3061,18 @@
 }
 
 define void @PR43024() {
-; SSE2-LABEL: PR43024:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
-; SSE2-NEXT: movaps %xmm0, (%rax)
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
-; SSE2-NEXT: addss %xmm0, %xmm1
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm1
-; SSE2-NEXT: addss %xmm0, %xmm1
-; SSE2-NEXT: movss %xmm1, (%rax)
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: PR43024:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
-; SSSE3-NEXT: movaps %xmm0, (%rax)
-; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSSE3-NEXT: addss %xmm0, %xmm1
-; SSSE3-NEXT: xorps %xmm0, %xmm0
-; SSSE3-NEXT: addss %xmm0, %xmm1
-; SSSE3-NEXT: addss %xmm0, %xmm1
-; SSSE3-NEXT: movss %xmm1, (%rax)
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: PR43024:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
-; SSE41-NEXT: movaps %xmm0, (%rax)
-; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: addss %xmm0, %xmm1
-; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: addss %xmm0, %xmm1
-; SSE41-NEXT: addss %xmm0, %xmm1
-; SSE41-NEXT: movss %xmm1, (%rax)
-; SSE41-NEXT: retq
+; SSE-LABEL: PR43024:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
+; SSE-NEXT: movaps %xmm0, (%rax)
+; SSE-NEXT: movl $2143289344, (%rax) # imm = 0x7FC00000
+; SSE-NEXT: retq
 ;
 ; AVX-LABEL: PR43024:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [NaN,NaN,0.0E+0,0.0E+0]
 ; AVX-NEXT: vmovaps %xmm0, (%rax)
-; AVX-NEXT: vaddss {{\.LCPI.*}}+{{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vaddss {{\.LCPI.*}}+{{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vmovss %xmm0, (%rax)
+; AVX-NEXT: movl $2143289344, (%rax) # imm = 0x7FC00000
 ; AVX-NEXT: retq
   store <4 x float> , <4 x float>* undef, align 16
   %1 = load <4 x float>, <4 x float>* undef, align 16
diff --git a/llvm/test/CodeGen/X86/widen_shuffle-1.ll b/llvm/test/CodeGen/X86/widen_shuffle-1.ll
--- a/llvm/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/llvm/test/CodeGen/X86/widen_shuffle-1.ll
@@ -105,8 +105,8 @@
 ; X86-LABEL: shuf5:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT: movsd %xmm0, (%eax)
+; X86-NEXT: movl $555819297, 4(%eax) # imm = 0x21212121
+; X86-NEXT: movl $555819297, (%eax) # imm = 0x21212121
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shuf5:
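Illustrative sketch (hypothetical IR, not part of the patch; it mirrors the test2 case in arm64-nvcast.ll above): the new SimplifyDemandedBits path fires when every demanded element of the source vector has the same fully known bit pattern, so Known2.isConstant() holds and a variable-index extractelement should fold to a scalar constant.

; Hypothetical reduced example: all lanes share one bit pattern, so the
; extract's KnownBits are a constant for any runtime index %i, and the
; store is expected to become a constant store (bit pattern 0x3F800000 here).
define void @extract_from_uniform_constant(float* %p, i32 %i) {
  %e = extractelement <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, i32 %i
  store float %e, float* %p, align 4
  ret void
}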