Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp @@ -9451,6 +9451,8 @@ { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr }, { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm }, { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr }, + { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr }, + { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr }, // AVX 128-bit support { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, @@ -9479,6 +9481,8 @@ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr }, { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm }, { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr }, + { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr }, + { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr }, // AVX 256-bit support { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, @@ -9577,6 +9581,8 @@ { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr }, { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm }, { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr }, + { X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr }, + { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr }, }; static const uint16_t ReplaceableInstrsAVX2[][3] = { Index: llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll +++ llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll @@ -49,9 +49,9 @@ define void @zero_test() { ; X32-LABEL: zero_test: ; X32: # BB#0: # %entry -; X32-NEXT: pxor %xmm0, %xmm0 -; X32-NEXT: pextrd $1, %xmm0, (%eax) -; X32-NEXT: movd %xmm0, (%eax) +; X32-NEXT: xorps %xmm0, %xmm0 +; X32-NEXT: extractps $1, %xmm0, (%eax) +; X32-NEXT: movss %xmm0, (%eax) ; X32-NEXT: retl ; ; X64-LABEL: zero_test: Index: llvm/trunk/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll +++ llvm/trunk/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll @@ -10,12 +10,12 @@ define <4 x i32> @test(<4 x i32>* %p) { ; CHECK-LABEL: test: ; CHECK: # BB#0: -; CHECK-NEXT: movdqa (%rdi), %xmm0 -; CHECK-NEXT: pextrd $2, %xmm0, %eax +; CHECK-NEXT: movaps (%rdi), %xmm0 +; CHECK-NEXT: extractps $2, %xmm0, %eax ; CHECK-NEXT: cmpl $3, %eax ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # BB#1: -; CHECK-NEXT: pxor %xmm0, %xmm0 +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: retq %v = load <4 x i32>, <4 x i32>* %p Index: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -792,14 +792,14 @@ ; X32-LABEL: test_mm256_extract_epi32: ; X32: # BB#0: ; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X32-NEXT: vpextrd $1, %xmm0, %eax +; X32-NEXT: vextractps $1, %xmm0, %eax ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_extract_epi32: ; X64: # BB#0: ; X64-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X64-NEXT: vpextrd $1, %xmm0, %eax +; X64-NEXT: vextractps $1, %xmm0, %eax ; X64-NEXT: vzeroupper ; X64-NEXT: retq %arg0 = bitcast <4 x i64> %a0 to <8 x i32> @@ -811,8 +811,8 @@ ; X32-LABEL: test_mm256_extract_epi64: ; X32: # BB#0: ; X32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X32-NEXT: vpextrd $2, %xmm0, %eax -; X32-NEXT: vpextrd $3, %xmm0, %edx +; X32-NEXT: vextractps $2, %xmm0, %eax +; X32-NEXT: vextractps $3, %xmm0, %edx ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; Index: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll +++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll @@ -424,9 +424,9 @@ define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) { ; CHECK-LABEL: extract_v16i32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpextrd $1, %xmm0, %eax -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi) +; CHECK-NEXT: vextractps $1, %xmm0, %eax +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vextractps $1, %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %r1 = extractelement <16 x i32> %x, i32 1 @@ -438,9 +438,9 @@ define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) { ; CHECK-LABEL: extract_v8i32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpextrd $1, %xmm0, %eax -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vpextrd $1, %xmm0, (%rdi) +; CHECK-NEXT: vextractps $1, %xmm0, %eax +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vextractps $1, %xmm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %r1 = extractelement <8 x i32> %x, i32 1 @@ -452,8 +452,8 @@ define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) { ; CHECK-LABEL: extract_v4i32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpextrd $1, %xmm0, %eax -; CHECK-NEXT: vpextrd $3, %xmm0, (%rdi) +; CHECK-NEXT: vextractps $1, %xmm0, %eax +; CHECK-NEXT: vextractps $3, %xmm0, (%rdi) ; CHECK-NEXT: retq %r1 = extractelement <4 x i32> %x, i32 1 %r2 = extractelement <4 x i32> %x, i32 3 Index: llvm/trunk/test/CodeGen/X86/extract-store.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/extract-store.ll +++ llvm/trunk/test/CodeGen/X86/extract-store.ll @@ -285,23 +285,23 @@ ; SSE41-X32-LABEL: extract_i32_3: ; SSE41-X32: # BB#0: ; SSE41-X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SSE41-X32-NEXT: pextrd $3, %xmm0, (%eax) +; SSE41-X32-NEXT: extractps $3, %xmm0, (%eax) ; SSE41-X32-NEXT: retl ; ; SSE41-X64-LABEL: extract_i32_3: ; SSE41-X64: # BB#0: -; SSE41-X64-NEXT: pextrd $3, %xmm0, (%rdi) +; SSE41-X64-NEXT: extractps $3, %xmm0, (%rdi) ; SSE41-X64-NEXT: retq ; ; AVX-X32-LABEL: extract_i32_3: ; AVX-X32: # BB#0: ; AVX-X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX-X32-NEXT: vpextrd $3, %xmm0, (%eax) +; AVX-X32-NEXT: vextractps $3, %xmm0, (%eax) ; AVX-X32-NEXT: retl ; ; AVX-X64-LABEL: extract_i32_3: ; AVX-X64: # BB#0: -; AVX-X64-NEXT: vpextrd $3, %xmm0, (%rdi) +; AVX-X64-NEXT: vextractps $3, %xmm0, (%rdi) ; AVX-X64-NEXT: retq ; ; SSE-F128-LABEL: extract_i32_3: Index: llvm/trunk/test/CodeGen/X86/extractelement-index.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/extractelement-index.ll +++ llvm/trunk/test/CodeGen/X86/extractelement-index.ll @@ -231,12 +231,12 @@ ; ; SSE41-LABEL: extractelement_v4i32_3: ; SSE41: # BB#0: -; SSE41-NEXT: pextrd $3, %xmm0, %eax +; SSE41-NEXT: extractps $3, %xmm0, %eax ; SSE41-NEXT: retq ; ; AVX-LABEL: extractelement_v4i32_3: ; AVX: # BB#0: -; AVX-NEXT: vpextrd $3, %xmm0, %eax +; AVX-NEXT: vextractps $3, %xmm0, %eax ; AVX-NEXT: retq %b = extractelement <4 x i32> %a, i256 3 ret i32 %b @@ -297,22 +297,15 @@ ; ; SSE41-LABEL: extractelement_v8i32_7: ; SSE41: # BB#0: -; SSE41-NEXT: pextrd $3, %xmm1, %eax +; SSE41-NEXT: extractps $3, %xmm1, %eax ; SSE41-NEXT: retq ; -; AVX1-LABEL: extractelement_v8i32_7: -; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpextrd $3, %xmm0, %eax -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: extractelement_v8i32_7: -; AVX2: # BB#0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpextrd $3, %xmm0, %eax -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX-LABEL: extractelement_v8i32_7: +; AVX: # BB#0: +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vextractps $3, %xmm0, %eax +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq %b = extractelement <8 x i32> %a, i64 7 ret i32 %b } Index: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll +++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll @@ -76,7 +76,7 @@ ; X32-LABEL: signbits_ashr_extract_sitofp: ; X32: # BB#0: ; X32-NEXT: pushl %eax -; X32-NEXT: vpextrd $1, %xmm0, %eax +; X32-NEXT: vextractps $1, %xmm0, %eax ; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X32-NEXT: vmovss %xmm0, (%esp) ; X32-NEXT: flds (%esp) Index: llvm/trunk/test/CodeGen/X86/nontemporal-2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/nontemporal-2.ll +++ llvm/trunk/test/CodeGen/X86/nontemporal-2.ll @@ -541,19 +541,19 @@ ; ; SSE41-LABEL: test_extract_i32: ; SSE41: # BB#0: -; SSE41-NEXT: pextrd $1, %xmm0, %eax +; SSE41-NEXT: extractps $1, %xmm0, %eax ; SSE41-NEXT: movntil %eax, (%rdi) ; SSE41-NEXT: retq ; ; AVX-LABEL: test_extract_i32: ; AVX: # BB#0: -; AVX-NEXT: vpextrd $1, %xmm0, %eax +; AVX-NEXT: vextractps $1, %xmm0, %eax ; AVX-NEXT: movntil %eax, (%rdi) ; AVX-NEXT: retq ; ; VLX-LABEL: test_extract_i32: ; VLX: # BB#0: -; VLX-NEXT: vpextrd $1, %xmm0, %eax +; VLX-NEXT: vextractps $1, %xmm0, %eax ; VLX-NEXT: movntil %eax, (%rdi) ; VLX-NEXT: retq %1 = extractelement <4 x i32> %arg, i32 1 Index: llvm/trunk/test/CodeGen/X86/oddshuffles.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/oddshuffles.ll +++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll @@ -112,10 +112,10 @@ ; ; AVX2-LABEL: v3i32: ; AVX2: # BB#0: -; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 -; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX2-NEXT: vpextrd $2, %xmm0, 8(%rdi) -; AVX2-NEXT: vmovq %xmm1, (%rdi) +; AVX2-NEXT: vbroadcastss %xmm1, %xmm1 +; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3] +; AVX2-NEXT: vextractps $2, %xmm0, 8(%rdi) +; AVX2-NEXT: vmovlps %xmm1, (%rdi) ; AVX2-NEXT: retq ; ; XOP-LABEL: v3i32: @@ -199,18 +199,18 @@ ; AVX1: # BB#0: ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2] ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3] -; AVX1-NEXT: vpextrd $3, %xmm0, 16(%rdi) +; AVX1-NEXT: vextractps $3, %xmm0, 16(%rdi) ; AVX1-NEXT: vmovaps %xmm1, (%rdi) ; AVX1-NEXT: retq ; ; AVX2-LABEL: v5i32: ; AVX2: # BB#0: ; AVX2-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u> -; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: vpextrd $3, %xmm0, 16(%rdi) -; AVX2-NEXT: vmovdqa %xmm1, (%rdi) +; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 +; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u> +; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vextractps $3, %xmm0, 16(%rdi) +; AVX2-NEXT: vmovaps %xmm1, (%rdi) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -218,7 +218,7 @@ ; XOP: # BB#0: ; XOP-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2] ; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3] -; XOP-NEXT: vpextrd $3, %xmm0, 16(%rdi) +; XOP-NEXT: vextractps $3, %xmm0, 16(%rdi) ; XOP-NEXT: vmovaps %xmm1, (%rdi) ; XOP-NEXT: retq %r = shufflevector <4 x i32> %a, <4 x i32> %b, <5 x i32> Index: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll +++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll @@ -440,12 +440,12 @@ define i32 @test_mm_extract_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm_extract_epi32: ; X32: # BB#0: -; X32-NEXT: pextrd $1, %xmm0, %eax +; X32-NEXT: extractps $1, %xmm0, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_extract_epi32: ; X64: # BB#0: -; X64-NEXT: pextrd $1, %xmm0, %eax +; X64-NEXT: extractps $1, %xmm0, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %ext = extractelement <4 x i32> %arg0, i32 1 @@ -455,8 +455,8 @@ define i64 @test_mm_extract_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm_extract_epi64: ; X32: # BB#0: -; X32-NEXT: pextrd $2, %xmm0, %eax -; X32-NEXT: pextrd $3, %xmm0, %edx +; X32-NEXT: extractps $2, %xmm0, %eax +; X32-NEXT: extractps $3, %xmm0, %edx ; X32-NEXT: retl ; ; X64-LABEL: test_mm_extract_epi64: Index: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll @@ -949,61 +949,71 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; GENERIC-LABEL: test_pextrd: ; GENERIC: # BB#0: +; GENERIC-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] ; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pextrd: ; SLM: # BB#0: +; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] ; SLM-NEXT: pextrd $3, %xmm0, %eax # sched: [1:1.00] ; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_pextrd: ; SANDY: # BB#0: +; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrd: ; HASWELL: # BB#0: +; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BROADWELL-LABEL: test_pextrd: ; BROADWELL: # BB#0: +; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: test_pextrd: ; SKYLAKE: # BB#0: +; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pextrd: ; SKX: # BB#0: +; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pextrd: ; BTVER2: # BB#0: +; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pextrd: ; ZNVER1: # BB#0: +; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25] ; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.25] ; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [8:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <4 x i32> %a0, i32 3 - %2 = extractelement <4 x i32> %a0, i32 1 - store i32 %2, i32 *%a1 - ret i32 %1 + %1 = add <4 x i32> %a0, %a0 + %2 = extractelement <4 x i32> %1, i32 3 + %3 = extractelement <4 x i32> %1, i32 1 + store i32 %3, i32 *%a1 + ret i32 %2 } define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { Index: llvm/trunk/test/CodeGen/X86/sse41.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41.ll +++ llvm/trunk/test/CodeGen/X86/sse41.ll @@ -108,6 +108,7 @@ %t = fadd float %s, 1.0 ret float %t } + define float @ext_2(<4 x float> %v) nounwind { ; X32-LABEL: ext_2: ; X32: ## BB#0: @@ -125,15 +126,16 @@ %s = extractelement <4 x float> %v, i32 3 ret float %s } + define i32 @ext_3(<4 x i32> %v) nounwind { ; X32-LABEL: ext_3: ; X32: ## BB#0: -; X32-NEXT: pextrd $3, %xmm0, %eax +; X32-NEXT: extractps $3, %xmm0, %eax ; X32-NEXT: retl ; ; X64-LABEL: ext_3: ; X64: ## BB#0: -; X64-NEXT: pextrd $3, %xmm0, %eax +; X64-NEXT: extractps $3, %xmm0, %eax ; X64-NEXT: retq %i = extractelement <4 x i32> %v, i32 3 ret i32 %i @@ -261,7 +263,6 @@ ret i32 %tmp1 } - declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone Index: llvm/trunk/test/CodeGen/X86/widen_load-3.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_load-3.ll +++ llvm/trunk/test/CodeGen/X86/widen_load-3.ll @@ -25,33 +25,19 @@ ; X86-SSE-NEXT: movaps %xmm0, (%eax) ; X86-SSE-NEXT: retl $4 ; -; X86-AVX1-LABEL: load7_aligned: -; X86-AVX1: # BB#0: -; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX1-NEXT: vmovaps (%ecx), %ymm0 -; X86-AVX1-NEXT: vmovaps 32(%ecx), %ymm1 -; X86-AVX1-NEXT: vmovaps %ymm0, (%eax) -; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 -; X86-AVX1-NEXT: vpextrd $1, %xmm0, 52(%eax) -; X86-AVX1-NEXT: vmovd %xmm0, 48(%eax) -; X86-AVX1-NEXT: vmovaps %xmm1, 32(%eax) -; X86-AVX1-NEXT: vzeroupper -; X86-AVX1-NEXT: retl $4 -; -; X86-AVX2-LABEL: load7_aligned: -; X86-AVX2: # BB#0: -; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX2-NEXT: vmovaps (%ecx), %ymm0 -; X86-AVX2-NEXT: vmovdqa 32(%ecx), %ymm1 -; X86-AVX2-NEXT: vmovaps %ymm0, (%eax) -; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 -; X86-AVX2-NEXT: vpextrd $1, %xmm0, 52(%eax) -; X86-AVX2-NEXT: vmovd %xmm0, 48(%eax) -; X86-AVX2-NEXT: vmovdqa %xmm1, 32(%eax) -; X86-AVX2-NEXT: vzeroupper -; X86-AVX2-NEXT: retl $4 +; X86-AVX-LABEL: load7_aligned: +; X86-AVX: # BB#0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX-NEXT: vmovaps (%ecx), %ymm0 +; X86-AVX-NEXT: vmovaps 32(%ecx), %ymm1 +; X86-AVX-NEXT: vmovaps %ymm0, (%eax) +; X86-AVX-NEXT: vextractf128 $1, %ymm1, %xmm0 +; X86-AVX-NEXT: vextractps $1, %xmm0, 52(%eax) +; X86-AVX-NEXT: vmovss %xmm0, 48(%eax) +; X86-AVX-NEXT: vmovaps %xmm1, 32(%eax) +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl $4 ; ; X64-SSE-LABEL: load7_aligned: ; X64-SSE: # BB#0: