Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
test/CodeGen/X86/masked_gather_scatter.ll
Show First 20 Lines • Show All 1,758 Lines • ▼ Show 20 Lines | |||||
; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 | ; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 | ||||
; KNL_32-NEXT: kshiftrw $8, %k1, %k2 | ; KNL_32-NEXT: kshiftrw $8, %k1, %k2 | ||||
; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1} | ; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1} | ||||
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 | ; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 | ||||
; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2} | ; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2} | ||||
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0 | ; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0 | ||||
; KNL_32-NEXT: movl %ebp, %esp | ; KNL_32-NEXT: movl %ebp, %esp | ||||
; KNL_32-NEXT: popl %ebp | ; KNL_32-NEXT: popl %ebp | ||||
; KNL_32-NEXT: .Lcfi3: | |||||
; KNL_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; KNL_32-NEXT: retl | ; KNL_32-NEXT: retl | ||||
; | ; | ||||
; SKX-LABEL: test_gather_16i64: | ; SKX-LABEL: test_gather_16i64: | ||||
; SKX: # BB#0: | ; SKX: # BB#0: | ||||
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 | ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 | ||||
; SKX-NEXT: vpslld $31, %zmm2, %zmm2 | ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 | ||||
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 | ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 | ||||
; SKX-NEXT: kshiftrw $8, %k1, %k2 | ; SKX-NEXT: kshiftrw $8, %k1, %k2 | ||||
; SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1} | ; SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1} | ||||
; SKX-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2} | ; SKX-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2} | ||||
; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 | ; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 | ||||
; SKX-NEXT: vmovdqa64 %zmm4, %zmm1 | ; SKX-NEXT: vmovdqa64 %zmm4, %zmm1 | ||||
; SKX-NEXT: retq | ; SKX-NEXT: retq | ||||
; | ; | ||||
; SKX_32-LABEL: test_gather_16i64: | ; SKX_32-LABEL: test_gather_16i64: | ||||
; SKX_32: # BB#0: | ; SKX_32: # BB#0: | ||||
; SKX_32-NEXT: pushl %ebp | ; SKX_32-NEXT: pushl %ebp | ||||
; SKX_32-NEXT: .Lcfi1: | |||||
; SKX_32-NEXT: .cfi_def_cfa_offset 8 | |||||
; SKX_32-NEXT: .Lcfi2: | ; SKX_32-NEXT: .Lcfi2: | ||||
; SKX_32-NEXT: .cfi_def_cfa_offset 8 | |||||
; SKX_32-NEXT: .Lcfi3: | |||||
; SKX_32-NEXT: .cfi_offset %ebp, -8 | ; SKX_32-NEXT: .cfi_offset %ebp, -8 | ||||
; SKX_32-NEXT: movl %esp, %ebp | ; SKX_32-NEXT: movl %esp, %ebp | ||||
; SKX_32-NEXT: .Lcfi3: | ; SKX_32-NEXT: .Lcfi4: | ||||
; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; SKX_32-NEXT: andl $-64, %esp | ; SKX_32-NEXT: andl $-64, %esp | ||||
; SKX_32-NEXT: subl $64, %esp | ; SKX_32-NEXT: subl $64, %esp | ||||
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ||||
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 | ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 | ||||
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ||||
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 | ; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 | ||||
; SKX_32-NEXT: kshiftrw $8, %k1, %k2 | ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 | ||||
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1} | ; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1} | ||||
; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 | ; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 | ||||
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2} | ; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2} | ||||
; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0 | ; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0 | ||||
; SKX_32-NEXT: movl %ebp, %esp | ; SKX_32-NEXT: movl %ebp, %esp | ||||
; SKX_32-NEXT: popl %ebp | ; SKX_32-NEXT: popl %ebp | ||||
; SKX_32-NEXT: .Lcfi5: | |||||
; SKX_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; SKX_32-NEXT: retl | ; SKX_32-NEXT: retl | ||||
%res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0) | %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0) | ||||
ret <16 x i64> %res | ret <16 x i64> %res | ||||
} | } | ||||
declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0) | declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0) | ||||
define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { | define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { | ||||
; KNL_64-LABEL: test_gather_16f32: | ; KNL_64-LABEL: test_gather_16f32: | ||||
; KNL_64: # BB#0: | ; KNL_64: # BB#0: | ||||
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | |||||
; KNL_64-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} | ; KNL_64-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} | ||||
; KNL_64-NEXT: vmovapd %zmm3, %zmm0 | ; KNL_64-NEXT: vmovapd %zmm3, %zmm0 | ||||
; KNL_64-NEXT: vmovapd %zmm4, %zmm1 | ; KNL_64-NEXT: vmovapd %zmm4, %zmm1 | ||||
; KNL_64-NEXT: retq | ; KNL_64-NEXT: retq | ||||
; | ; | ||||
; KNL_32-LABEL: test_gather_16f64: | ; KNL_32-LABEL: test_gather_16f64: | ||||
; KNL_32: # BB#0: | ; KNL_32: # BB#0: | ||||
; KNL_32-NEXT: pushl %ebp | ; KNL_32-NEXT: pushl %ebp | ||||
; KNL_32-NEXT: .Lcfi3: | |||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8 | |||||
; KNL_32-NEXT: .Lcfi4: | ; KNL_32-NEXT: .Lcfi4: | ||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8 | |||||
; KNL_32-NEXT: .Lcfi5: | |||||
; KNL_32-NEXT: .cfi_offset %ebp, -8 | ; KNL_32-NEXT: .cfi_offset %ebp, -8 | ||||
; KNL_32-NEXT: movl %esp, %ebp | ; KNL_32-NEXT: movl %esp, %ebp | ||||
; KNL_32-NEXT: .Lcfi5: | ; KNL_32-NEXT: .Lcfi6: | ||||
; KNL_32-NEXT: .cfi_def_cfa_register %ebp | ; KNL_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; KNL_32-NEXT: andl $-64, %esp | ; KNL_32-NEXT: andl $-64, %esp | ||||
; KNL_32-NEXT: subl $64, %esp | ; KNL_32-NEXT: subl $64, %esp | ||||
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ||||
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 | ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 | ||||
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ||||
; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 | ; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 | ||||
; KNL_32-NEXT: kshiftrw $8, %k1, %k2 | ; KNL_32-NEXT: kshiftrw $8, %k1, %k2 | ||||
; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} | ; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} | ||||
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 | ; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 | ||||
; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} | ; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} | ||||
; KNL_32-NEXT: vmovapd %zmm2, %zmm0 | ; KNL_32-NEXT: vmovapd %zmm2, %zmm0 | ||||
; KNL_32-NEXT: movl %ebp, %esp | ; KNL_32-NEXT: movl %ebp, %esp | ||||
; KNL_32-NEXT: popl %ebp | ; KNL_32-NEXT: popl %ebp | ||||
; KNL_32-NEXT: .Lcfi7: | |||||
; KNL_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; KNL_32-NEXT: retl | ; KNL_32-NEXT: retl | ||||
; | ; | ||||
; SKX-LABEL: test_gather_16f64: | ; SKX-LABEL: test_gather_16f64: | ||||
; SKX: # BB#0: | ; SKX: # BB#0: | ||||
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 | ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 | ||||
; SKX-NEXT: vpslld $31, %zmm2, %zmm2 | ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 | ||||
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 | ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 | ||||
; SKX-NEXT: kshiftrw $8, %k1, %k2 | ; SKX-NEXT: kshiftrw $8, %k1, %k2 | ||||
; SKX-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1} | ; SKX-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1} | ||||
; SKX-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} | ; SKX-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} | ||||
; SKX-NEXT: vmovapd %zmm3, %zmm0 | ; SKX-NEXT: vmovapd %zmm3, %zmm0 | ||||
; SKX-NEXT: vmovapd %zmm4, %zmm1 | ; SKX-NEXT: vmovapd %zmm4, %zmm1 | ||||
; SKX-NEXT: retq | ; SKX-NEXT: retq | ||||
; | ; | ||||
; SKX_32-LABEL: test_gather_16f64: | ; SKX_32-LABEL: test_gather_16f64: | ||||
; SKX_32: # BB#0: | ; SKX_32: # BB#0: | ||||
; SKX_32-NEXT: pushl %ebp | ; SKX_32-NEXT: pushl %ebp | ||||
; SKX_32-NEXT: .Lcfi4: | ; SKX_32-NEXT: .Lcfi6: | ||||
; SKX_32-NEXT: .cfi_def_cfa_offset 8 | ; SKX_32-NEXT: .cfi_def_cfa_offset 8 | ||||
; SKX_32-NEXT: .Lcfi5: | ; SKX_32-NEXT: .Lcfi7: | ||||
; SKX_32-NEXT: .cfi_offset %ebp, -8 | ; SKX_32-NEXT: .cfi_offset %ebp, -8 | ||||
; SKX_32-NEXT: movl %esp, %ebp | ; SKX_32-NEXT: movl %esp, %ebp | ||||
; SKX_32-NEXT: .Lcfi6: | ; SKX_32-NEXT: .Lcfi8: | ||||
; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; SKX_32-NEXT: andl $-64, %esp | ; SKX_32-NEXT: andl $-64, %esp | ||||
; SKX_32-NEXT: subl $64, %esp | ; SKX_32-NEXT: subl $64, %esp | ||||
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ||||
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 | ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 | ||||
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ||||
; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 | ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 | ||||
; SKX_32-NEXT: kshiftrw $8, %k1, %k2 | ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 | ||||
; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} | ; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} | ||||
; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 | ; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 | ||||
; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} | ; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} | ||||
; SKX_32-NEXT: vmovapd %zmm2, %zmm0 | ; SKX_32-NEXT: vmovapd %zmm2, %zmm0 | ||||
; SKX_32-NEXT: movl %ebp, %esp | ; SKX_32-NEXT: movl %ebp, %esp | ||||
; SKX_32-NEXT: popl %ebp | ; SKX_32-NEXT: popl %ebp | ||||
; SKX_32-NEXT: .Lcfi9: | |||||
; SKX_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; SKX_32-NEXT: retl | ; SKX_32-NEXT: retl | ||||
%res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0) | %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0) | ||||
ret <16 x double> %res | ret <16 x double> %res | ||||
} | } | ||||
declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0) | declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0) | ||||
define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) { | define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) { | ||||
; KNL_64-LABEL: test_scatter_16i32: | ; KNL_64-LABEL: test_scatter_16i32: | ||||
; KNL_64: # BB#0: | ; KNL_64: # BB#0: | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | |||||
; KNL_64-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} | ; KNL_64-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} | ||||
; KNL_64-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2} | ; KNL_64-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2} | ||||
; KNL_64-NEXT: vzeroupper | ; KNL_64-NEXT: vzeroupper | ||||
; KNL_64-NEXT: retq | ; KNL_64-NEXT: retq | ||||
; | ; | ||||
; KNL_32-LABEL: test_scatter_16i64: | ; KNL_32-LABEL: test_scatter_16i64: | ||||
; KNL_32: # BB#0: | ; KNL_32: # BB#0: | ||||
; KNL_32-NEXT: pushl %ebp | ; KNL_32-NEXT: pushl %ebp | ||||
; KNL_32-NEXT: .Lcfi6: | ; KNL_32-NEXT: .Lcfi8: | ||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8 | ; KNL_32-NEXT: .cfi_def_cfa_offset 8 | ||||
; KNL_32-NEXT: .Lcfi7: | ; KNL_32-NEXT: .Lcfi9: | ||||
; KNL_32-NEXT: .cfi_offset %ebp, -8 | ; KNL_32-NEXT: .cfi_offset %ebp, -8 | ||||
; KNL_32-NEXT: movl %esp, %ebp | ; KNL_32-NEXT: movl %esp, %ebp | ||||
; KNL_32-NEXT: .Lcfi8: | ; KNL_32-NEXT: .Lcfi10: | ||||
; KNL_32-NEXT: .cfi_def_cfa_register %ebp | ; KNL_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; KNL_32-NEXT: andl $-64, %esp | ; KNL_32-NEXT: andl $-64, %esp | ||||
; KNL_32-NEXT: subl $64, %esp | ; KNL_32-NEXT: subl $64, %esp | ||||
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ||||
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 | ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 | ||||
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ||||
; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 | ; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 | ||||
; KNL_32-NEXT: kshiftrw $8, %k1, %k2 | ; KNL_32-NEXT: kshiftrw $8, %k1, %k2 | ||||
; KNL_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} | ; KNL_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} | ||||
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 | ; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 | ||||
; KNL_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} | ; KNL_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} | ||||
; KNL_32-NEXT: movl %ebp, %esp | ; KNL_32-NEXT: movl %ebp, %esp | ||||
; KNL_32-NEXT: popl %ebp | ; KNL_32-NEXT: popl %ebp | ||||
; KNL_32-NEXT: .Lcfi11: | |||||
; KNL_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; KNL_32-NEXT: vzeroupper | ; KNL_32-NEXT: vzeroupper | ||||
; KNL_32-NEXT: retl | ; KNL_32-NEXT: retl | ||||
; | ; | ||||
; SKX-LABEL: test_scatter_16i64: | ; SKX-LABEL: test_scatter_16i64: | ||||
; SKX: # BB#0: | ; SKX: # BB#0: | ||||
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 | ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 | ||||
; SKX-NEXT: vpslld $31, %zmm2, %zmm2 | ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 | ||||
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 | ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 | ||||
; SKX-NEXT: kshiftrw $8, %k1, %k2 | ; SKX-NEXT: kshiftrw $8, %k1, %k2 | ||||
; SKX-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} | ; SKX-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} | ||||
; SKX-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2} | ; SKX-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2} | ||||
; SKX-NEXT: vzeroupper | ; SKX-NEXT: vzeroupper | ||||
; SKX-NEXT: retq | ; SKX-NEXT: retq | ||||
; | ; | ||||
; SKX_32-LABEL: test_scatter_16i64: | ; SKX_32-LABEL: test_scatter_16i64: | ||||
; SKX_32: # BB#0: | ; SKX_32: # BB#0: | ||||
; SKX_32-NEXT: pushl %ebp | ; SKX_32-NEXT: pushl %ebp | ||||
; SKX_32-NEXT: .Lcfi7: | ; SKX_32-NEXT: .Lcfi10: | ||||
; SKX_32-NEXT: .cfi_def_cfa_offset 8 | ; SKX_32-NEXT: .cfi_def_cfa_offset 8 | ||||
; SKX_32-NEXT: .Lcfi8: | ; SKX_32-NEXT: .Lcfi11: | ||||
; SKX_32-NEXT: .cfi_offset %ebp, -8 | ; SKX_32-NEXT: .cfi_offset %ebp, -8 | ||||
; SKX_32-NEXT: movl %esp, %ebp | ; SKX_32-NEXT: movl %esp, %ebp | ||||
; SKX_32-NEXT: .Lcfi9: | ; SKX_32-NEXT: .Lcfi12: | ||||
; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; SKX_32-NEXT: andl $-64, %esp | ; SKX_32-NEXT: andl $-64, %esp | ||||
; SKX_32-NEXT: subl $64, %esp | ; SKX_32-NEXT: subl $64, %esp | ||||
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ||||
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 | ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 | ||||
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ||||
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 | ; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 | ||||
; SKX_32-NEXT: kshiftrw $8, %k1, %k2 | ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 | ||||
; SKX_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} | ; SKX_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} | ||||
; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 | ; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 | ||||
; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} | ; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} | ||||
; SKX_32-NEXT: movl %ebp, %esp | ; SKX_32-NEXT: movl %ebp, %esp | ||||
; SKX_32-NEXT: popl %ebp | ; SKX_32-NEXT: popl %ebp | ||||
; SKX_32-NEXT: .Lcfi13: | |||||
; SKX_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; SKX_32-NEXT: vzeroupper | ; SKX_32-NEXT: vzeroupper | ||||
; SKX_32-NEXT: retl | ; SKX_32-NEXT: retl | ||||
call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask) | call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask) | ||||
ret void | ret void | ||||
} | } | ||||
declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask) | declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask) | ||||
define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { | define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { | ||||
; KNL_64-LABEL: test_scatter_16f32: | ; KNL_64-LABEL: test_scatter_16f32: | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | |||||
; KNL_64-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} | ; KNL_64-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} | ||||
; KNL_64-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} | ; KNL_64-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} | ||||
; KNL_64-NEXT: vzeroupper | ; KNL_64-NEXT: vzeroupper | ||||
; KNL_64-NEXT: retq | ; KNL_64-NEXT: retq | ||||
; | ; | ||||
; KNL_32-LABEL: test_scatter_16f64: | ; KNL_32-LABEL: test_scatter_16f64: | ||||
; KNL_32: # BB#0: | ; KNL_32: # BB#0: | ||||
; KNL_32-NEXT: pushl %ebp | ; KNL_32-NEXT: pushl %ebp | ||||
; KNL_32-NEXT: .Lcfi9: | ; KNL_32-NEXT: .Lcfi12: | ||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8 | ; KNL_32-NEXT: .cfi_def_cfa_offset 8 | ||||
; KNL_32-NEXT: .Lcfi10: | ; KNL_32-NEXT: .Lcfi13: | ||||
; KNL_32-NEXT: .cfi_offset %ebp, -8 | ; KNL_32-NEXT: .cfi_offset %ebp, -8 | ||||
; KNL_32-NEXT: movl %esp, %ebp | ; KNL_32-NEXT: movl %esp, %ebp | ||||
; KNL_32-NEXT: .Lcfi11: | ; KNL_32-NEXT: .Lcfi14: | ||||
; KNL_32-NEXT: .cfi_def_cfa_register %ebp | ; KNL_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; KNL_32-NEXT: andl $-64, %esp | ; KNL_32-NEXT: andl $-64, %esp | ||||
; KNL_32-NEXT: subl $64, %esp | ; KNL_32-NEXT: subl $64, %esp | ||||
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ||||
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 | ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 | ||||
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ||||
; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 | ; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 | ||||
; KNL_32-NEXT: kshiftrw $8, %k1, %k2 | ; KNL_32-NEXT: kshiftrw $8, %k1, %k2 | ||||
; KNL_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} | ; KNL_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} | ||||
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 | ; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 | ||||
; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} | ; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} | ||||
; KNL_32-NEXT: movl %ebp, %esp | ; KNL_32-NEXT: movl %ebp, %esp | ||||
; KNL_32-NEXT: popl %ebp | ; KNL_32-NEXT: popl %ebp | ||||
; KNL_32-NEXT: .Lcfi15: | |||||
; KNL_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; KNL_32-NEXT: vzeroupper | ; KNL_32-NEXT: vzeroupper | ||||
; KNL_32-NEXT: retl | ; KNL_32-NEXT: retl | ||||
; | ; | ||||
; SKX-LABEL: test_scatter_16f64: | ; SKX-LABEL: test_scatter_16f64: | ||||
; SKX: # BB#0: | ; SKX: # BB#0: | ||||
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 | ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 | ||||
; SKX-NEXT: vpslld $31, %zmm2, %zmm2 | ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 | ||||
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 | ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 | ||||
; SKX-NEXT: kshiftrw $8, %k1, %k2 | ; SKX-NEXT: kshiftrw $8, %k1, %k2 | ||||
; SKX-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} | ; SKX-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} | ||||
; SKX-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} | ; SKX-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} | ||||
; SKX-NEXT: vzeroupper | ; SKX-NEXT: vzeroupper | ||||
; SKX-NEXT: retq | ; SKX-NEXT: retq | ||||
; | ; | ||||
; SKX_32-LABEL: test_scatter_16f64: | ; SKX_32-LABEL: test_scatter_16f64: | ||||
; SKX_32: # BB#0: | ; SKX_32: # BB#0: | ||||
; SKX_32-NEXT: pushl %ebp | ; SKX_32-NEXT: pushl %ebp | ||||
; SKX_32-NEXT: .Lcfi10: | ; SKX_32-NEXT: .Lcfi14: | ||||
; SKX_32-NEXT: .cfi_def_cfa_offset 8 | ; SKX_32-NEXT: .cfi_def_cfa_offset 8 | ||||
; SKX_32-NEXT: .Lcfi11: | ; SKX_32-NEXT: .Lcfi15: | ||||
; SKX_32-NEXT: .cfi_offset %ebp, -8 | ; SKX_32-NEXT: .cfi_offset %ebp, -8 | ||||
; SKX_32-NEXT: movl %esp, %ebp | ; SKX_32-NEXT: movl %esp, %ebp | ||||
; SKX_32-NEXT: .Lcfi12: | ; SKX_32-NEXT: .Lcfi16: | ||||
; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; SKX_32-NEXT: andl $-64, %esp | ; SKX_32-NEXT: andl $-64, %esp | ||||
; SKX_32-NEXT: subl $64, %esp | ; SKX_32-NEXT: subl $64, %esp | ||||
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 | ||||
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 | ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 | ||||
; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 | ||||
; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 | ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 | ||||
; SKX_32-NEXT: kshiftrw $8, %k1, %k2 | ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 | ||||
; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} | ; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} | ||||
; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 | ; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 | ||||
; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} | ; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} | ||||
; SKX_32-NEXT: movl %ebp, %esp | ; SKX_32-NEXT: movl %ebp, %esp | ||||
; SKX_32-NEXT: popl %ebp | ; SKX_32-NEXT: popl %ebp | ||||
; SKX_32-NEXT: .Lcfi17: | |||||
; SKX_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; SKX_32-NEXT: vzeroupper | ; SKX_32-NEXT: vzeroupper | ||||
; SKX_32-NEXT: retl | ; SKX_32-NEXT: retl | ||||
call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask) | call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask) | ||||
ret void | ret void | ||||
} | } | ||||
declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask) | declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask) | ||||
define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i64> %d) { | define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i64> %d) { | ||||
Show All 10 Lines | |||||
; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} | ; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} | ||||
; KNL_64-NEXT: vpaddq %ymm1, %ymm1, %ymm0 | ; KNL_64-NEXT: vpaddq %ymm1, %ymm1, %ymm0 | ||||
; KNL_64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 | ; KNL_64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 | ||||
; KNL_64-NEXT: retq | ; KNL_64-NEXT: retq | ||||
; | ; | ||||
; KNL_32-LABEL: test_pr28312: | ; KNL_32-LABEL: test_pr28312: | ||||
; KNL_32: # BB#0: | ; KNL_32: # BB#0: | ||||
; KNL_32-NEXT: pushl %ebp | ; KNL_32-NEXT: pushl %ebp | ||||
; KNL_32-NEXT: .Lcfi12: | ; KNL_32-NEXT: .Lcfi16: | ||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8 | ; KNL_32-NEXT: .cfi_def_cfa_offset 8 | ||||
; KNL_32-NEXT: .Lcfi13: | ; KNL_32-NEXT: .Lcfi17: | ||||
; KNL_32-NEXT: .cfi_offset %ebp, -8 | ; KNL_32-NEXT: .cfi_offset %ebp, -8 | ||||
; KNL_32-NEXT: movl %esp, %ebp | ; KNL_32-NEXT: movl %esp, %ebp | ||||
; KNL_32-NEXT: .Lcfi14: | ; KNL_32-NEXT: .Lcfi18: | ||||
; KNL_32-NEXT: .cfi_def_cfa_register %ebp | ; KNL_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; KNL_32-NEXT: andl $-32, %esp | ; KNL_32-NEXT: andl $-32, %esp | ||||
; KNL_32-NEXT: subl $32, %esp | ; KNL_32-NEXT: subl $32, %esp | ||||
; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> | ; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> | ||||
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 | ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 | ||||
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 | ; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 | ||||
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 | ; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 | ||||
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2 | ; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2 | ||||
; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm2, %zmm1 | ; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm2, %zmm1 | ||||
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 | ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 | ||||
; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 | ; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 | ||||
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 | ; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 | ||||
; KNL_32-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} | ; KNL_32-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} | ||||
; KNL_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 | ; KNL_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 | ||||
; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 | ; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 | ||||
; KNL_32-NEXT: movl %ebp, %esp | ; KNL_32-NEXT: movl %ebp, %esp | ||||
; KNL_32-NEXT: popl %ebp | ; KNL_32-NEXT: popl %ebp | ||||
; KNL_32-NEXT: .Lcfi19: | |||||
; KNL_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; KNL_32-NEXT: retl | ; KNL_32-NEXT: retl | ||||
; | ; | ||||
; SKX-LABEL: test_pr28312: | ; SKX-LABEL: test_pr28312: | ||||
; SKX: # BB#0: | ; SKX: # BB#0: | ||||
; SKX-NEXT: vpslld $31, %xmm1, %xmm1 | ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 | ||||
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 | ; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 | ||||
; SKX-NEXT: vpgatherqq (,%ymm0), %ymm1 {%k1} | ; SKX-NEXT: vpgatherqq (,%ymm0), %ymm1 {%k1} | ||||
; SKX-NEXT: vpaddq %ymm1, %ymm1, %ymm0 | ; SKX-NEXT: vpaddq %ymm1, %ymm1, %ymm0 | ||||
; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 | ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 | ||||
; SKX-NEXT: retq | ; SKX-NEXT: retq | ||||
; | ; | ||||
; SKX_32-LABEL: test_pr28312: | ; SKX_32-LABEL: test_pr28312: | ||||
; SKX_32: # BB#0: | ; SKX_32: # BB#0: | ||||
; SKX_32-NEXT: pushl %ebp | ; SKX_32-NEXT: pushl %ebp | ||||
; SKX_32-NEXT: .Lcfi13: | ; SKX_32-NEXT: .Lcfi18: | ||||
; SKX_32-NEXT: .cfi_def_cfa_offset 8 | ; SKX_32-NEXT: .cfi_def_cfa_offset 8 | ||||
; SKX_32-NEXT: .Lcfi14: | ; SKX_32-NEXT: .Lcfi19: | ||||
; SKX_32-NEXT: .cfi_offset %ebp, -8 | ; SKX_32-NEXT: .cfi_offset %ebp, -8 | ||||
; SKX_32-NEXT: movl %esp, %ebp | ; SKX_32-NEXT: movl %esp, %ebp | ||||
; SKX_32-NEXT: .Lcfi15: | ; SKX_32-NEXT: .Lcfi20: | ||||
; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ; SKX_32-NEXT: .cfi_def_cfa_register %ebp | ||||
; SKX_32-NEXT: andl $-32, %esp | ; SKX_32-NEXT: andl $-32, %esp | ||||
; SKX_32-NEXT: subl $32, %esp | ; SKX_32-NEXT: subl $32, %esp | ||||
; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 | ; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 | ||||
; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 | ; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 | ||||
; SKX_32-NEXT: vpgatherdq (,%xmm0), %ymm1 {%k1} | ; SKX_32-NEXT: vpgatherdq (,%xmm0), %ymm1 {%k1} | ||||
; SKX_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 | ; SKX_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 | ||||
; SKX_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 | ; SKX_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 | ||||
; SKX_32-NEXT: movl %ebp, %esp | ; SKX_32-NEXT: movl %ebp, %esp | ||||
; SKX_32-NEXT: popl %ebp | ; SKX_32-NEXT: popl %ebp | ||||
; SKX_32-NEXT: .Lcfi21: | |||||
; SKX_32-NEXT: .cfi_def_cfa %esp, 4 | |||||
; SKX_32-NEXT: retl | ; SKX_32-NEXT: retl | ||||
%g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) | %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) | ||||
%g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) | %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) | ||||
%g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) | %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) | ||||
%a = add <4 x i64> %g1, %g2 | %a = add <4 x i64> %g1, %g2 | ||||
%b = add <4 x i64> %a, %g3 | %b = add <4 x i64> %a, %g3 | ||||
ret <4 x i64> %b | ret <4 x i64> %b | ||||
} | } | ||||
declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>) | declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>) |