Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
Show All 27 Lines | |||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1} | ; X32-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi1: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_broadcastd_epi32: | ; X64-LABEL: test_mm_mask_broadcastd_epi32: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1} | ; X64-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1} | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg0 = bitcast <2 x i64> %a0 to <4 x i32> | %arg0 = bitcast <2 x i64> %a0 to <4 x i32> | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%arg2 = bitcast <2 x i64> %a2 to <4 x i32> | %arg2 = bitcast <2 x i64> %a2 to <4 x i32> | ||||
%res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <4 x i32> zeroinitializer | %res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <4 x i32> zeroinitializer | ||||
%res1 = select <4 x i1> %arg1, <4 x i32> %res0, <4 x i32> %arg0 | %res1 = select <4 x i1> %arg1, <4 x i32> %res0, <4 x i32> %arg0 | ||||
%res2 = bitcast <4 x i32> %res1 to <2 x i64> | %res2 = bitcast <4 x i32> %res1 to <2 x i64> | ||||
ret <2 x i64> %res2 | ret <2 x i64> %res2 | ||||
} | } | ||||
define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) { | define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) { | ||||
; X32-LABEL: test_mm_maskz_broadcastd_epi32: | ; X32-LABEL: test_mm_maskz_broadcastd_epi32: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi1: | ; X32-NEXT: .Lcfi2: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} | ; X32-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi3: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_broadcastd_epi32: | ; X64-LABEL: test_mm_maskz_broadcastd_epi32: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer | %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer | ||||
ret <2 x i64> %res | ret <2 x i64> %res | ||||
} | } | ||||
define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) { | define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) { | ||||
; X32-LABEL: test_mm_mask_broadcastq_epi64: | ; X32-LABEL: test_mm_mask_broadcastq_epi64: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi2: | ; X32-NEXT: .Lcfi4: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $3, %al | ; X32-NEXT: andb $3, %al | ||||
; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} | ; X32-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi5: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_broadcastq_epi64: | ; X64-LABEL: test_mm_mask_broadcastq_epi64: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $3, %dil | ; X64-NEXT: andb $3, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} | ; X64-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
%trn1 = trunc i8 %a1 to i2 | %trn1 = trunc i8 %a1 to i2 | ||||
%arg1 = bitcast i2 %trn1 to <2 x i1> | %arg1 = bitcast i2 %trn1 to <2 x i1> | ||||
%res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <2 x i32> zeroinitializer | %res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <2 x i32> zeroinitializer | ||||
%res1 = select <2 x i1> %arg1, <2 x i64> %res0, <2 x i64> %a0 | %res1 = select <2 x i1> %arg1, <2 x i64> %res0, <2 x i64> %a0 | ||||
ret <2 x i64> %res1 | ret <2 x i64> %res1 | ||||
} | } | ||||
define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) { | define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) { | ||||
; X32-LABEL: test_mm_maskz_broadcastq_epi64: | ; X32-LABEL: test_mm_maskz_broadcastq_epi64: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi3: | ; X32-NEXT: .Lcfi6: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $3, %al | ; X32-NEXT: andb $3, %al | ||||
; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} | ; X32-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi7: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_broadcastq_epi64: | ; X64-LABEL: test_mm_maskz_broadcastq_epi64: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $3, %dil | ; X64-NEXT: andb $3, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> zeroinitializer | %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> zeroinitializer | ||||
ret <4 x i64> %res | ret <4 x i64> %res | ||||
} | } | ||||
define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) { | define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) { | ||||
; X32-LABEL: test_mm256_mask_broadcastq_epi64: | ; X32-LABEL: test_mm256_mask_broadcastq_epi64: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi4: | ; X32-NEXT: .Lcfi8: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} | ; X32-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi9: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_mask_broadcastq_epi64: | ; X64-LABEL: test_mm256_mask_broadcastq_epi64: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} | ; X64-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> zeroinitializer | %res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> zeroinitializer | ||||
%res1 = select <4 x i1> %arg1, <4 x i64> %res0, <4 x i64> %a0 | %res1 = select <4 x i1> %arg1, <4 x i64> %res0, <4 x i64> %a0 | ||||
ret <4 x i64> %res1 | ret <4 x i64> %res1 | ||||
} | } | ||||
define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) { | define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) { | ||||
; X32-LABEL: test_mm256_maskz_broadcastq_epi64: | ; X32-LABEL: test_mm256_maskz_broadcastq_epi64: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi5: | ; X32-NEXT: .Lcfi10: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} | ; X32-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi11: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_maskz_broadcastq_epi64: | ; X64-LABEL: test_mm256_maskz_broadcastq_epi64: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer | %res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer | ||||
ret <2 x double> %res | ret <2 x double> %res | ||||
} | } | ||||
define <2 x double> @test_mm_mask_broadcastsd_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) { | define <2 x double> @test_mm_mask_broadcastsd_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) { | ||||
; X32-LABEL: test_mm_mask_broadcastsd_pd: | ; X32-LABEL: test_mm_mask_broadcastsd_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi6: | ; X32-NEXT: .Lcfi12: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $3, %al | ; X32-NEXT: andb $3, %al | ||||
; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] | ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi13: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_broadcastsd_pd: | ; X64-LABEL: test_mm_mask_broadcastsd_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $3, %dil | ; X64-NEXT: andb $3, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] | ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
%trn1 = trunc i8 %a1 to i2 | %trn1 = trunc i8 %a1 to i2 | ||||
%arg1 = bitcast i2 %trn1 to <2 x i1> | %arg1 = bitcast i2 %trn1 to <2 x i1> | ||||
%res0 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer | %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer | ||||
%res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0 | %res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0 | ||||
ret <2 x double> %res1 | ret <2 x double> %res1 | ||||
} | } | ||||
define <2 x double> @test_mm_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) { | define <2 x double> @test_mm_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) { | ||||
; X32-LABEL: test_mm_maskz_broadcastsd_pd: | ; X32-LABEL: test_mm_maskz_broadcastsd_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi7: | ; X32-NEXT: .Lcfi14: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $3, %al | ; X32-NEXT: andb $3, %al | ||||
; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] | ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi15: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_broadcastsd_pd: | ; X64-LABEL: test_mm_maskz_broadcastsd_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $3, %dil | ; X64-NEXT: andb $3, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer | %res = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer | ||||
ret <4 x double> %res | ret <4 x double> %res | ||||
} | } | ||||
define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %a0, i8 %a1, <2 x double> %a2) { | define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %a0, i8 %a1, <2 x double> %a2) { | ||||
; X32-LABEL: test_mm256_mask_broadcastsd_pd: | ; X32-LABEL: test_mm256_mask_broadcastsd_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi8: | ; X32-NEXT: .Lcfi16: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} | ; X32-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi17: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_mask_broadcastsd_pd: | ; X64-LABEL: test_mm256_mask_broadcastsd_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} | ; X64-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <2 x double> %a2, <2 x double> undef, <4 x i32> zeroinitializer | %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <4 x i32> zeroinitializer | ||||
%res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0 | %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0 | ||||
ret <4 x double> %res1 | ret <4 x double> %res1 | ||||
} | } | ||||
define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) { | define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) { | ||||
; X32-LABEL: test_mm256_maskz_broadcastsd_pd: | ; X32-LABEL: test_mm256_maskz_broadcastsd_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi9: | ; X32-NEXT: .Lcfi18: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} | ; X32-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi19: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_maskz_broadcastsd_pd: | ; X64-LABEL: test_mm256_maskz_broadcastsd_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer | %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer | ||||
ret <4 x float> %res | ret <4 x float> %res | ||||
} | } | ||||
define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) { | define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) { | ||||
; X32-LABEL: test_mm_mask_broadcastss_ps: | ; X32-LABEL: test_mm_mask_broadcastss_ps: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi10: | ; X32-NEXT: .Lcfi20: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vbroadcastss %xmm1, %xmm0 {%k1} | ; X32-NEXT: vbroadcastss %xmm1, %xmm0 {%k1} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi21: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_broadcastss_ps: | ; X64-LABEL: test_mm_mask_broadcastss_ps: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vbroadcastss %xmm1, %xmm0 {%k1} | ; X64-NEXT: vbroadcastss %xmm1, %xmm0 {%k1} | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> zeroinitializer | %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> zeroinitializer | ||||
%res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0 | %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0 | ||||
ret <4 x float> %res1 | ret <4 x float> %res1 | ||||
} | } | ||||
define <4 x float> @test_mm_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) { | define <4 x float> @test_mm_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) { | ||||
; X32-LABEL: test_mm_maskz_broadcastss_ps: | ; X32-LABEL: test_mm_maskz_broadcastss_ps: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi11: | ; X32-NEXT: .Lcfi22: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} | ; X32-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi23: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_broadcastss_ps: | ; X64-LABEL: test_mm_maskz_broadcastss_ps: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer | %res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer | ||||
ret <2 x double> %res | ret <2 x double> %res | ||||
} | } | ||||
define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) { | define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) { | ||||
; X32-LABEL: test_mm_mask_movddup_pd: | ; X32-LABEL: test_mm_mask_movddup_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi12: | ; X32-NEXT: .Lcfi24: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $3, %al | ; X32-NEXT: andb $3, %al | ||||
; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] | ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi25: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_movddup_pd: | ; X64-LABEL: test_mm_mask_movddup_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $3, %dil | ; X64-NEXT: andb $3, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] | ; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
%trn1 = trunc i8 %a1 to i2 | %trn1 = trunc i8 %a1 to i2 | ||||
%arg1 = bitcast i2 %trn1 to <2 x i1> | %arg1 = bitcast i2 %trn1 to <2 x i1> | ||||
%res0 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer | %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer | ||||
%res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0 | %res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0 | ||||
ret <2 x double> %res1 | ret <2 x double> %res1 | ||||
} | } | ||||
define <2 x double> @test_mm_maskz_movddup_pd(i8 %a0, <2 x double> %a1) { | define <2 x double> @test_mm_maskz_movddup_pd(i8 %a0, <2 x double> %a1) { | ||||
; X32-LABEL: test_mm_maskz_movddup_pd: | ; X32-LABEL: test_mm_maskz_movddup_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi13: | ; X32-NEXT: .Lcfi26: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $3, %al | ; X32-NEXT: andb $3, %al | ||||
; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] | ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi27: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_movddup_pd: | ; X64-LABEL: test_mm_maskz_movddup_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $3, %dil | ; X64-NEXT: andb $3, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | ||||
ret <4 x double> %res | ret <4 x double> %res | ||||
} | } | ||||
define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) { | define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) { | ||||
; X32-LABEL: test_mm256_mask_movddup_pd: | ; X32-LABEL: test_mm256_mask_movddup_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi14: | ; X32-NEXT: .Lcfi28: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2] | ; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi29: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_mask_movddup_pd: | ; X64-LABEL: test_mm256_mask_movddup_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2] | ; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <4 x double> %a2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | %res0 = shufflevector <4 x double> %a2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | ||||
%res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0 | %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0 | ||||
ret <4 x double> %res1 | ret <4 x double> %res1 | ||||
} | } | ||||
define <4 x double> @test_mm256_maskz_movddup_pd(i8 %a0, <4 x double> %a1) { | define <4 x double> @test_mm256_maskz_movddup_pd(i8 %a0, <4 x double> %a1) { | ||||
; X32-LABEL: test_mm256_maskz_movddup_pd: | ; X32-LABEL: test_mm256_maskz_movddup_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi15: | ; X32-NEXT: .Lcfi30: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] | ; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi31: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_maskz_movddup_pd: | ; X64-LABEL: test_mm256_maskz_movddup_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> | %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> | ||||
ret <4 x float> %res | ret <4 x float> %res | ||||
} | } | ||||
; Merge-masked odd-lane duplication on <4 x float>: %a2 is shuffled with
; indices <1,1,3,3>, then each lane takes the shuffled value when its mask
; bit (from %a1) is set and the pass-through value from %a0 otherwise.
; The assertions below expect this to lower to one masked vmovshdup on xmm
; registers for both check prefixes.
define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) { | define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) { | ||||
; X32-LABEL: test_mm_mask_movehdup_ps: | ; X32-LABEL: test_mm_mask_movehdup_ps: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi16: | ; X32-NEXT: .Lcfi32: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3] | ; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi33: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_movehdup_ps: | ; X64-LABEL: test_mm_mask_movehdup_ps: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3] | ; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; Only the low 4 bits of the i8 mask are kept; they are reinterpreted as a
; <4 x i1> per-lane predicate for the select.
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> | %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> | ||||
%res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0 | %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0 | ||||
ret <4 x float> %res1 | ret <4 x float> %res1 | ||||
} | } | ||||
define <4 x float> @test_mm_maskz_movehdup_ps(i8 %a0, <4 x float> %a1) { | define <4 x float> @test_mm_maskz_movehdup_ps(i8 %a0, <4 x float> %a1) { | ||||
; X32-LABEL: test_mm_maskz_movehdup_ps: | ; X32-LABEL: test_mm_maskz_movehdup_ps: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi17: | ; X32-NEXT: .Lcfi34: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] | ; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi35: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_movehdup_ps: | ; X64-LABEL: test_mm_maskz_movehdup_ps: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | ||||
ret <4 x float> %res | ret <4 x float> %res | ||||
} | } | ||||
; Merge-masked even-lane duplication on <4 x float>: %a2 is shuffled with
; indices <0,0,2,2>, then each lane takes the shuffled value when its mask
; bit (from %a1) is set and the pass-through value from %a0 otherwise.
; The assertions below expect this to lower to one masked vmovsldup on xmm
; registers for both check prefixes.
define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) { | define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) { | ||||
; X32-LABEL: test_mm_mask_moveldup_ps: | ; X32-LABEL: test_mm_mask_moveldup_ps: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi18: | ; X32-NEXT: .Lcfi36: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2] | ; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi37: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_moveldup_ps: | ; X64-LABEL: test_mm_mask_moveldup_ps: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2] | ; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; Only the low 4 bits of the i8 mask are kept; they are reinterpreted as a
; <4 x i1> per-lane predicate for the select.
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> | ||||
%res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0 | %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0 | ||||
ret <4 x float> %res1 | ret <4 x float> %res1 | ||||
} | } | ||||
define <4 x float> @test_mm_maskz_moveldup_ps(i8 %a0, <4 x float> %a1) { | define <4 x float> @test_mm_maskz_moveldup_ps(i8 %a0, <4 x float> %a1) { | ||||
; X32-LABEL: test_mm_maskz_moveldup_ps: | ; X32-LABEL: test_mm_maskz_moveldup_ps: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi19: | ; X32-NEXT: .Lcfi38: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] | ; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi39: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_moveldup_ps: | ; X64-LABEL: test_mm_maskz_moveldup_ps: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0> | %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0> | ||||
ret <4 x i64> %res | ret <4 x i64> %res | ||||
} | } | ||||
; Merge-masked 64-bit integer permute on <4 x i64>: %a2 is shuffled with
; indices <1,0,0,0>, then each lane takes the permuted value when its mask
; bit (from %a1) is set and the pass-through value from %a0 otherwise.
; The assertions below expect this to lower to one masked vpermq on ymm
; registers for both check prefixes.
define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64> %a2) { | define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64> %a2) { | ||||
; X32-LABEL: test_mm256_mask_permutex_epi64: | ; X32-LABEL: test_mm256_mask_permutex_epi64: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi20: | ; X32-NEXT: .Lcfi40: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] | ; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi41: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_mask_permutex_epi64: | ; X64-LABEL: test_mm256_mask_permutex_epi64: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] | ; X64-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; Only the low 4 bits of the i8 mask are kept; they are reinterpreted as a
; <4 x i1> per-lane predicate for the select.
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0> | %res0 = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0> | ||||
%res1 = select <4 x i1> %arg1, <4 x i64> %res0, <4 x i64> %a0 | %res1 = select <4 x i1> %arg1, <4 x i64> %res0, <4 x i64> %a0 | ||||
ret <4 x i64> %res1 | ret <4 x i64> %res1 | ||||
} | } | ||||
define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 %a0, <4 x i64> %a1) { | define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 %a0, <4 x i64> %a1) { | ||||
; X32-LABEL: test_mm256_maskz_permutex_epi64: | ; X32-LABEL: test_mm256_maskz_permutex_epi64: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi21: | ; X32-NEXT: .Lcfi42: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0] | ; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi43: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_maskz_permutex_epi64: | ; X64-LABEL: test_mm256_maskz_permutex_epi64: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0> | %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0> | ||||
ret <4 x double> %res | ret <4 x double> %res | ||||
} | } | ||||
; Merge-masked double-precision permute on <4 x double>: %a2 is shuffled with
; indices <1,0,0,0>, then each lane takes the permuted value when its mask
; bit (from %a1) is set and the pass-through value from %a0 otherwise.
; The assertions below expect this to lower to one masked vpermpd on ymm
; registers for both check prefixes.
define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) { | define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) { | ||||
; X32-LABEL: test_mm256_mask_permutex_pd: | ; X32-LABEL: test_mm256_mask_permutex_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi22: | ; X32-NEXT: .Lcfi44: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] | ; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi45: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_mask_permutex_pd: | ; X64-LABEL: test_mm256_mask_permutex_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] | ; X64-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; Only the low 4 bits of the i8 mask are kept; they are reinterpreted as a
; <4 x i1> per-lane predicate for the select.
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <4 x double> %a2, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0> | %res0 = shufflevector <4 x double> %a2, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0> | ||||
%res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0 | %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0 | ||||
ret <4 x double> %res1 | ret <4 x double> %res1 | ||||
} | } | ||||
define <4 x double> @test_mm256_maskz_permutex_pd(i8 %a0, <4 x double> %a1) { | define <4 x double> @test_mm256_maskz_permutex_pd(i8 %a0, <4 x double> %a1) { | ||||
; X32-LABEL: test_mm256_maskz_permutex_pd: | ; X32-LABEL: test_mm256_maskz_permutex_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi23: | ; X32-NEXT: .Lcfi46: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0] | ; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi47: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_maskz_permutex_pd: | ; X64-LABEL: test_mm256_maskz_permutex_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> | %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> | ||||
ret <2 x double> %res | ret <2 x double> %res | ||||
} | } | ||||
; Merge-masked two-source shuffle on <2 x double>: indices <1,3> pick the
; high element of %a2 and the high element of %a3; each lane then takes the
; shuffled value when its mask bit (from %a1) is set and the pass-through
; value from %a0 otherwise.
; The assertions below expect the backend to select a masked vunpckhpd for
; this pattern (rather than a vshufpd) on both check prefixes.
define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2, <2 x double> %a3) { | define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2, <2 x double> %a3) { | ||||
; X32-LABEL: test_mm_mask_shuffle_pd: | ; X32-LABEL: test_mm_mask_shuffle_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi24: | ; X32-NEXT: .Lcfi48: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $3, %al | ; X32-NEXT: andb $3, %al | ||||
; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1] | ; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi49: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_shuffle_pd: | ; X64-LABEL: test_mm_mask_shuffle_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $3, %dil | ; X64-NEXT: andb $3, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1] | ; X64-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; Only the low 2 bits of the i8 mask are kept (two lanes); they are
; reinterpreted as a <2 x i1> per-lane predicate for the select.
%trn1 = trunc i8 %a1 to i2 | %trn1 = trunc i8 %a1 to i2 | ||||
%arg1 = bitcast i2 %trn1 to <2 x i1> | %arg1 = bitcast i2 %trn1 to <2 x i1> | ||||
%res0 = shufflevector <2 x double> %a2, <2 x double> %a3, <2 x i32> <i32 1, i32 3> | %res0 = shufflevector <2 x double> %a2, <2 x double> %a3, <2 x i32> <i32 1, i32 3> | ||||
%res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0 | %res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0 | ||||
ret <2 x double> %res1 | ret <2 x double> %res1 | ||||
} | } | ||||
define <2 x double> @test_mm_maskz_shuffle_pd(i8 %a0, <2 x double> %a1, <2 x double> %a2) { | define <2 x double> @test_mm_maskz_shuffle_pd(i8 %a0, <2 x double> %a1, <2 x double> %a2) { | ||||
; X32-LABEL: test_mm_maskz_shuffle_pd: | ; X32-LABEL: test_mm_maskz_shuffle_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi25: | ; X32-NEXT: .Lcfi50: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $3, %al | ; X32-NEXT: andb $3, %al | ||||
; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ; X32-NEXT: movb %al, {{[0-9]+}}(%esp) | ||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] | ; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi51: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_shuffle_pd: | ; X64-LABEL: test_mm_maskz_shuffle_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $3, %dil | ; X64-NEXT: andb $3, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 2, i32 6> | %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 2, i32 6> | ||||
ret <4 x double> %res | ret <4 x double> %res | ||||
} | } | ||||
; Merge-masked two-source shuffle on <4 x double>: indices <1,5,2,6> pick
; %a2[1], %a3[1], %a2[2], %a3[2]; each lane then takes the shuffled value
; when its mask bit (from %a1) is set and the pass-through value from %a0
; otherwise.
; The assertions below expect this to lower to one masked vshufpd on ymm
; registers for both check prefixes.
define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2, <4 x double> %a3) { | define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2, <4 x double> %a3) { | ||||
; X32-LABEL: test_mm256_mask_shuffle_pd: | ; X32-LABEL: test_mm256_mask_shuffle_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi26: | ; X32-NEXT: .Lcfi52: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2] | ; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi53: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_mask_shuffle_pd: | ; X64-LABEL: test_mm256_mask_shuffle_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2] | ; X64-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; Only the low 4 bits of the i8 mask are kept; they are reinterpreted as a
; <4 x i1> per-lane predicate for the select.
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <4 x double> %a2, <4 x double> %a3, <4 x i32> <i32 1, i32 5, i32 2, i32 6> | %res0 = shufflevector <4 x double> %a2, <4 x double> %a3, <4 x i32> <i32 1, i32 5, i32 2, i32 6> | ||||
%res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0 | %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0 | ||||
ret <4 x double> %res1 | ret <4 x double> %res1 | ||||
} | } | ||||
define <4 x double> @test_mm256_maskz_shuffle_pd(i8 %a0, <4 x double> %a1, <4 x double> %a2) { | define <4 x double> @test_mm256_maskz_shuffle_pd(i8 %a0, <4 x double> %a1, <4 x double> %a2) { | ||||
; X32-LABEL: test_mm256_maskz_shuffle_pd: | ; X32-LABEL: test_mm256_maskz_shuffle_pd: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi27: | ; X32-NEXT: .Lcfi54: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2] | ; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi55: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm256_maskz_shuffle_pd: | ; X64-LABEL: test_mm256_maskz_shuffle_pd: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
Show All 19 Lines | ; X64-NEXT: retq | ||||
%res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 4> | %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 4> | ||||
ret <4 x float> %res | ret <4 x float> %res | ||||
} | } | ||||
; Merge-masked two-source shuffle on <4 x float>: indices <0,1,4,4> pick
; %a2[0], %a2[1], %a3[0], %a3[0]; each lane then takes the shuffled value
; when its mask bit (from %a1) is set and the pass-through value from %a0
; otherwise.
; The assertions below expect this to lower to one masked vshufps on xmm
; registers for both check prefixes.
define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2, <4 x float> %a3) { | define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2, <4 x float> %a3) { | ||||
; X32-LABEL: test_mm_mask_shuffle_ps: | ; X32-LABEL: test_mm_mask_shuffle_ps: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi28: | ; X32-NEXT: .Lcfi56: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0] | ; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi57: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_mask_shuffle_ps: | ; X64-LABEL: test_mm_mask_shuffle_ps: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
; X64-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0] | ; X64-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0] | ||||
; X64-NEXT: retq | ; X64-NEXT: retq | ||||
; Only the low 4 bits of the i8 mask are kept; they are reinterpreted as a
; <4 x i1> per-lane predicate for the select.
%trn1 = trunc i8 %a1 to i4 | %trn1 = trunc i8 %a1 to i4 | ||||
%arg1 = bitcast i4 %trn1 to <4 x i1> | %arg1 = bitcast i4 %trn1 to <4 x i1> | ||||
%res0 = shufflevector <4 x float> %a2, <4 x float> %a3, <4 x i32> <i32 0, i32 1, i32 4, i32 4> | %res0 = shufflevector <4 x float> %a2, <4 x float> %a3, <4 x i32> <i32 0, i32 1, i32 4, i32 4> | ||||
%res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0 | %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0 | ||||
ret <4 x float> %res1 | ret <4 x float> %res1 | ||||
} | } | ||||
define <4 x float> @test_mm_maskz_shuffle_ps(i8 %a0, <4 x float> %a1, <4 x float> %a2) { | define <4 x float> @test_mm_maskz_shuffle_ps(i8 %a0, <4 x float> %a1, <4 x float> %a2) { | ||||
; X32-LABEL: test_mm_maskz_shuffle_ps: | ; X32-LABEL: test_mm_maskz_shuffle_ps: | ||||
; X32: # BB#0: | ; X32: # BB#0: | ||||
; X32-NEXT: pushl %eax | ; X32-NEXT: pushl %eax | ||||
; X32-NEXT: .Lcfi29: | ; X32-NEXT: .Lcfi58: | ||||
; X32-NEXT: .cfi_def_cfa_offset 8 | ; X32-NEXT: .cfi_def_cfa_offset 8 | ||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ; X32-NEXT: movb {{[0-9]+}}(%esp), %al | ||||
; X32-NEXT: andb $15, %al | ; X32-NEXT: andb $15, %al | ||||
; X32-NEXT: movb %al, (%esp) | ; X32-NEXT: movb %al, (%esp) | ||||
; X32-NEXT: movzbl (%esp), %eax | ; X32-NEXT: movzbl (%esp), %eax | ||||
; X32-NEXT: kmovw %eax, %k1 | ; X32-NEXT: kmovw %eax, %k1 | ||||
; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0] | ; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0] | ||||
; X32-NEXT: popl %eax | ; X32-NEXT: popl %eax | ||||
; X32-NEXT: .Lcfi59: | |||||
; X32-NEXT: .cfi_def_cfa_offset 4 | |||||
; X32-NEXT: retl | ; X32-NEXT: retl | ||||
; | ; | ||||
; X64-LABEL: test_mm_maskz_shuffle_ps: | ; X64-LABEL: test_mm_maskz_shuffle_ps: | ||||
; X64: # BB#0: | ; X64: # BB#0: | ||||
; X64-NEXT: andb $15, %dil | ; X64-NEXT: andb $15, %dil | ||||
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp) | ||||
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax | ||||
; X64-NEXT: kmovw %eax, %k1 | ; X64-NEXT: kmovw %eax, %k1 | ||||
▲ Show 20 Lines • Show All 62 Lines • Show Last 20 Lines |