Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -1242,8 +1242,9 @@
                    (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr,
                    "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
-                   [(set _.RC:$dst, (X86select _.KRCWM:$mask, (_.VT _.RC:$src1),
-                                    (_.VT _.RC:$src2)))]>, EVEX_4V, EVEX_K;
+                   [(set _.RC:$dst, (vselect _.KRCWM:$mask,
+                                    (_.VT _.RC:$src2),
+                                    (_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K;
   let hasSideEffects = 0 in
   def rrkz : AVX5128I,
+                   [(set _.RC:$dst, (vselect _.KRCWM:$mask,
+                                    (_.VT (bitconvert (_.LdFrag addr:$src2))),
+                                    (_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
   let mayLoad = 1, hasSideEffects = 0 in
   def rmkz : AVX5128I,
+                   [(set _.RC:$dst,(vselect _.KRCWM:$mask,
+                                    (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+                                    (_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
   let mayLoad = 1, hasSideEffects = 0 in
Index: test/CodeGen/X86/avx512-bugfix-23634.ll
===================================================================
--- test/CodeGen/X86/avx512-bugfix-23634.ll
+++ test/CodeGen/X86/avx512-bugfix-23634.ll
@@ -1,13 +1,26 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK-LABEL: f_fu
-; CHECK-NOT: vpblend
-; CHECK: vmovdqa32 {{.*}} {%k1}
-
 define void @f_fu(float* %ret, float* %aa, float %b) {
+; CHECK-LABEL: f_fu:
+; CHECK:       ## BB#0: ## %allocas
+; CHECK-NEXT:    vcvttss2si %xmm0, %eax
+; CHECK-NEXT:    vpbroadcastd %eax, %zmm0
+; CHECK-NEXT:    vcvttps2dq (%rsi), %zmm1
+; CHECK-NEXT:    vpsrld $31, %zmm0, %zmm2
+; CHECK-NEXT:    vpaddd %zmm2, %zmm0, %zmm2
+; CHECK-NEXT:    vpsrad $1, %zmm2, %zmm2
+; CHECK-NEXT:    movw $-21846, %ax ## imm = 0xAAAA
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpblendmd {{.*}}(%rip), %zmm1, %zmm1 {%k1}
+; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
+; CHECK-NEXT:    vmovups %zmm0, (%rdi)
+; CHECK-NEXT:    retq
 allocas:
   %ptr_cast_for_load = bitcast float* %aa to <16 x float>*
   %ptr_masked_load.39 = load <16 x float>, <16 x float>* %ptr_cast_for_load, align 4
@@ -23,13 +36,13 @@
   %v1.i = select <16 x i1> , <16 x i32> , <16 x i32> %a_load_to_int32
 
-  %foo_test = add <16 x i32> %div_v019_load_, %b_load_to_int32_broadcast
+  %foo_test = add <16 x i32> %div_v019_load_, %b_load_to_int32_broadcast
 
-  %add_struct_offset_y_struct_offset33_x = add <16 x i32> %foo_test, %v1.i
+  %add_struct_offset_y_struct_offset33_x = add <16 x i32> %foo_test, %v1.i
 
   %val = sitofp <16 x i32> %add_struct_offset_y_struct_offset33_x to <16 x float>
   %ptrcast = bitcast float* %ret to <16 x float>*
   store <16 x float> %val, <16 x float>* %ptrcast, align 4
   ret void
-}
\ No newline at end of file
+}
Index: test/CodeGen/X86/avx512-mov.ll
===================================================================
--- test/CodeGen/X86/avx512-mov.ll
+++ test/CodeGen/X86/avx512-mov.ll
@@ -1,279 +1,320 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
 
-; CHECK-LABEL: @test1
-; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
-; CHECK: ret
 define i32 @test1(float %x) {
+; CHECK-LABEL: test1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = bitcast float %x to i32
   ret i32 %res
 }
 
-; CHECK-LABEL: @test2
-; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
-; CHECK: ret
 define <4 x i32> @test2(i32 %x) {
+; CHECK-LABEL: test2:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <4 x i32>undef, i32 %x, i32 0
   ret <4 x i32>%res
 }
 
-; CHECK-LABEL: @test3
-; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
-; CHECK: ret
 define <2 x i64> @test3(i64 %x) {
+; CHECK-LABEL: test3:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <2 x i64>undef, i64 %x, i32 0
   ret <2 x i64>%res
 }
 
-; CHECK-LABEL: @test4
-; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
-; CHECK: ret
 define <4 x i32> @test4(i32* %x) {
+; CHECK-LABEL: test4:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
+; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x
   %res = insertelement <4 x i32>undef, i32 %y, i32 0
   ret <4 x i32>%res
 }
 
-; CHECK-LABEL: @test5
-; CHECK: vmovss %xmm0, (%rdi) ## encoding: [0x62
-; CHECK: ret
 define void @test5(float %x, float* %y) {
+; CHECK-LABEL: test5:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovss %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x11,0x07]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   store float %x, float* %y, align 4
   ret void
 }
 
-; CHECK-LABEL: @test6
-; CHECK: vmovsd %xmm0, (%rdi) ## encoding: [0x62
-; CHECK: ret
 define void @test6(double %x, double* %y) {
+; CHECK-LABEL: test6:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovsd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x11,0x07]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   store double %x, double* %y, align 8
   ret void
 }
 
-; CHECK-LABEL: @test7
-; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
-; CHECK: ret
 define float @test7(i32* %x) {
+; CHECK-LABEL: test7:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
+; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x
   %res = bitcast i32 %y to float
   ret float %res
 }
 
-; CHECK-LABEL: @test8
-; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
-; CHECK: ret
 define i32 @test8(<4 x i32> %x) {
+; CHECK-LABEL: test8:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = extractelement <4 x i32> %x, i32 0
   ret i32 %res
 }
 
-; CHECK-LABEL: @test9
-; CHECK: vmovq %xmm0, %rax ## encoding: [0x62
-; CHECK: ret
 define i64 @test9(<2 x i64> %x) {
+; CHECK-LABEL: test9:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovq %xmm0, %rax ## encoding: [0x62,0xf1,0xfd,0x08,0x7e,0xc0]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = extractelement <2 x i64> %x, i32 0
   ret i64 %res
 }
 
-; CHECK-LABEL: @test10
-; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
-; CHECK: ret
 define <4 x i32> @test10(i32* %x) {
+; CHECK-LABEL: test10:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
+; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x, align 4
   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   ret <4 x i32>%res
 }
 
-; CHECK-LABEL: @test11
-; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
-; CHECK: ret
define <4 x float> @test11(float* %x) { +; CHECK-LABEL: test11: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07] +; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retq ## encoding: [0xc3] %y = load float, float* %x, align 4 %res = insertelement <4 x float>zeroinitializer, float %y, i32 0 ret <4 x float>%res } -; CHECK-LABEL: @test12 -; CHECK: vmovsd (%rdi), %xmm0 ## encoding: [0x62 -; CHECK: ret define <2 x double> @test12(double* %x) { +; CHECK-LABEL: test12: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovsd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x10,0x07] +; CHECK-NEXT: ## xmm0 = mem[0],zero +; CHECK-NEXT: retq ## encoding: [0xc3] %y = load double, double* %x, align 8 %res = insertelement <2 x double>zeroinitializer, double %y, i32 0 ret <2 x double>%res } -; CHECK-LABEL: @test13 -; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62 -; CHECK: ret define <2 x i64> @test13(i64 %x) { +; CHECK-LABEL: test13: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7] +; CHECK-NEXT: retq ## encoding: [0xc3] %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 ret <2 x i64>%res } -; CHECK-LABEL: @test14 -; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62 -; CHECK: ret define <4 x i32> @test14(i32 %x) { +; CHECK-LABEL: test14: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] +; CHECK-NEXT: retq ## encoding: [0xc3] %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 ret <4 x i32>%res } -; CHECK-LABEL: @test15 -; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62 -; CHECK: ret define <4 x i32> @test15(i32* %x) { +; CHECK-LABEL: test15: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07] +; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retq ## encoding: [0xc3] %y = load i32, i32* %x, align 4 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 ret <4 x i32>%res } -; CHECK-LABEL: test16 -; CHECK: vmovdqu32 -; CHECK: ret define <16 x i32> @test16(i8 * %addr) { +; CHECK-LABEL: test16: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x i32>* %res = load <16 x i32>, <16 x i32>* %vaddr, align 1 ret <16 x i32>%res } -; CHECK-LABEL: test17 -; CHECK: vmovdqa32 -; CHECK: ret define <16 x i32> @test17(i8 * %addr) { +; CHECK-LABEL: test17: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x i32>* %res = load <16 x i32>, <16 x i32>* %vaddr, align 64 ret <16 x i32>%res } -; CHECK-LABEL: test18 -; CHECK: vmovdqa64 -; CHECK: ret define void @test18(i8 * %addr, <8 x i64> %data) { +; CHECK-LABEL: test18: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i64>* store <8 x i64>%data, <8 x i64>* %vaddr, align 64 ret void } -; CHECK-LABEL: test19 -; CHECK: vmovdqu32 -; CHECK: ret define void @test19(i8 * %addr, <16 x i32> %data) { +; CHECK-LABEL: test19: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x i32>* store <16 x i32>%data, <16 x i32>* %vaddr, align 1 ret void } -; CHECK-LABEL: test20 -; CHECK: 
vmovdqa32 -; CHECK: ret define void @test20(i8 * %addr, <16 x i32> %data) { +; CHECK-LABEL: test20: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x i32>* store <16 x i32>%data, <16 x i32>* %vaddr, align 64 ret void } -; CHECK-LABEL: test21 -; CHECK: vmovdqa64 -; CHECK: ret define <8 x i64> @test21(i8 * %addr) { +; CHECK-LABEL: test21: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i64>* %res = load <8 x i64>, <8 x i64>* %vaddr, align 64 ret <8 x i64>%res } -; CHECK-LABEL: test22 -; CHECK: vmovdqu64 -; CHECK: ret define void @test22(i8 * %addr, <8 x i64> %data) { +; CHECK-LABEL: test22: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i64>* store <8 x i64>%data, <8 x i64>* %vaddr, align 1 ret void } -; CHECK-LABEL: test23 -; CHECK: vmovdqu64 -; CHECK: ret define <8 x i64> @test23(i8 * %addr) { +; CHECK-LABEL: test23: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i64>* %res = load <8 x i64>, <8 x i64>* %vaddr, align 1 ret <8 x i64>%res } -; CHECK-LABEL: test24 -; CHECK: vmovapd -; CHECK: ret define void @test24(i8 * %addr, <8 x double> %data) { +; CHECK-LABEL: test24: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovapd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x double>* store <8 x double>%data, <8 x double>* %vaddr, align 64 ret void } -; CHECK-LABEL: test25 -; CHECK: vmovapd -; CHECK: ret define <8 x double> @test25(i8 * %addr) { +; CHECK-LABEL: test25: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x double>* %res = load <8 x double>, <8 x double>* %vaddr, align 64 ret <8 x double>%res } -; CHECK-LABEL: test26 -; CHECK: vmovaps -; CHECK: ret define void @test26(i8 * %addr, <16 x float> %data) { +; CHECK-LABEL: test26: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x float>* store <16 x float>%data, <16 x float>* %vaddr, align 64 ret void } -; CHECK-LABEL: test27 -; CHECK: vmovaps -; CHECK: ret define <16 x float> @test27(i8 * %addr) { +; CHECK-LABEL: test27: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x float>* %res = load <16 x float>, <16 x float>* %vaddr, align 64 ret <16 x float>%res } -; CHECK-LABEL: test28 -; CHECK: vmovupd -; CHECK: ret define void @test28(i8 * %addr, <8 x double> %data) { +; CHECK-LABEL: test28: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x double>* store <8 x double>%data, <8 x double>* %vaddr, align 1 ret void } -; CHECK-LABEL: test29 -; CHECK: vmovupd -; CHECK: ret define <8 x double> @test29(i8 * %addr) { +; CHECK-LABEL: test29: +; CHECK: ## BB#0: +; CHECK-NEXT: 
vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x double>* %res = load <8 x double>, <8 x double>* %vaddr, align 1 ret <8 x double>%res } -; CHECK-LABEL: test30 -; CHECK: vmovups -; CHECK: ret define void @test30(i8 * %addr, <16 x float> %data) { +; CHECK-LABEL: test30: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x float>* store <16 x float>%data, <16 x float>* %vaddr, align 1 ret void } -; CHECK-LABEL: test31 -; CHECK: vmovups -; CHECK: ret define <16 x float> @test31(i8 * %addr) { +; CHECK-LABEL: test31: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x float>* %res = load <16 x float>, <16 x float>* %vaddr, align 1 ret <16 x float>%res } -; CHECK-LABEL: test32 -; CHECK: vmovdqa32{{.*{%k[1-7]} }} -; CHECK: ret define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { +; CHECK-LABEL: test32: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 @@ -281,10 +322,13 @@ ret <16 x i32>%res } -; CHECK-LABEL: test33 -; CHECK: vmovdqu32{{.*{%k[1-7]} }} -; CHECK: ret define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { +; CHECK-LABEL: test33: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 @@ -292,10 +336,13 @@ ret <16 x i32>%res } -; CHECK-LABEL: test34 -; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }} -; CHECK: ret define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) { +; CHECK-LABEL: test34: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9] +; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 @@ -303,10 +350,13 @@ ret <16 x i32>%res } -; CHECK-LABEL: test35 -; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }} -; CHECK: ret define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) { +; CHECK-LABEL: test35: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9] +; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x07] +; CHECK-NEXT: retq ## 
encoding: [0xc3] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 @@ -314,10 +364,13 @@ ret <16 x i32>%res } -; CHECK-LABEL: test36 -; CHECK: vmovdqa64{{.*{%k[1-7]} }} -; CHECK: ret define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { +; CHECK-LABEL: test36: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 @@ -325,10 +378,13 @@ ret <8 x i64>%res } -; CHECK-LABEL: test37 -; CHECK: vmovdqu64{{.*{%k[1-7]} }} -; CHECK: ret define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { +; CHECK-LABEL: test37: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 @@ -336,10 +392,13 @@ ret <8 x i64>%res } -; CHECK-LABEL: test38 -; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }} -; CHECK: ret define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) { +; CHECK-LABEL: test38: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 @@ -347,10 +406,13 @@ ret <8 x i64>%res } -; CHECK-LABEL: test39 -; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }} -; CHECK: ret define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) { +; CHECK-LABEL: test39: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 @@ -358,10 +420,14 @@ ret <8 x i64>%res } -; CHECK-LABEL: test40 -; CHECK: vmovaps{{.*{%k[1-7]} }} -; CHECK: ret define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { +; CHECK-LABEL: test40: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] +; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07] +; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04] +; CHECK-NEXT: vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one 
<16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 64 @@ -369,10 +435,14 @@ ret <16 x float>%res } -; CHECK-LABEL: test41 -; CHECK: vmovups{{.*{%k[1-7]} }} -; CHECK: ret define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { +; CHECK-LABEL: test41: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] +; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07] +; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04] +; CHECK-NEXT: vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 1 @@ -380,10 +450,14 @@ ret <16 x float>%res } -; CHECK-LABEL: test42 -; CHECK: vmovaps{{.*{%k[1-7]} {z} }} -; CHECK: ret define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) { +; CHECK-LABEL: test42: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9] +; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07] +; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 64 @@ -391,10 +465,14 @@ ret <16 x float>%res } -; CHECK-LABEL: test43 -; CHECK: vmovups{{.*{%k[1-7]} {z} }} -; CHECK: ret define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) { +; CHECK-LABEL: test43: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9] +; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07] +; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04] +; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 1 @@ -402,10 +480,14 @@ ret <16 x float>%res } -; CHECK-LABEL: test44 -; CHECK: vmovapd{{.*{%k[1-7]} }} -; CHECK: ret define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { +; CHECK-LABEL: test44: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] +; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07] +; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04] +; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 64 @@ -413,10 +495,14 @@ ret <8 x double>%res } -; CHECK-LABEL: test45 -; CHECK: vmovupd{{.*{%k[1-7]} }} -; CHECK: ret define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { +; CHECK-LABEL: 
test45: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2] +; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07] +; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04] +; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 1 @@ -424,10 +510,14 @@ ret <8 x double>%res } -; CHECK-LABEL: test46 -; CHECK: vmovapd{{.*{%k[1-7]} {z} }} -; CHECK: ret define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) { +; CHECK-LABEL: test46: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9] +; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07] +; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04] +; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 64 @@ -435,10 +525,14 @@ ret <8 x double>%res } -; CHECK-LABEL: test47 -; CHECK: vmovupd{{.*{%k[1-7]} {z} }} -; CHECK: ret define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) { +; CHECK-LABEL: test47: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9] +; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07] +; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04] +; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 1 Index: test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512-vec-cmp.ll +++ test/CodeGen/X86/avx512-vec-cmp.ll @@ -6,8 +6,7 @@ ; CHECK-LABEL: test1: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = fcmp ole <16 x float> %x, %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y @@ -18,8 +17,7 @@ ; CHECK-LABEL: test2: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = fcmp ole <8 x double> %x, %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y @@ -30,8 +28,7 @@ ; CHECK-LABEL: test3: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %yp, align 4 %mask = icmp eq <16 x i32> %x, %y @@ -43,8 +40,7 @@ ; CHECK-LABEL: test4_unsigned: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: 
vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp uge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y @@ -55,8 +51,7 @@ ; CHECK-LABEL: test5: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y @@ -67,8 +62,7 @@ ; CHECK-LABEL: test6_unsigned: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp ugt <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y @@ -87,8 +81,7 @@ ; SKX: ## BB#0: ; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1 -; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %mask = fcmp olt <4 x float> %a, zeroinitializer @@ -108,8 +101,7 @@ ; SKX: ## BB#0: ; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1 -; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %mask = fcmp olt <2 x double> %a, zeroinitializer %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b @@ -126,8 +118,7 @@ ; SKX-LABEL: test9: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 -; SKX-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %mask = icmp eq <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y @@ -144,8 +135,7 @@ ; SKX-LABEL: test10: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 -; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %mask = fcmp oeq <8 x float> %x, %y @@ -248,8 +238,7 @@ ; CHECK-LABEL: test16: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp sge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y @@ -260,8 +249,7 @@ ; CHECK-LABEL: test17: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sgt <16 x i32> %x, %y @@ -273,8 +261,7 @@ ; CHECK-LABEL: test18: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sle <16 x i32> %x, %y @@ -286,8 +273,7 @@ ; CHECK-LABEL: test19: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp ule <16 x i32> %x, %y @@ -300,8 +286,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: 
vpcmpeqd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp eq <16 x i32> %x1, %y1 %mask0 = icmp eq <16 x i32> %x, %y @@ -315,8 +300,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i64> %x1, %y1 %mask0 = icmp sle <8 x i64> %x, %y @@ -330,8 +314,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sgt <8 x i64> %x1, %y1 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4 @@ -346,8 +329,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i32> %x1, %y1 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 @@ -361,8 +343,7 @@ ; CHECK-LABEL: test24: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 @@ -376,8 +357,7 @@ ; CHECK-LABEL: test25: ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 @@ -392,8 +372,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <16 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -410,8 +389,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1 ; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %mask1 = icmp sge <8 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -481,8 +459,7 @@ ; SKX-LABEL: test30: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 -; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %mask = fcmp oeq <4 x double> %x, %y @@ -500,8 +477,7 @@ ; SKX-LABEL: test31: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 -; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %y = load <2 x double>, <2 x double>* %yp, align 4 @@ -520,8 +496,7 @@ ; SKX-LABEL: test32: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 -; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %ymm0, 
%ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %y = load <4 x double>, <4 x double>* %yp, align 4 @@ -534,8 +509,7 @@ ; CHECK-LABEL: test33: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <8 x double>, <8 x double>* %yp, align 4 %mask = fcmp olt <8 x double> %x, %y @@ -553,8 +527,7 @@ ; SKX-LABEL: test34: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 -; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %y = load <4 x float>, <4 x float>* %yp, align 4 %mask = fcmp olt <4 x float> %x, %y @@ -573,8 +546,7 @@ ; SKX-LABEL: test35: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 -; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %y = load <8 x float>, <8 x float>* %yp, align 4 @@ -587,8 +559,7 @@ ; CHECK-LABEL: test36: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %y = load <16 x float>, <16 x float>* %yp, align 4 %mask = fcmp olt <16 x float> %x, %y @@ -600,8 +571,7 @@ ; CHECK-LABEL: test37: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 -; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %a = load double, double* %ptr @@ -624,8 +594,7 @@ ; SKX-LABEL: test38: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 -; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %a = load double, double* %ptr @@ -648,8 +617,7 @@ ; SKX-LABEL: test39: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 -; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %a = load double, double* %ptr @@ -666,8 +634,7 @@ ; CHECK-LABEL: test40: ; CHECK: ## BB#0: ; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 -; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %a = load float, float* %ptr @@ -690,8 +657,7 @@ ; SKX-LABEL: test41: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 -; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq %a = load float, float* %ptr @@ -714,8 +680,7 @@ ; SKX-LABEL: test42: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 -; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1} -; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq %a = load float, float* %ptr @@ -734,8 +699,7 @@ ; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} -; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; KNL-NEXT: vmovaps %zmm1, %zmm0 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: retq ; ; SKX-LABEL: test43: @@ -743,8 +707,7 @@ ; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ; SKX-NEXT: vpmovw2m %xmm2, %k1 ; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} -; SKX-NEXT: vmovapd %zmm0, %zmm1 {%k1} -; SKX-NEXT: vmovaps 
%zmm1, %zmm0 +; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq %a = load double, double* %ptr Index: test/CodeGen/X86/avx512bw-mov.ll =================================================================== --- test/CodeGen/X86/avx512bw-mov.ll +++ test/CodeGen/X86/avx512bw-mov.ll @@ -18,10 +18,13 @@ ret void } -; CHECK-LABEL: test3 -; CHECK: vmovdqu8{{.*{%k[1-7]}}} -; CHECK: ret define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) { +; CHECK-LABEL: test3: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; CHECK-NEXT: vpcmpneqb %zmm2, %zmm1, %k1 +; CHECK-NEXT: vpblendmb (%rdi), %zmm0, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ne <64 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <64 x i8>* %r = load <64 x i8>, <64 x i8>* %vaddr, align 1 @@ -58,10 +61,13 @@ ret void } -; CHECK-LABEL: test7 -; CHECK: vmovdqu16{{.*{%k[1-7]}}} -; CHECK: ret define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) { +; CHECK-LABEL: test7: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2 +; CHECK-NEXT: vpcmpneqw %zmm2, %zmm1, %k1 +; CHECK-NEXT: vpblendmw (%rdi), %zmm0, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ne <32 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <32 x i16>* %r = load <32 x i16>, <32 x i16>* %vaddr, align 1 Index: test/CodeGen/X86/avx512bw-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512bw-vec-cmp.ll +++ test/CodeGen/X86/avx512bw-vec-cmp.ll @@ -1,94 +1,105 @@ +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -; CHECK-LABEL: test1 -; CHECK: vpcmpeqb {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <64 x i8> @test1(<64 x i8> %x, <64 x i8> %y) nounwind { +; CHECK-LABEL: test1: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp eq <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y ret <64 x i8> %max } -; CHECK-LABEL: test2 -; CHECK: vpcmpgtb {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind { +; CHECK-LABEL: test2: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtb %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sgt <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y ret <64 x i8> %max } -; CHECK-LABEL: @test3 -; CHECK: vpcmplew {{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <32 x i16> @test3(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1) nounwind { +; CHECK-LABEL: test3: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k1 +; CHECK-NEXT: vpblendmw %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sge <32 x i16> %x, %y %max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y ret <32 x i16> %max } -; CHECK-LABEL: test4 -; CHECK: vpcmpnleub {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind { +; CHECK-LABEL: test4: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpnleub %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ugt <64 x i8> %x, %y %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y ret <64 x i8> %max } -; CHECK-LABEL: test5 -; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <32 x 
i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwind { +; CHECK-LABEL: test5: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k1 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %yp, align 4 %mask = icmp eq <32 x i16> %x, %y %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1 ret <32 x i16> %max } -; CHECK-LABEL: @test6 -; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test6: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k1 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp sgt <32 x i16> %x, %y %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1 ret <32 x i16> %max } -; CHECK-LABEL: @test7 -; CHECK: vpcmplew (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test7: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew (%rdi), %zmm0, %k1 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp sle <32 x i16> %x, %y %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1 ret <32 x i16> %max } -; CHECK-LABEL: @test8 -; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test8: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask = icmp ule <32 x i16> %x, %y %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1 ret <32 x i16> %max } -; CHECK-LABEL: @test9 -; CHECK: vpcmpeqw %zmm{{.*{%k[1-7]}}} -; CHECK: vmovdqu16 -; CHECK: ret define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16> %y1) nounwind { +; CHECK-LABEL: test9: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpcmpeqw %zmm3, %zmm2, %k1 {%k1} +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp eq <32 x i16> %x1, %y1 %mask0 = icmp eq <32 x i16> %x, %y %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer @@ -96,11 +107,13 @@ ret <32 x i16> %max } -; CHECK-LABEL: @test10 -; CHECK: vpcmpleb %zmm{{.*{%k[1-7]}}} -; CHECK: vmovdqu8 -; CHECK: ret define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y1) nounwind { +; CHECK-LABEL: test10: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleb %zmm1, %zmm0, %k1 +; CHECK-NEXT: vpcmpleb %zmm2, %zmm3, %k1 {%k1} +; CHECK-NEXT: vpblendmb %zmm0, %zmm2, %zmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <64 x i8> %x1, %y1 %mask0 = icmp sle <64 x i8> %x, %y %mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer @@ -108,11 +121,13 @@ ret <64 x i8> %max } -; CHECK-LABEL: @test11 -; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqu8 -; CHECK: ret define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i8> %y1) nounwind { +; CHECK-LABEL: test11: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtb %zmm2, %zmm1, %k1 +; CHECK-NEXT: vpcmpgtb (%rdi), %zmm0, %k1 {%k1} +; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sgt <64 x i8> %x1, %y1 %y = load <64 x i8>, <64 x 
i8>* %y.ptr, align 4 %mask0 = icmp sgt <64 x i8> %x, %y @@ -121,11 +136,13 @@ ret <64 x i8> %max } -; CHECK-LABEL: @test12 -; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqu16 -; CHECK: ret define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 x i16> %y1) nounwind { +; CHECK-LABEL: test12: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew %zmm1, %zmm2, %k1 +; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 {%k1} +; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <32 x i16> %x1, %y1 %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4 %mask0 = icmp ule <32 x i16> %x, %y Index: test/CodeGen/X86/avx512bwvl-mov.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-mov.ll +++ test/CodeGen/X86/avx512bwvl-mov.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s ; CHECK-LABEL: test_256_1 @@ -18,10 +19,13 @@ ret void } -; CHECK-LABEL: test_256_3 -; CHECK: vmovdqu8{{.*{%k[1-7]} }}## encoding: [0x62 -; CHECK: ret define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) { +; CHECK-LABEL: test_256_3: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpcmpneqb %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xca,0x04] +; CHECK-NEXT: vpblendmb (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x66,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <32 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <32 x i8>* %r = load <32 x i8>, <32 x i8>* %vaddr, align 1 @@ -58,10 +62,13 @@ ret void } -; CHECK-LABEL: test_256_7 -; CHECK: vmovdqu16{{.*{%k[1-7]} }}## encoding: [0x62 -; CHECK: ret define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) { +; CHECK-LABEL: test_256_7: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpcmpneqw %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x3f,0xca,0x04] +; CHECK-NEXT: vpblendmw (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x66,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i16>* %r = load <16 x i16>, <16 x i16>* %vaddr, align 1 @@ -98,10 +105,13 @@ ret void } -; CHECK-LABEL: test_128_3 -; CHECK: vmovdqu8{{.*{%k[1-7]} }}## encoding: [0x62 -; CHECK: ret define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) { +; CHECK-LABEL: test_128_3: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqb %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x3f,0xca,0x04] +; CHECK-NEXT: vpblendmb (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x66,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <16 x i8> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i8>* %r = load <16 x i8>, <16 x i8>* %vaddr, align 1 @@ -138,10 +148,13 @@ ret void } -; CHECK-LABEL: test_128_7 -; CHECK: vmovdqu16{{.*{%k[1-7]} }}## encoding: [0x62 -; CHECK: ret define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) { +; CHECK-LABEL: test_128_7: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: 
vpcmpneqw %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3f,0xca,0x04] +; CHECK-NEXT: vpblendmw (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x66,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i16> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i16>* %r = load <8 x i16>, <8 x i16>* %vaddr, align 1 Index: test/CodeGen/X86/avx512bwvl-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-vec-cmp.ll +++ test/CodeGen/X86/avx512bwvl-vec-cmp.ll @@ -1,94 +1,105 @@ +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -; CHECK-LABEL: test256_1 -; CHECK: vpcmpeqb {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <32 x i8> @test256_1(<32 x i8> %x, <32 x i8> %y) nounwind { +; CHECK-LABEL: test256_1: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask = icmp eq <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y ret <32 x i8> %max } -; CHECK-LABEL: test256_2 -; CHECK: vpcmpgtb {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <32 x i8> @test256_2(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind { +; CHECK-LABEL: test256_2: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sgt <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1 ret <32 x i8> %max } -; CHECK-LABEL: @test256_3 -; CHECK: vpcmplew {{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <16 x i16> @test256_3(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1) nounwind { +; CHECK-LABEL: test256_3: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k1 +; CHECK-NEXT: vpblendmw %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sge <16 x i16> %x, %y %max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y ret <16 x i16> %max } -; CHECK-LABEL: test256_4 -; CHECK: vpcmpnleub {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind { +; CHECK-LABEL: test256_4: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ugt <32 x i8> %x, %y %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1 ret <32 x i8> %max } -; CHECK-LABEL: test256_5 -; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nounwind { +; CHECK-LABEL: test256_5: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k1 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %yp, align 4 %mask = icmp eq <16 x i16> %x, %y %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1 ret <16 x i16> %max } -; CHECK-LABEL: @test256_6 -; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test256_6: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k1 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp sgt <16 x i16> %x, %y %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1 ret <16 x i16> 
%max } -; CHECK-LABEL: @test256_7 -; CHECK: vpcmplew (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test256_7: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew (%rdi), %ymm0, %k1 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp sle <16 x i16> %x, %y %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1 ret <16 x i16> %max } -; CHECK-LABEL: @test256_8 -; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test256_8: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask = icmp ule <16 x i16> %x, %y %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1 ret <16 x i16> %max } -; CHECK-LABEL: @test256_9 -; CHECK: vpcmpeqw %ymm{{.*{%k[1-7]}}} -; CHECK: vmovdqu16 -; CHECK: ret define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x i16> %y1) nounwind { +; CHECK-LABEL: test256_9: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpcmpeqw %ymm3, %ymm2, %k1 {%k1} +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp eq <16 x i16> %x1, %y1 %mask0 = icmp eq <16 x i16> %x, %y %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer @@ -96,11 +107,13 @@ ret <16 x i16> %max } -; CHECK-LABEL: @test256_10 -; CHECK: vpcmpleb %ymm{{.*{%k[1-7]}}} -; CHECK: vmovdqu8 -; CHECK: ret define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8> %y1) nounwind { +; CHECK-LABEL: test256_10: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpcmpleb %ymm2, %ymm3, %k1 {%k1} +; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <32 x i8> %x1, %y1 %mask0 = icmp sle <32 x i8> %x, %y %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer @@ -108,11 +121,13 @@ ret <32 x i8> %max } -; CHECK-LABEL: @test256_11 -; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqu8 -; CHECK: ret define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 x i8> %y1) nounwind { +; CHECK-LABEL: test256_11: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtb %ymm2, %ymm1, %k1 +; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k1 {%k1} +; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sgt <32 x i8> %x1, %y1 %y = load <32 x i8>, <32 x i8>* %y.ptr, align 4 %mask0 = icmp sgt <32 x i8> %x, %y @@ -121,11 +136,13 @@ ret <32 x i8> %max } -; CHECK-LABEL: @test256_12 -; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqu16 -; CHECK: ret define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, <16 x i16> %y1) nounwind { +; CHECK-LABEL: test256_12: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew %ymm1, %ymm2, %k1 +; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 {%k1} +; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <16 x i16> %x1, %y1 %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4 %mask0 = icmp ule <16 x i16> %x, %y @@ -134,95 +151,105 @@ ret <16 x i16> %max } -; CHECK-LABEL: test128_1 -; CHECK: vpcmpeqb {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <16 x i8> @test128_1(<16 x i8> 
%x, <16 x i8> %y) nounwind { +; CHECK-LABEL: test128_1: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp eq <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %max } -; CHECK-LABEL: test128_2 -; CHECK: vpcmpgtb {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <16 x i8> @test128_2(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind { +; CHECK-LABEL: test128_2: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sgt <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1 ret <16 x i8> %max } -; CHECK-LABEL: @test128_3 -; CHECK: vpcmplew {{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <8 x i16> @test128_3(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1) nounwind { +; CHECK-LABEL: test128_3: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k1 +; CHECK-NEXT: vpblendmw %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sge <8 x i16> %x, %y %max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y ret <8 x i16> %max } -; CHECK-LABEL: test128_4 -; CHECK: vpcmpnleub {{.*%k[0-7]}} -; CHECK: vmovdqu8 {{.*}}%k1 -; CHECK: ret define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind { +; CHECK-LABEL: test128_4: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ugt <16 x i8> %x, %y %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1 ret <16 x i8> %max } -; CHECK-LABEL: test128_5 -; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwind { +; CHECK-LABEL: test128_5: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %yp, align 4 %mask = icmp eq <8 x i16> %x, %y %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1 ret <8 x i16> %max } -; CHECK-LABEL: @test128_6 -; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test128_6: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask = icmp sgt <8 x i16> %x, %y %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1 ret <8 x i16> %max } -; CHECK-LABEL: @test128_7 -; CHECK: vpcmplew (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test128_7: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew (%rdi), %xmm0, %k1 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask = icmp sle <8 x i16> %x, %y %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1 ret <8 x i16> %max } -; CHECK-LABEL: @test128_8 -; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqu16 -; CHECK: ret define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind { +; CHECK-LABEL: test128_8: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %y = load <8 x i16>, <8 
x i16>* %y.ptr, align 4 %mask = icmp ule <8 x i16> %x, %y %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1 ret <8 x i16> %max } -; CHECK-LABEL: @test128_9 -; CHECK: vpcmpeqw %xmm{{.*{%k[1-7]}}} -; CHECK: vmovdqu16 -; CHECK: ret define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> %y1) nounwind { +; CHECK-LABEL: test128_9: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpcmpeqw %xmm3, %xmm2, %k1 {%k1} +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp eq <8 x i16> %x1, %y1 %mask0 = icmp eq <8 x i16> %x, %y %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer @@ -230,11 +257,13 @@ ret <8 x i16> %max } -; CHECK-LABEL: @test128_10 -; CHECK: vpcmpleb %xmm{{.*{%k[1-7]}}} -; CHECK: vmovdqu8 -; CHECK: ret define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8> %y1) nounwind { +; CHECK-LABEL: test128_10: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpcmpleb %xmm2, %xmm3, %k1 {%k1} +; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <16 x i8> %x1, %y1 %mask0 = icmp sle <16 x i8> %x, %y %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer @@ -242,11 +271,13 @@ ret <16 x i8> %max } -; CHECK-LABEL: @test128_11 -; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqu8 -; CHECK: ret define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 x i8> %y1) nounwind { +; CHECK-LABEL: test128_11: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtb %xmm2, %xmm1, %k1 +; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 {%k1} +; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sgt <16 x i8> %x1, %y1 %y = load <16 x i8>, <16 x i8>* %y.ptr, align 4 %mask0 = icmp sgt <16 x i8> %x, %y @@ -255,11 +286,13 @@ ret <16 x i8> %max } -; CHECK-LABEL: @test128_12 -; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqu16 -; CHECK: ret define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 x i16> %y1) nounwind { +; CHECK-LABEL: test128_12: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmplew %xmm1, %xmm2, %k1 +; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 {%k1} +; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <8 x i16> %x1, %y1 %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4 %mask0 = icmp ule <8 x i16> %x, %y Index: test/CodeGen/X86/avx512vl-mov.ll =================================================================== --- test/CodeGen/X86/avx512vl-mov.ll +++ test/CodeGen/X86/avx512vl-mov.ll @@ -1,153 +1,173 @@ +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s -; CHECK-LABEL: test_256_1 -; CHECK: vmovdqu32 -; CHECK: ret define <8 x i32> @test_256_1(i8 * %addr) { +; CHECK-LABEL: test_256_1: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i32>* %res = load <8 x i32>, <8 x i32>* %vaddr, align 1 ret <8 x i32>%res } -; CHECK-LABEL: test_256_2 -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test_256_2(i8 * %addr) { +; CHECK-LABEL: test_256_2: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i32>* %res = load <8 x 
i32>, <8 x i32>* %vaddr, align 32 ret <8 x i32>%res } -; CHECK-LABEL: test_256_3 -; CHECK: vmovdqa64 -; CHECK: ret define void @test_256_3(i8 * %addr, <4 x i64> %data) { +; CHECK-LABEL: test_256_3: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i64>* store <4 x i64>%data, <4 x i64>* %vaddr, align 32 ret void } -; CHECK-LABEL: test_256_4 -; CHECK: vmovdqu32 -; CHECK: ret define void @test_256_4(i8 * %addr, <8 x i32> %data) { +; CHECK-LABEL: test_256_4: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i32>* store <8 x i32>%data, <8 x i32>* %vaddr, align 1 ret void } -; CHECK-LABEL: test_256_5 -; CHECK: vmovdqa32 -; CHECK: ret define void @test_256_5(i8 * %addr, <8 x i32> %data) { +; CHECK-LABEL: test_256_5: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i32>* store <8 x i32>%data, <8 x i32>* %vaddr, align 32 ret void } -; CHECK-LABEL: test_256_6 -; CHECK: vmovdqa64 -; CHECK: ret define <4 x i64> @test_256_6(i8 * %addr) { +; CHECK-LABEL: test_256_6: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i64>* %res = load <4 x i64>, <4 x i64>* %vaddr, align 32 ret <4 x i64>%res } -; CHECK-LABEL: test_256_7 -; CHECK: vmovdqu64 -; CHECK: ret define void @test_256_7(i8 * %addr, <4 x i64> %data) { +; CHECK-LABEL: test_256_7: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i64>* store <4 x i64>%data, <4 x i64>* %vaddr, align 1 ret void } -; CHECK-LABEL: test_256_8 -; CHECK: vmovdqu64 -; CHECK: ret define <4 x i64> @test_256_8(i8 * %addr) { +; CHECK-LABEL: test_256_8: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i64>* %res = load <4 x i64>, <4 x i64>* %vaddr, align 1 ret <4 x i64>%res } -; CHECK-LABEL: test_256_9 -; CHECK: vmovapd {{.*}} ## encoding: [0x62 -; CHECK: ret define void @test_256_9(i8 * %addr, <4 x double> %data) { +; CHECK-LABEL: test_256_9: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovapd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x double>* store <4 x double>%data, <4 x double>* %vaddr, align 32 ret void } -; CHECK-LABEL: test_256_10 -; CHECK: vmovapd {{.*}} ## encoding: [0x62 -; CHECK: ret define <4 x double> @test_256_10(i8 * %addr) { +; CHECK-LABEL: test_256_10: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x double>* %res = load <4 x double>, <4 x double>* %vaddr, align 32 ret <4 x double>%res } -; CHECK-LABEL: test_256_11 -; CHECK: vmovaps {{.*}} ## encoding: [0x62 -; CHECK: ret define void @test_256_11(i8 * %addr, <8 x float> %data) { +; CHECK-LABEL: test_256_11: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = 
bitcast i8* %addr to <8 x float>* store <8 x float>%data, <8 x float>* %vaddr, align 32 ret void } -; CHECK-LABEL: test_256_12 -; CHECK: vmovaps {{.*}} ## encoding: [0x62 -; CHECK: ret define <8 x float> @test_256_12(i8 * %addr) { +; CHECK-LABEL: test_256_12: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x float>* %res = load <8 x float>, <8 x float>* %vaddr, align 32 ret <8 x float>%res } -; CHECK-LABEL: test_256_13 -; CHECK: vmovupd {{.*}} ## encoding: [0x62 -; CHECK: ret define void @test_256_13(i8 * %addr, <4 x double> %data) { +; CHECK-LABEL: test_256_13: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovupd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x double>* store <4 x double>%data, <4 x double>* %vaddr, align 1 ret void } -; CHECK-LABEL: test_256_14 -; CHECK: vmovupd {{.*}} ## encoding: [0x62 -; CHECK: ret define <4 x double> @test_256_14(i8 * %addr) { +; CHECK-LABEL: test_256_14: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x double>* %res = load <4 x double>, <4 x double>* %vaddr, align 1 ret <4 x double>%res } -; CHECK-LABEL: test_256_15 -; CHECK: vmovups {{.*}} ## encoding: [0x62 -; CHECK: ret define void @test_256_15(i8 * %addr, <8 x float> %data) { +; CHECK-LABEL: test_256_15: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x float>* store <8 x float>%data, <8 x float>* %vaddr, align 1 ret void } -; CHECK-LABEL: test_256_16 -; CHECK: vmovups {{.*}} ## encoding: [0x62 -; CHECK: ret define <8 x float> @test_256_16(i8 * %addr) { +; CHECK-LABEL: test_256_16: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x float>* %res = load <8 x float>, <8 x float>* %vaddr, align 1 ret <8 x float>%res } -; CHECK-LABEL: test_256_17 -; CHECK: vmovdqa32{{.*{%k[1-7]} }} -; CHECK: ret define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) { +; CHECK-LABEL: test_256_17: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i32>* %r = load <8 x i32>, <8 x i32>* %vaddr, align 32 @@ -155,10 +175,13 @@ ret <8 x i32>%res } -; CHECK-LABEL: test_256_18 -; CHECK: vmovdqu32{{.*{%k[1-7]} }} -; CHECK: ret define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) { +; CHECK-LABEL: test_256_18: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i32>* %r = load <8 x i32>, <8 x 
i32>* %vaddr, align 1 @@ -166,10 +189,13 @@ ret <8 x i32>%res } -; CHECK-LABEL: test_256_19 -; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }} -; CHECK: ret define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) { +; CHECK-LABEL: test_256_19: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i32>* %r = load <8 x i32>, <8 x i32>* %vaddr, align 32 @@ -177,10 +203,13 @@ ret <8 x i32>%res } -; CHECK-LABEL: test_256_20 -; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }} -; CHECK: ret define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) { +; CHECK-LABEL: test_256_20: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i32>* %r = load <8 x i32>, <8 x i32>* %vaddr, align 1 @@ -188,10 +217,13 @@ ret <8 x i32>%res } -; CHECK-LABEL: test_256_21 -; CHECK: vmovdqa64{{.*{%k[1-7]} }} -; CHECK: ret define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) { +; CHECK-LABEL: test_256_21: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i64>* %r = load <4 x i64>, <4 x i64>* %vaddr, align 32 @@ -199,10 +231,13 @@ ret <4 x i64>%res } -; CHECK-LABEL: test_256_22 -; CHECK: vmovdqu64{{.*{%k[1-7]} }} -; CHECK: ret define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) { +; CHECK-LABEL: test_256_22: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i64>* %r = load <4 x i64>, <4 x i64>* %vaddr, align 1 @@ -210,10 +245,13 @@ ret <4 x i64>%res } -; CHECK-LABEL: test_256_23 -; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }} -; CHECK: ret define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) { +; CHECK-LABEL: test_256_23: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i64>* %r = load <4 x i64>, <4 x i64>* %vaddr, align 32 @@ -221,10 +259,13 @@ ret <4 x i64>%res } -; CHECK-LABEL: 
test_256_24 -; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }} -; CHECK: ret define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) { +; CHECK-LABEL: test_256_24: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i64>* %r = load <4 x i64>, <4 x i64>* %vaddr, align 1 @@ -232,10 +273,14 @@ ret <4 x i64>%res } -; CHECK-LABEL: test_256_25 -; CHECK: vmovaps{{.*{%k[1-7]} }} -; CHECK: ret define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) { +; CHECK-LABEL: test_256_25: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07] +; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04] +; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x float>* %r = load <8 x float>, <8 x float>* %vaddr, align 32 @@ -243,10 +288,14 @@ ret <8 x float>%res } -; CHECK-LABEL: test_256_26 -; CHECK: vmovups{{.*{%k[1-7]} }} -; CHECK: ret define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) { +; CHECK-LABEL: test_256_26: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07] +; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04] +; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x float>* %r = load <8 x float>, <8 x float>* %vaddr, align 1 @@ -254,10 +303,14 @@ ret <8 x float>%res } -; CHECK-LABEL: test_256_27 -; CHECK: vmovaps{{.*{%k[1-7]} {z} }} -; CHECK: ret define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) { +; CHECK-LABEL: test_256_27: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07] +; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04] +; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x float>* %r = load <8 x float>, <8 x float>* %vaddr, align 32 @@ -265,10 +318,14 @@ ret <8 x float>%res } -; CHECK-LABEL: test_256_28 -; CHECK: vmovups{{.*{%k[1-7]} {z} }} -; CHECK: ret define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) { +; CHECK-LABEL: test_256_28: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07] +; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: 
[0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04] +; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = fcmp one <8 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x float>* %r = load <8 x float>, <8 x float>* %vaddr, align 1 @@ -276,10 +333,13 @@ ret <8 x float>%res } -; CHECK-LABEL: test_256_29 -; CHECK: vmovapd{{.*{%k[1-7]} }} -; CHECK: ret define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) { +; CHECK-LABEL: test_256_29: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] +; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x double>* %r = load <4 x double>, <4 x double>* %vaddr, align 32 @@ -287,10 +347,13 @@ ret <4 x double>%res } -; CHECK-LABEL: test_256_30 -; CHECK: vmovupd{{.*{%k[1-7]} }} -; CHECK: ret define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) { +; CHECK-LABEL: test_256_30: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] +; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x double>* %r = load <4 x double>, <4 x double>* %vaddr, align 1 @@ -298,10 +361,13 @@ ret <4 x double>%res } -; CHECK-LABEL: test_256_31 -; CHECK: vmovapd{{.*{%k[1-7]} {z} }} -; CHECK: ret define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) { +; CHECK-LABEL: test_256_31: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x double>* %r = load <4 x double>, <4 x double>* %vaddr, align 32 @@ -309,10 +375,13 @@ ret <4 x double>%res } -; CHECK-LABEL: test_256_32 -; CHECK: vmovupd{{.*{%k[1-7]} {z} }} -; CHECK: ret define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) { +; CHECK-LABEL: test_256_32: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x double>* %r = load <4 x double>, <4 x double>* %vaddr, align 1 @@ -320,154 +389,173 @@ ret <4 x double>%res } -; CHECK-LABEL: test_128_1 -; CHECK: vmovdqu32 -; CHECK: ret define <4 x i32> @test_128_1(i8 * %addr) { +; CHECK-LABEL: test_128_1: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i32>* %res = load <4 x i32>, <4 x i32>* %vaddr, 
align 1 ret <4 x i32>%res } -; CHECK-LABEL: test_128_2 -; CHECK: vmovdqa32 -; CHECK: ret define <4 x i32> @test_128_2(i8 * %addr) { +; CHECK-LABEL: test_128_2: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i32>* %res = load <4 x i32>, <4 x i32>* %vaddr, align 16 ret <4 x i32>%res } -; CHECK-LABEL: test_128_3 -; CHECK: vmovdqa64 -; CHECK: ret define void @test_128_3(i8 * %addr, <2 x i64> %data) { +; CHECK-LABEL: test_128_3: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x i64>* store <2 x i64>%data, <2 x i64>* %vaddr, align 16 ret void } -; CHECK-LABEL: test_128_4 -; CHECK: vmovdqu32 -; CHECK: ret define void @test_128_4(i8 * %addr, <4 x i32> %data) { +; CHECK-LABEL: test_128_4: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i32>* store <4 x i32>%data, <4 x i32>* %vaddr, align 1 ret void } -; CHECK-LABEL: test_128_5 -; CHECK: vmovdqa32 -; CHECK: ret define void @test_128_5(i8 * %addr, <4 x i32> %data) { +; CHECK-LABEL: test_128_5: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i32>* store <4 x i32>%data, <4 x i32>* %vaddr, align 16 ret void } -; CHECK-LABEL: test_128_6 -; CHECK: vmovdqa64 -; CHECK: ret define <2 x i64> @test_128_6(i8 * %addr) { +; CHECK-LABEL: test_128_6: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x i64>* %res = load <2 x i64>, <2 x i64>* %vaddr, align 16 ret <2 x i64>%res } -; CHECK-LABEL: test_128_7 -; CHECK: vmovdqu64 -; CHECK: ret define void @test_128_7(i8 * %addr, <2 x i64> %data) { +; CHECK-LABEL: test_128_7: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x i64>* store <2 x i64>%data, <2 x i64>* %vaddr, align 1 ret void } -; CHECK-LABEL: test_128_8 -; CHECK: vmovdqu64 -; CHECK: ret define <2 x i64> @test_128_8(i8 * %addr) { +; CHECK-LABEL: test_128_8: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x i64>* %res = load <2 x i64>, <2 x i64>* %vaddr, align 1 ret <2 x i64>%res } -; CHECK-LABEL: test_128_9 -; CHECK: vmovapd {{.*}} ## encoding: [0x62 -; CHECK: ret define void @test_128_9(i8 * %addr, <2 x double> %data) { +; CHECK-LABEL: test_128_9: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovapd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x double>* store <2 x double>%data, <2 x double>* %vaddr, align 16 ret void } -; CHECK-LABEL: test_128_10 -; CHECK: vmovapd {{.*}} ## encoding: [0x62 -; CHECK: ret define <2 x double> @test_128_10(i8 * %addr) { +; CHECK-LABEL: test_128_10: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x double>* %res = load <2 x double>, <2 x double>* 
%vaddr, align 16 ret <2 x double>%res } -; CHECK-LABEL: test_128_11 -; CHECK: vmovaps {{.*}} ## encoding: [0x62 -; CHECK: ret define void @test_128_11(i8 * %addr, <4 x float> %data) { +; CHECK-LABEL: test_128_11: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x float>* store <4 x float>%data, <4 x float>* %vaddr, align 16 ret void } -; CHECK-LABEL: test_128_12 -; CHECK: vmovaps {{.*}} ## encoding: [0x62 -; CHECK: ret define <4 x float> @test_128_12(i8 * %addr) { +; CHECK-LABEL: test_128_12: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x float>* %res = load <4 x float>, <4 x float>* %vaddr, align 16 ret <4 x float>%res } -; CHECK-LABEL: test_128_13 -; CHECK: vmovupd {{.*}} ## encoding: [0x62 -; CHECK: ret define void @test_128_13(i8 * %addr, <2 x double> %data) { +; CHECK-LABEL: test_128_13: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovupd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x double>* store <2 x double>%data, <2 x double>* %vaddr, align 1 ret void } -; CHECK-LABEL: test_128_14 -; CHECK: vmovupd {{.*}} ## encoding: [0x62 -; CHECK: ret define <2 x double> @test_128_14(i8 * %addr) { +; CHECK-LABEL: test_128_14: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x double>* %res = load <2 x double>, <2 x double>* %vaddr, align 1 ret <2 x double>%res } -; CHECK-LABEL: test_128_15 -; CHECK: vmovups {{.*}} ## encoding: [0x62 -; CHECK: ret define void @test_128_15(i8 * %addr, <4 x float> %data) { +; CHECK-LABEL: test_128_15: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x float>* store <4 x float>%data, <4 x float>* %vaddr, align 1 ret void } -; CHECK-LABEL: test_128_16 -; CHECK: vmovups {{.*}} ## encoding: [0x62 -; CHECK: ret define <4 x float> @test_128_16(i8 * %addr) { +; CHECK-LABEL: test_128_16: +; CHECK: ## BB#0: +; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x float>* %res = load <4 x float>, <4 x float>* %vaddr, align 1 ret <4 x float>%res } -; CHECK-LABEL: test_128_17 -; CHECK: vmovdqa32{{.*{%k[1-7]} }} -; CHECK: ret define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) { +; CHECK-LABEL: test_128_17: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i32>* %r = load <4 x i32>, <4 x i32>* %vaddr, align 16 @@ -475,10 +563,13 @@ ret <4 x i32>%res } -; CHECK-LABEL: test_128_18 -; CHECK: vmovdqu32{{.*{%k[1-7]} }} -; CHECK: ret define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) { +; CHECK-LABEL: test_128_18: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: 
[0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i32>* %r = load <4 x i32>, <4 x i32>* %vaddr, align 1 @@ -486,10 +577,13 @@ ret <4 x i32>%res } -; CHECK-LABEL: test_128_19 -; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }} -; CHECK: ret define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) { +; CHECK-LABEL: test_128_19: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i32>* %r = load <4 x i32>, <4 x i32>* %vaddr, align 16 @@ -497,10 +591,13 @@ ret <4 x i32>%res } -; CHECK-LABEL: test_128_20 -; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }} -; CHECK: ret define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) { +; CHECK-LABEL: test_128_20: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x i32>* %r = load <4 x i32>, <4 x i32>* %vaddr, align 1 @@ -508,10 +605,13 @@ ret <4 x i32>%res } -; CHECK-LABEL: test_128_21 -; CHECK: vmovdqa64{{.*{%k[1-7]} }} -; CHECK: ret define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) { +; CHECK-LABEL: test_128_21: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x i64>* %r = load <2 x i64>, <2 x i64>* %vaddr, align 16 @@ -519,10 +619,13 @@ ret <2 x i64>%res } -; CHECK-LABEL: test_128_22 -; CHECK: vmovdqu64{{.*{%k[1-7]} }} -; CHECK: ret define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) { +; CHECK-LABEL: test_128_22: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] +; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x i64>* %r = load <2 x i64>, <2 x i64>* %vaddr, align 1 @@ -530,10 +633,13 @@ ret <2 x i64>%res } -; CHECK-LABEL: test_128_23 -; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }} -; CHECK: ret define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) { +; CHECK-LABEL: test_128_23: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## 
encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x i64>* %r = load <2 x i64>, <2 x i64>* %vaddr, align 16 @@ -541,10 +647,13 @@ ret <2 x i64>%res } -; CHECK-LABEL: test_128_24 -; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }} -; CHECK: ret define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) { +; CHECK-LABEL: test_128_24: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x i64>* %r = load <2 x i64>, <2 x i64>* %vaddr, align 1 @@ -552,10 +661,13 @@ ret <2 x i64>%res } -; CHECK-LABEL: test_128_25 -; CHECK: vmovaps{{.*{%k[1-7]} }} -; CHECK: ret define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) { +; CHECK-LABEL: test_128_25: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] +; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x float>* %r = load <4 x float>, <4 x float>* %vaddr, align 16 @@ -563,10 +675,13 @@ ret <4 x float>%res } -; CHECK-LABEL: test_128_26 -; CHECK: vmovups{{.*{%k[1-7]} }} -; CHECK: ret define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) { +; CHECK-LABEL: test_128_26: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] +; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x float>* %r = load <4 x float>, <4 x float>* %vaddr, align 1 @@ -574,10 +689,13 @@ ret <4 x float>%res } -; CHECK-LABEL: test_128_27 -; CHECK: vmovaps{{.*{%k[1-7]} {z} }} -; CHECK: ret define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) { +; CHECK-LABEL: test_128_27: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x float>* %r = load <4 x float>, <4 x float>* %vaddr, align 16 @@ -585,10 +703,13 @@ ret <4 x float>%res } -; CHECK-LABEL: test_128_28 -; CHECK: vmovups{{.*{%k[1-7]} {z} }} -; CHECK: ret define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) { +; CHECK-LABEL: test_128_28: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04] +; 
CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <4 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <4 x float>* %r = load <4 x float>, <4 x float>* %vaddr, align 1 @@ -596,10 +717,13 @@ ret <4 x float>%res } -; CHECK-LABEL: test_128_29 -; CHECK: vmovapd{{.*{%k[1-7]} }} -; CHECK: ret define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) { +; CHECK-LABEL: test_128_29: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] +; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x double>* %r = load <2 x double>, <2 x double>* %vaddr, align 16 @@ -607,10 +731,13 @@ ret <2 x double>%res } -; CHECK-LABEL: test_128_30 -; CHECK: vmovupd{{.*{%k[1-7]} }} -; CHECK: ret define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) { +; CHECK-LABEL: test_128_30: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] +; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x double>* %r = load <2 x double>, <2 x double>* %vaddr, align 1 @@ -618,10 +745,13 @@ ret <2 x double>%res } -; CHECK-LABEL: test_128_31 -; CHECK: vmovapd{{.*{%k[1-7]} {z} }} -; CHECK: ret define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) { +; CHECK-LABEL: test_128_31: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x double>* %r = load <2 x double>, <2 x double>* %vaddr, align 16 @@ -629,10 +759,13 @@ ret <2 x double>%res } -; CHECK-LABEL: test_128_32 -; CHECK: vmovupd{{.*{%k[1-7]} {z} }} -; CHECK: ret define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) { +; CHECK-LABEL: test_128_32: +; CHECK: ## BB#0: +; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04] +; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07] +; CHECK-NEXT: retq ## encoding: [0xc3] %mask = icmp ne <2 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <2 x double>* %r = load <2 x double>, <2 x double>* %vaddr, align 1 Index: test/CodeGen/X86/avx512vl-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -1,94 +1,105 @@ +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -; CHECK-LABEL: test256_1 -; CHECK: vpcmpeqq {{.*%k[0-7]}} -; CHECK: vmovdqa64 {{.*}}%k1 -; 
CHECK: ret define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind { +; CHECK-LABEL: test256_1: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask = icmp eq <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y ret <4 x i64> %max } -; CHECK-LABEL: test256_2 -; CHECK: vpcmpgtq {{.*%k[0-7]}} -; CHECK: vmovdqa64 {{.*}}%k1 -; CHECK: ret define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind { +; CHECK-LABEL: test256_2: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sgt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y ret <4 x i64> %max } -; CHECK-LABEL: @test256_3 -; CHECK: vpcmpled {{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind { +; CHECK-LABEL: test256_3: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k1 +; CHECK-NEXT: vpblendmd %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sge <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y ret <8 x i32> %max } -; CHECK-LABEL: test256_4 -; CHECK: vpcmpnleuq {{.*%k[0-7]}} -; CHECK: vmovdqa64 {{.*}}%k1 -; CHECK: ret define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind { +; CHECK-LABEL: test256_4: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ugt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y ret <4 x i64> %max } -; CHECK-LABEL: test256_5 -; CHECK: vpcmpeqd (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind { +; CHECK-LABEL: test256_5: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp eq <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ret <8 x i32> %max } -; CHECK-LABEL: @test256_6 -; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind { +; CHECK-LABEL: test256_6: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sgt <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ret <8 x i32> %max } -; CHECK-LABEL: @test256_7 -; CHECK: vpcmpled (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind { +; CHECK-LABEL: test256_7: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sle <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ret <8 x i32> %max } -; CHECK-LABEL: @test256_8 -; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind { +; CHECK-LABEL: test256_8: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 
{%k1} +; CHECK-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp ule <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1 ret <8 x i32> %max } -; CHECK-LABEL: @test256_9 -; CHECK: vpcmpeqd %ymm{{.*{%k[1-7]}}} -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> %y1) nounwind { +; CHECK-LABEL: test256_9: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 {%k1} +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp eq <8 x i32> %x1, %y1 %mask0 = icmp eq <8 x i32> %x, %y %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer @@ -96,11 +107,13 @@ ret <8 x i32> %max } -; CHECK-LABEL: @test256_10 -; CHECK: vpcmpleq %ymm{{.*{%k[1-7]}}} -; CHECK: vmovdqa64 -; CHECK: ret define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) nounwind { +; CHECK-LABEL: test256_10: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1 +; CHECK-NEXT: vpcmpleq %ymm2, %ymm3, %k1 {%k1} +; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %mask0 = icmp sle <4 x i64> %x, %y %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer @@ -108,11 +121,13 @@ ret <4 x i64> %max } -; CHECK-LABEL: @test256_11 -; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqa64 -; CHECK: ret define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind { +; CHECK-LABEL: test256_11: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtq %ymm2, %ymm1, %k1 +; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k1 {%k1} +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sgt <4 x i64> %x1, %y1 %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4 %mask0 = icmp sgt <4 x i64> %x, %y @@ -121,11 +136,13 @@ ret <4 x i64> %max } -; CHECK-LABEL: @test256_12 -; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind { +; CHECK-LABEL: test256_12: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1 +; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 {%k1} +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask0 = icmp ule <8 x i32> %x, %y @@ -134,11 +151,12 @@ ret <8 x i32> %max } -; CHECK-LABEL: test256_13 -; CHECK: vpcmpeqq (%rdi){1to4}, %ymm -; CHECK: vmovdqa64 -; CHECK: ret define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind { +; CHECK-LABEL: test256_13: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k1 +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 %y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer @@ -147,11 +165,12 @@ ret <4 x i64> %max } -; CHECK-LABEL: test256_14 -; CHECK: vpcmpled (%rdi){1to8}, %ymm -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind { +; CHECK-LABEL: test256_14: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpled (%rdi){1to8}, %ymm0, %k1 +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0 %y = shufflevector <8 
x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer @@ -160,11 +179,13 @@ ret <8 x i32> %max } -; CHECK-LABEL: test256_15 -; CHECK: vpcmpgtd (%rdi){1to8}, %ymm{{.*{%k[1-7]}}} -; CHECK: vmovdqa32 -; CHECK: ret define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind { +; CHECK-LABEL: test256_15: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1 +; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k1 {%k1} +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0 @@ -175,11 +196,13 @@ ret <8 x i32> %max } -; CHECK-LABEL: test256_16 -; CHECK: vpcmpgtq (%rdi){1to4}, %ymm{{.*{%k[1-7]}}} -; CHECK: vmovdqa64 -; CHECK: ret define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind { +; CHECK-LABEL: test256_16: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleq %ymm1, %ymm2, %k1 +; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k1 {%k1} +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 @@ -190,95 +213,105 @@ ret <4 x i64> %max } -; CHECK-LABEL: test128_1 -; CHECK: vpcmpeqq {{.*%k[0-7]}} -; CHECK: vmovdqa64 {{.*}}%k1 -; CHECK: ret define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind { +; CHECK-LABEL: test128_1: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp eq <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y ret <2 x i64> %max } -; CHECK-LABEL: test128_2 -; CHECK: vpcmpgtq {{.*%k[0-7]}} -; CHECK: vmovdqa64 {{.*}}%k1 -; CHECK: ret define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind { +; CHECK-LABEL: test128_2: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sgt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y ret <2 x i64> %max } -; CHECK-LABEL: @test128_3 -; CHECK: vpcmpled {{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind { +; CHECK-LABEL: test128_3: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k1 +; CHECK-NEXT: vpblendmd %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp sge <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y ret <4 x i32> %max } -; CHECK-LABEL: test128_4 -; CHECK: vpcmpnleuq {{.*%k[0-7]}} -; CHECK: vmovdqa64 {{.*}}%k1 -; CHECK: ret define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind { +; CHECK-LABEL: test128_4: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask = icmp ugt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y ret <2 x i64> %max } -; CHECK-LABEL: test128_5 -; CHECK: vpcmpeqd (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwind { +; CHECK-LABEL: test128_5: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %x, %y %max = select <4 x i1> %mask, 
<4 x i32> %x, <4 x i32> %x1 ret <4 x i32> %max } -; CHECK-LABEL: @test128_6 -; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind { +; CHECK-LABEL: test128_6: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sgt <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ret <4 x i32> %max } -; CHECK-LABEL: @test128_7 -; CHECK: vpcmpled (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind { +; CHECK-LABEL: test128_7: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sle <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ret <4 x i32> %max } -; CHECK-LABEL: @test128_8 -; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}} -; CHECK: vmovdqa32 -; CHECK: ret define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind { +; CHECK-LABEL: test128_8: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ule <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1 ret <4 x i32> %max } -; CHECK-LABEL: @test128_9 -; CHECK: vpcmpeqd %xmm{{.*{%k[1-7]}}} -; CHECK: vmovdqa32 -; CHECK: ret define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> %y1) nounwind { +; CHECK-LABEL: test128_9: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %k1 {%k1} +; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp eq <4 x i32> %x1, %y1 %mask0 = icmp eq <4 x i32> %x, %y %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer @@ -286,11 +319,13 @@ ret <4 x i32> %max } -; CHECK-LABEL: @test128_10 -; CHECK: vpcmpleq %xmm{{.*{%k[1-7]}}} -; CHECK: vmovdqa64 -; CHECK: ret define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) nounwind { +; CHECK-LABEL: test128_10: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1 +; CHECK-NEXT: vpcmpleq %xmm2, %xmm3, %k1 {%k1} +; CHECK-NEXT: vpblendmq %xmm0, %xmm2, %xmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %mask0 = icmp sle <2 x i64> %x, %y %mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer @@ -298,11 +333,13 @@ ret <2 x i64> %max } -; CHECK-LABEL: @test128_11 -; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqa64 -; CHECK: ret define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind { +; CHECK-LABEL: test128_11: +; CHECK: ## BB#0: +; CHECK-NEXT: vpcmpgtq %xmm2, %xmm1, %k1 +; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k1 {%k1} +; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq %mask1 = icmp sgt <2 x i64> %x1, %y1 %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4 %mask0 = icmp sgt <2 x i64> %x, %y @@ -311,11 +348,13 @@ ret <2 x i64> %max } -; CHECK-LABEL: @test128_12 -; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}} -; CHECK: vmovdqa32 -; CHECK: ret define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind { +; 
CHECK-LABEL: test128_12:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1
+; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 {%k1}
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
 %mask1 = icmp sge <4 x i32> %x1, %y1
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask0 = icmp ule <4 x i32> %x, %y
@@ -324,11 +363,12 @@
 ret <4 x i32> %max
 }
-; CHECK-LABEL: test128_13
-; CHECK: vpcmpeqq (%rdi){1to2}, %xmm
-; CHECK: vmovdqa64
-; CHECK: ret
 define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind {
+; CHECK-LABEL: test128_13:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k1
+; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
 %yb = load i64, i64* %yb.ptr, align 4
 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
 %y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
@@ -337,11 +377,12 @@
 ret <2 x i64> %max
 }
-; CHECK-LABEL: test128_14
-; CHECK: vpcmpled (%rdi){1to4}, %xmm
-; CHECK: vmovdqa32
-; CHECK: ret
 define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind {
+; CHECK-LABEL: test128_14:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpled (%rdi){1to4}, %xmm0, %k1
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
 %yb = load i32, i32* %yb.ptr, align 4
 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
 %y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -350,11 +391,13 @@
 ret <4 x i32> %max
 }
-; CHECK-LABEL: test128_15
-; CHECK: vpcmpgtd (%rdi){1to4}, %xmm{{.*{%k[1-7]}}}
-; CHECK: vmovdqa32
-; CHECK: ret
 define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
+; CHECK-LABEL: test128_15:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1
+; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k1 {%k1}
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
 %mask1 = icmp sge <4 x i32> %x1, %y1
 %yb = load i32, i32* %yb.ptr, align 4
 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
@@ -365,11 +408,13 @@
 ret <4 x i32> %max
 }
-; CHECK-LABEL: test128_16
-; CHECK: vpcmpgtq (%rdi){1to2}, %xmm{{.*{%k[1-7]}}}
-; CHECK: vmovdqa64
-; CHECK: ret
 define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
+; CHECK-LABEL: test128_16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpleq %xmm1, %xmm2, %k1
+; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k1 {%k1}
+; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
 %mask1 = icmp sge <2 x i64> %x1, %y1
 %yb = load i64, i64* %yb.ptr, align 4
 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
Index: test/CodeGen/X86/masked_gather_scatter.ll
===================================================================
--- test/CodeGen/X86/masked_gather_scatter.ll
+++ test/CodeGen/X86/masked_gather_scatter.ll
@@ -1490,8 +1490,7 @@
 ; SKX-NEXT: vmovq %xmm1, %rax
 ; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
 ; SKX-NEXT: .LBB29_6: # %else5
-; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1}
-; SKX-NEXT: vmovaps %zmm3, %zmm0
+; SKX-NEXT: vpblendmd %xmm0, %xmm3, %xmm0 {%k1}
 ; SKX-NEXT: retq
 ;
 ; SKX_32-LABEL: test30:
@@ -1503,33 +1502,33 @@
 ; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1
 ; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp)
 ; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
-; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
+; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
-; SKX_32-NEXT: # implicit-def: %XMM1
+; SKX_32-NEXT: # implicit-def: %XMM0
 ; SKX_32-NEXT: testb %al, %al
 ; SKX_32-NEXT: je .LBB29_2
 ; SKX_32-NEXT: # BB#1: # %cond.load
-; SKX_32-NEXT: vmovd %xmm2, %eax
-; SKX_32-NEXT: vmovd (%eax), %xmm1
+; SKX_32-NEXT: vmovd %xmm1, %eax
+; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SKX_32-NEXT: .LBB29_2: # %else
 ; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp)
 ; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
 ; SKX_32-NEXT: testb %al, %al
 ; SKX_32-NEXT: je .LBB29_4
 ; SKX_32-NEXT: # BB#3: # %cond.load1
-; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
-; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
+; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
+; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
 ; SKX_32-NEXT: .LBB29_4: # %else2
-; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm0
+; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm2
 ; SKX_32-NEXT: kmovb %k1, (%esp)
 ; SKX_32-NEXT: movb (%esp), %al
 ; SKX_32-NEXT: testb %al, %al
 ; SKX_32-NEXT: je .LBB29_6
 ; SKX_32-NEXT: # BB#5: # %cond.load4
-; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
-; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
+; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
+; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
 ; SKX_32-NEXT: .LBB29_6: # %else5
-; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; SKX_32-NEXT: vpblendmd %xmm0, %xmm2, %xmm0 {%k1}
 ; SKX_32-NEXT: addl $12, %esp
 ; SKX_32-NEXT: retl