Add the masked scalar move intrinsics llvm.x86.avx512.mask.move.ss and
llvm.x86.avx512.mask.move.sd.

Files touched: the intrinsic declarations (IntrinsicsX86.td), the
avx512_move_scalar multiclass and its VMOVSSZ/VMOVSDZ instantiations
(X86InstrAVX512.td), the X86_INTRINSIC_DATA table (X86IntrinsicsInfo.h), and
the CodeGen / MC tests.

NOTE(review): the template-argument lists of both avx512_move_scalar headers
were missing and are restored from the argument order of the defm call sites;
the parameter types are assumed from the usual LLVM declarations -- confirm
against the tree.
NOTE(review): per-line indentation inside the hunks was lost when this patch
was flattened and has been re-created by hand; apply with whitespace-tolerant
matching (e.g. "patch -l") and verify the result.

Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td
@@ -1840,6 +1840,13 @@
   def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">,
         Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
                   [IntrReadArgMem]>;
+
+  def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+                  [IntrNoMem]>;
+  def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">,
+        Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+                  [IntrNoMem]>;
 }
 
 // Conditional store ops
Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -2973,53 +2973,60 @@
 // AVX-512 MOVSS, MOVSD
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_move_scalar <string asm, RegisterClass RC,
-                              SDNode OpNode, ValueType vt,
-                              X86MemOperand x86memop, PatFrag mem_pat> {
-  let hasSideEffects = 0 in {
-  def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
-              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-              [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
-                                      (scalar_to_vector RC:$src2))))],
-              IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
-  let Constraints = "$src1 = $dst" in
-  def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
-              (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
-              !strconcat(asm,
-                "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
-              [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
-  def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-              [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
-              EVEX, VEX_LIG;
+multiclass avx512_move_scalar <string asm, SDNode OpNode,
+                              X86VectorVTInfo _> {
+  defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
+                    (ins _.RC:$src1, _.RC:$src2),
+                    asm, "$src2, $src1","$src1, $src2",
+                    (_.VT (OpNode (_.VT _.RC:$src1),
+                                  (_.VT _.RC:$src2))),
+                    IIC_SSE_MOV_S_RR>, EVEX_4V;
+  let Constraints = "$src1 = $dst" , mayLoad = 1 in
+  defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
+                    (outs _.RC:$dst),
+                    (ins _.ScalarMemOp:$src),
+                    asm,"$src","$src",
+                    (_.VT (OpNode (_.VT _.RC:$src1),
+                               (_.VT (scalar_to_vector
+                                     (_.ScalarLdFrag addr:$src)))))>, EVEX;
+  let isCodeGenOnly = 1 in {
+    def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+               (ins _.RC:$src1, _.FRC:$src2),
+               !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+               [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
+                                          (scalar_to_vector _.FRC:$src2))))],
+               _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
+  let mayLoad = 1 in
+    def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+               [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+               _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+  }
   let mayStore = 1 in {
-  def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
-             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
-             EVEX, VEX_LIG;
-  def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
-             !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
-             [], IIC_SSE_MOV_S_MR>,
-             EVEX, VEX_LIG, EVEX_K;
+    def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
+               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+               [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
+               EVEX;
+    def mrk: AVX512PI<0x11, MRMDestMem, (outs),
+               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
+               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
+               [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K;
   } // mayStore
-  } //hasSideEffects = 0
 }
 
-let ExeDomain = SSEPackedSingle in
-defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
-                                 loadf32>, XS, EVEX_CD8<32, CD8VT1>;
+defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
+                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
 
-let ExeDomain = SSEPackedDouble in
-defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
-                                 loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
+                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
-          (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
-           VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+          (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
 
 def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
-          (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
-           VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+          (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
 
 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
           (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
@@ -143,7 +143,7 @@
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
+  X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
@@ -807,6 +807,10 @@
                      X86ISD::MOVDDUP, 0),
   X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK,
                      X86ISD::MOVDDUP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::MOVSD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK,
+                     X86ISD::MOVSS, 0),
   X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK,
                      X86ISD::MOVSHDUP, 0),
   X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK,
Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
@@ -6234,3 +6234,48 @@
 }
 
 declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
+declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk:
+; CHECK: vmovss %xmm1, %xmm0, %xmm2 {%k1}
+  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  ret <4 x float> %res
+}
+
+define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz:
+; CHECK: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
+  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2)
+  ret <4 x float> %res
+}
+
+define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr:
+; CHECK: vmovss %xmm1, %xmm0, %xmm0
+  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
+define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr:
+; CHECK: vmovsd %xmm1, %xmm0, %xmm0
+  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1)
+  ret <2 x double> %res
+}
+
+define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz:
+; CHECK: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2)
+  ret <2 x double> %res
+}
+
+define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk:
+; CHECK: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  ret <2 x double> %res
+}
+
Index: llvm/trunk/test/MC/X86/avx512-encodings.s
===================================================================
--- llvm/trunk/test/MC/X86/avx512-encodings.s
+++ llvm/trunk/test/MC/X86/avx512-encodings.s
@@ -19220,3 +19220,58 @@
 // CHECK: vucomiss -516(%rdx), %xmm22
 // CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0xfc,0xfd,0xff,0xff]
           vucomiss -516(%rdx), %xmm22
+// CHECK: vmovsd (%rcx), %xmm25 {%k3}
+// CHECK: encoding: [0x62,0x61,0xff,0x0b,0x10,0x09]
+          vmovsd (%rcx), %xmm25 {%k3}
+
+// CHECK: vmovsd (%rcx), %xmm25 {%k3} {z}
+// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09]
+          vmovsd (%rcx), %xmm25 {%k3} {z}
+
+// CHECK: vmovsd %xmm19, %xmm3, %xmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x21,0xe7,0x8b,0x10,0xdb]
+          vmovsd %xmm19, %xmm3, %xmm27 {%k3} {z}
+
+// CHECK: vmovss (%rcx), %xmm2 {%k4}
+// CHECK: encoding: [0x62,0xf1,0x7e,0x0c,0x10,0x11]
+          vmovss (%rcx), %xmm2 {%k4}
+
+// CHECK: vmovss (%rcx), %xmm2 {%k4} {z}
+// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11]
+          vmovss (%rcx), %xmm2 {%k4} {z}
+
+// CHECK: vmovss %xmm26, %xmm9, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x01,0x36,0x8c,0x10,0xe2]
+          vmovss %xmm26, %xmm9, %xmm28 {%k4} {z}
+
+// CHECK: vmovsd %xmm15, %xmm22, %xmm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xc1,0xcf,0x87,0x10,0xef]
+          vmovsd %xmm15, %xmm22, %xmm21 {%k7} {z}
+
+// CHECK: vmovsd %xmm8, %xmm13, %xmm3 {%k5} {z}
+// CHECK: encoding: [0x62,0xd1,0x97,0x8d,0x10,0xd8]
+          vmovsd %xmm8, %xmm13, %xmm3 {%k5} {z}
+
+// CHECK: vmovss %xmm2, %xmm27, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0xe1,0x26,0x82,0x10,0xca]
+          vmovss %xmm2, %xmm27, %xmm17 {%k2} {z}
+
+// CHECK: vmovss %xmm23, %xmm19, %xmm10 {%k3} {z}
+// CHECK: encoding: [0x62,0x31,0x66,0x83,0x10,0xd7]
+          vmovss %xmm23, %xmm19, %xmm10 {%k3} {z}
+
+// CHECK: vmovsd %xmm4, %xmm15, %xmm4 {%k6} {z}
+// CHECK: encoding: [0x62,0xf1,0x87,0x8e,0x10,0xe4]
+          vmovsd %xmm4, %xmm15, %xmm4 {%k6} {z}
+
+// CHECK: vmovsd %xmm14, %xmm2, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0xc1,0xef,0x8f,0x10,0xe6]
+          vmovsd %xmm14, %xmm2, %xmm20 {%k7} {z}
+
+// CHECK: vmovss %xmm19, %xmm11, %xmm21 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x26,0x8b,0x10,0xeb]
+          vmovss %xmm19, %xmm11, %xmm21 {%k3} {z}
+
+// CHECK: vmovss %xmm24, %xmm27, %xmm15 {%k2} {z}
+// CHECK: encoding: [0x62,0x11,0x26,0x82,0x10,0xf8]
+          vmovss %xmm24, %xmm27, %xmm15 {%k2} {z}
Index: llvm/trunk/test/MC/X86/intel-syntax-avx512.s
===================================================================
--- llvm/trunk/test/MC/X86/intel-syntax-avx512.s
+++ llvm/trunk/test/MC/X86/intel-syntax-avx512.s
@@ -264,3 +264,91 @@
 // CHECK: vcomiss xmm16, dword ptr [rcx]
 // CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2f,0x01]
           vcomiss xmm16, DWORD PTR [rcx]
+
+// CHECK: vmovss dword ptr [rcx] {k2}, xmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x0a,0x11,0x29]
+          vmovss dword ptr [rcx]{k2},xmm13
+
+// CHECK: vmovss dword ptr [rax + 8*r14 + 4660], xmm13
+// CHECK: encoding: [0xc4,0x21,0x7a,0x11,0xac,0xf0,0x34,0x12,0x00,0x00]
+          vmovss dword ptr [rax+r14*8+0x1234],xmm13
+
+// CHECK: vmovss dword ptr [rdx + 508], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0xfc,0x01,0x00,0x00]
+          vmovss dword ptr [rdx+0x1fc],xmm13
+
+// CHECK: vmovss dword ptr [rdx + 512], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0x00,0x02,0x00,0x00]
+          vmovss dword ptr [rdx+0x200],xmm13
+
+// CHECK: vmovss dword ptr [rdx - 512], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0x00,0xfe,0xff,0xff]
+          vmovss dword ptr [rdx-0x200],xmm13
+
+// CHECK: vmovss dword ptr [rdx - 516], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0xfc,0xfd,0xff,0xff]
+          vmovss dword ptr [rdx-0x204],xmm13
+
+// CHECK: vmovss dword ptr [rdx + 508], xmm5
+// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0xfc,0x01,0x00,0x00]
+          vmovss dword ptr [rdx+0x1fc],xmm5
+
+// CHECK: vmovss dword ptr [rdx + 512], xmm5
+// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0x00,0x02,0x00,0x00]
+          vmovss dword ptr [rdx+0x200],xmm5
+
+// CHECK: vmovss dword ptr [rdx - 512], xmm5
+// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0x00,0xfe,0xff,0xff]
+          vmovss dword ptr [rdx-0x200], xmm5
+
+// CHECK: vmovss dword ptr [rdx - 516], xmm5
+// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0xfc,0xfd,0xff,0xff]
+          vmovss dword ptr [rdx-0x204],xmm5
+
+// CHECK: vmovss dword ptr [rcx], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0x29]
+          vmovss dword ptr [rcx],xmm13
+
+// CHECK: vmovss xmm2, dword ptr [rcx]
+// CHECK: encoding: [0xc5,0xfa,0x10,0x11]
+          vmovss xmm2, dword ptr [rcx]
+
+// CHECK: vmovss xmm2 {k4}, dword ptr [rcx]
+// CHECK: encoding: [0x62,0xf1,0x7e,0x0c,0x10,0x11]
+          vmovss xmm2{k4}, dword ptr [rcx]
+
+// CHECK: vmovss xmm2 {k4} {z}, dword ptr [rcx]
+// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11]
+          vmovss xmm2{k4} {z}, dword ptr [rcx]
+
+// CHECK: vmovsd xmm25 , qword ptr [rcx]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x09]
+          vmovsd xmm25, qword ptr [rcx]
+
+// CHECK: vmovsd xmm25 {k3}, qword ptr [rcx]
+// CHECK: encoding: [0x62,0x61,0xff,0x0b,0x10,0x09]
+          vmovsd xmm25{k3}, qword ptr [rcx]
+
+// CHECK: vmovsd xmm25 {k3} {z}, qword ptr [rcx]
+// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09]
+          vmovsd xmm25{k3} {z}, qword ptr [rcx]
+
+// CHECK: vmovsd xmm25 , qword ptr [rax + 8*r14 + 291]
+// CHECK: encoding: [0x62,0x21,0xff,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovsd xmm25, qword ptr [rax+r14*8+0x123]
+
+// CHECK: vmovsd xmm25 , qword ptr [rdx + 1016]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x7f]
+          vmovsd xmm25, qword ptr [rdx+0x3f8]
+
+// CHECK: vmovsd xmm25 , qword ptr [rdx + 1024]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0x00,0x04,0x00,0x00]
+          vmovsd xmm25, qword ptr [rdx+0x400]
+
+// CHECK: vmovsd xmm25 , qword ptr [rdx - 1024]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x80]
+          vmovsd xmm25, qword ptr [rdx-0x400]
+
+// CHECK: vmovsd xmm25 , qword ptr [rdx - 1032]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff]
+          vmovsd xmm25, qword ptr [rdx-0x408]