Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -2998,6 +2998,20 @@ def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtsi2sd32 : GCCBuiltin<"__builtin_ia32_cvtsi2sd32">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + } // Pack ops. Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -417,6 +417,10 @@ COMPRESS, EXPAND, + // Convert a signed/unsigned integer to a scalar floating-point value + // with rounding mode. + SINT_TO_FP_RND, + UINT_TO_FP_RND, // Save xmm argument registers to the stack, according to %al. An operator // is needed so that this can be expanded with control flow. 
VASTART_SAVE_XMM_REGS, Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -18578,6 +18578,8 @@ case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; + case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; + case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; } return nullptr; } Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -4205,15 +4205,32 @@ } // hasSideEffects = 0 } +multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, string asm> { + def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), + (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), + !strconcat(asm, + "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"), + [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), + SrcRC:$src2, + (i32 imm:$rc)))], IIC_SSE_CVT_Scalar_RR>, EVEX_4V, EVEX_B, EVEX_RC; +} + +multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, RegisterClass SrcRC, + X86VectorVTInfo DstVT, X86MemOperand x86memop, string asm> { + defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, x86memop, asm>, + avx512_vcvtsi<opc, SrcRC, DstVT.FRC, x86memop, asm>, VEX_LIG; +} + let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, + v4f32x_info, i32mem, "cvtsi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; +defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, + v4f32x_info, i64mem, "cvtsi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; +defm 
VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, + v2f64x_info, i32mem, "cvtsi2sd{l}">, XD, EVEX_CD8<32, CD8VT1>; +defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, + v2f64x_info, i64mem, "cvtsi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -4233,14 +4250,14 @@ def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">, - XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">, - XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32, + v4f32x_info, i32mem, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, + v4f32x_info, i64mem, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">, - XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">, - XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; + XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, + v2f64x_info, i64mem, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -350,6 +350,12 @@ [SDTCisSameAs<0, 3>, SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; +def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, + SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; + +def X86SintToFpRnd : 
SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>; +def X86SuintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -242,6 +242,10 @@ X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), + X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), + X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0), Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -2807,3 +2807,43 @@ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) ret <2 x double> %res } + +define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) { +; CHECK-LABEL: test_x86_avx512_cvtsi2sd32: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> 
@llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone + +define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) { +; CHECK-LABEL: test_x86_avx512_cvtsi2sd64: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone + +define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) { +; CHECK-LABEL: test_x86_avx512_cvtsi2ss32: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone + +define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) { +; CHECK-LABEL: test_x86_avx512_cvtsi2ss64: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone Index: llvm/trunk/test/MC/X86/avx512-encodings.s =================================================================== --- llvm/trunk/test/MC/X86/avx512-encodings.s +++ llvm/trunk/test/MC/X86/avx512-encodings.s @@ -8812,4 +8812,496 @@ // CHECK: encoding: [0x62,0xe2,0x1d,0x50,0x36,0xb2,0xfc,0xfd,0xff,0xff] vpermd -516(%rdx){1to16}, %zmm28, %zmm22 - +// CHECK: vcvtsi2sdl %eax, %xmm10, %xmm7 +// CHECK: encoding: [0xc5,0xab,0x2a,0xf8] + vcvtsi2sd %eax, %xmm10, 
%xmm7 + +// CHECK: vcvtsi2sdl %ebp, %xmm10, %xmm7 +// CHECK: encoding: [0xc5,0xab,0x2a,0xfd] + vcvtsi2sd %ebp, %xmm10, %xmm7 + +// CHECK: vcvtsi2sdl %r13d, %xmm10, %xmm7 +// CHECK: encoding: [0xc4,0xc1,0x2b,0x2a,0xfd] + vcvtsi2sd %r13d, %xmm10, %xmm7 + +// CHECK: vcvtsi2sdl (%rcx), %xmm10, %xmm7 +// CHECK: encoding: [0xc5,0xab,0x2a,0x39] + vcvtsi2sdl (%rcx), %xmm10, %xmm7 + +// CHECK: vcvtsi2sdl 291(%rax,%r14,8), %xmm10, %xmm7 +// CHECK: encoding: [0xc4,0xa1,0x2b,0x2a,0xbc,0xf0,0x23,0x01,0x00,0x00] + vcvtsi2sdl 291(%rax,%r14,8), %xmm10, %xmm7 + +// CHECK: vcvtsi2sdl 508(%rdx), %xmm10, %xmm7 +// CHECK: encoding: [0xc5,0xab,0x2a,0xba,0xfc,0x01,0x00,0x00] + vcvtsi2sdl 508(%rdx), %xmm10, %xmm7 + +// CHECK: vcvtsi2sdl 512(%rdx), %xmm10, %xmm7 +// CHECK: encoding: [0xc5,0xab,0x2a,0xba,0x00,0x02,0x00,0x00] + vcvtsi2sdl 512(%rdx), %xmm10, %xmm7 + +// CHECK: vcvtsi2sdl -512(%rdx), %xmm10, %xmm7 +// CHECK: encoding: [0xc5,0xab,0x2a,0xba,0x00,0xfe,0xff,0xff] + vcvtsi2sdl -512(%rdx), %xmm10, %xmm7 + +// CHECK: vcvtsi2sdl -516(%rdx), %xmm10, %xmm7 +// CHECK: encoding: [0xc5,0xab,0x2a,0xba,0xfc,0xfd,0xff,0xff] + vcvtsi2sdl -516(%rdx), %xmm10, %xmm7 +// CHECK: vcvtsi2sdq %rax, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x08,0x2a,0xe8] + vcvtsi2sd %rax, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %rax, {rn-sae}, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x18,0x2a,0xe8] + vcvtsi2sd %rax, {rn-sae}, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %rax, {ru-sae}, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x58,0x2a,0xe8] + vcvtsi2sd %rax, {ru-sae}, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %rax, {rd-sae}, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x38,0x2a,0xe8] + vcvtsi2sd %rax, {rd-sae}, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %rax, {rz-sae}, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x78,0x2a,0xe8] + vcvtsi2sd %rax, {rz-sae}, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %r8, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x41,0x9f,0x08,0x2a,0xe8] + vcvtsi2sd %r8, 
%xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %r8, {rn-sae}, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x41,0x9f,0x18,0x2a,0xe8] + vcvtsi2sd %r8, {rn-sae}, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %r8, {ru-sae}, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x41,0x9f,0x58,0x2a,0xe8] + vcvtsi2sd %r8, {ru-sae}, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %r8, {rd-sae}, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x41,0x9f,0x38,0x2a,0xe8] + vcvtsi2sd %r8, {rd-sae}, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq %r8, {rz-sae}, %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x41,0x9f,0x78,0x2a,0xe8] + vcvtsi2sd %r8, {rz-sae}, %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq (%rcx), %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x08,0x2a,0x29] + vcvtsi2sdq (%rcx), %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq 291(%rax,%r14,8), %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x21,0x9f,0x08,0x2a,0xac,0xf0,0x23,0x01,0x00,0x00] + vcvtsi2sdq 291(%rax,%r14,8), %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq 1016(%rdx), %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x08,0x2a,0x6a,0x7f] + vcvtsi2sdq 1016(%rdx), %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq 1024(%rdx), %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x08,0x2a,0xaa,0x00,0x04,0x00,0x00] + vcvtsi2sdq 1024(%rdx), %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq -1024(%rdx), %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x08,0x2a,0x6a,0x80] + vcvtsi2sdq -1024(%rdx), %xmm12, %xmm29 + +// CHECK: vcvtsi2sdq -1032(%rdx), %xmm12, %xmm29 +// CHECK: encoding: [0x62,0x61,0x9f,0x08,0x2a,0xaa,0xf8,0xfb,0xff,0xff] + vcvtsi2sdq -1032(%rdx), %xmm12, %xmm29 + +// CHECK: vcvtsi2ssl %eax, %xmm10, %xmm15 +// CHECK: encoding: [0xc5,0x2a,0x2a,0xf8] + vcvtsi2ss %eax, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %eax, {rn-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x71,0x2e,0x18,0x2a,0xf8] + vcvtsi2ss %eax, {rn-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %eax, {ru-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x71,0x2e,0x58,0x2a,0xf8] + vcvtsi2ss %eax, {ru-sae}, %xmm10, %xmm15 + +// CHECK: 
vcvtsi2ssl %eax, {rd-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x71,0x2e,0x38,0x2a,0xf8] + vcvtsi2ss %eax, {rd-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %eax, {rz-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x71,0x2e,0x78,0x2a,0xf8] + vcvtsi2ss %eax, {rz-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %ebp, %xmm10, %xmm15 +// CHECK: encoding: [0xc5,0x2a,0x2a,0xfd] + vcvtsi2ss %ebp, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %ebp, {rn-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x71,0x2e,0x18,0x2a,0xfd] + vcvtsi2ss %ebp, {rn-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %ebp, {ru-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x71,0x2e,0x58,0x2a,0xfd] + vcvtsi2ss %ebp, {ru-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %ebp, {rd-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x71,0x2e,0x38,0x2a,0xfd] + vcvtsi2ss %ebp, {rd-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %ebp, {rz-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x71,0x2e,0x78,0x2a,0xfd] + vcvtsi2ss %ebp, {rz-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %r13d, %xmm10, %xmm15 +// CHECK: encoding: [0xc4,0x41,0x2a,0x2a,0xfd] + vcvtsi2ss %r13d, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %r13d, {rn-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x51,0x2e,0x18,0x2a,0xfd] + vcvtsi2ss %r13d, {rn-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %r13d, {ru-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x51,0x2e,0x58,0x2a,0xfd] + vcvtsi2ss %r13d, {ru-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %r13d, {rd-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x51,0x2e,0x38,0x2a,0xfd] + vcvtsi2ss %r13d, {rd-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl %r13d, {rz-sae}, %xmm10, %xmm15 +// CHECK: encoding: [0x62,0x51,0x2e,0x78,0x2a,0xfd] + vcvtsi2ss %r13d, {rz-sae}, %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl (%rcx), %xmm10, %xmm15 +// CHECK: encoding: [0xc5,0x2a,0x2a,0x39] + vcvtsi2ssl (%rcx), %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl 291(%rax,%r14,8), %xmm10, %xmm15 +// CHECK: encoding: 
[0xc4,0x21,0x2a,0x2a,0xbc,0xf0,0x23,0x01,0x00,0x00] + vcvtsi2ssl 291(%rax,%r14,8), %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl 508(%rdx), %xmm10, %xmm15 +// CHECK: encoding: [0xc5,0x2a,0x2a,0xba,0xfc,0x01,0x00,0x00] + vcvtsi2ssl 508(%rdx), %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl 512(%rdx), %xmm10, %xmm15 +// CHECK: encoding: [0xc5,0x2a,0x2a,0xba,0x00,0x02,0x00,0x00] + vcvtsi2ssl 512(%rdx), %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl -512(%rdx), %xmm10, %xmm15 +// CHECK: encoding: [0xc5,0x2a,0x2a,0xba,0x00,0xfe,0xff,0xff] + vcvtsi2ssl -512(%rdx), %xmm10, %xmm15 + +// CHECK: vcvtsi2ssl -516(%rdx), %xmm10, %xmm15 +// CHECK: encoding: [0xc5,0x2a,0x2a,0xba,0xfc,0xfd,0xff,0xff] + vcvtsi2ssl -516(%rdx), %xmm10, %xmm15 +// CHECK: vcvtsi2ssq %rax, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x08,0x2a,0xc0] + vcvtsi2ss %rax, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %rax, {rn-sae}, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x18,0x2a,0xc0] + vcvtsi2ss %rax, {rn-sae}, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %rax, {ru-sae}, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x58,0x2a,0xc0] + vcvtsi2ss %rax, {ru-sae}, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %rax, {rd-sae}, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x38,0x2a,0xc0] + vcvtsi2ss %rax, {rd-sae}, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %rax, {rz-sae}, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x78,0x2a,0xc0] + vcvtsi2ss %rax, {rz-sae}, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %r8, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xc1,0xae,0x08,0x2a,0xc0] + vcvtsi2ss %r8, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %r8, {rn-sae}, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xc1,0xae,0x18,0x2a,0xc0] + vcvtsi2ss %r8, {rn-sae}, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %r8, {ru-sae}, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xc1,0xae,0x58,0x2a,0xc0] + vcvtsi2ss %r8, {ru-sae}, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %r8, {rd-sae}, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xc1,0xae,0x38,0x2a,0xc0] + vcvtsi2ss %r8, 
{rd-sae}, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq %r8, {rz-sae}, %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xc1,0xae,0x78,0x2a,0xc0] + vcvtsi2ss %r8, {rz-sae}, %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq (%rcx), %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x08,0x2a,0x01] + vcvtsi2ssq (%rcx), %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq 291(%rax,%r14,8), %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xa1,0xae,0x08,0x2a,0x84,0xf0,0x23,0x01,0x00,0x00] + vcvtsi2ssq 291(%rax,%r14,8), %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq 1016(%rdx), %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x08,0x2a,0x42,0x7f] + vcvtsi2ssq 1016(%rdx), %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq 1024(%rdx), %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x08,0x2a,0x82,0x00,0x04,0x00,0x00] + vcvtsi2ssq 1024(%rdx), %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq -1024(%rdx), %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x08,0x2a,0x42,0x80] + vcvtsi2ssq -1024(%rdx), %xmm10, %xmm16 + +// CHECK: vcvtsi2ssq -1032(%rdx), %xmm10, %xmm16 +// CHECK: encoding: [0x62,0xe1,0xae,0x08,0x2a,0x82,0xf8,0xfb,0xff,0xff] + vcvtsi2ssq -1032(%rdx), %xmm10, %xmm16 + +// CHECK: vcvtusi2sdl %eax, %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x77,0x08,0x7b,0xd8] + vcvtusi2sd %eax, %xmm1, %xmm19 + +// CHECK: vcvtusi2sdl %ebp, %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x77,0x08,0x7b,0xdd] + vcvtusi2sd %ebp, %xmm1, %xmm19 + +// CHECK: vcvtusi2sdl %r13d, %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xc1,0x77,0x08,0x7b,0xdd] + vcvtusi2sd %r13d, %xmm1, %xmm19 + +// CHECK: vcvtusi2sdl (%rcx), %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x77,0x08,0x7b,0x19] + vcvtusi2sdl (%rcx), %xmm1, %xmm19 + +// CHECK: vcvtusi2sdl 291(%rax,%r14,8), %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xa1,0x77,0x08,0x7b,0x9c,0xf0,0x23,0x01,0x00,0x00] + vcvtusi2sdl 291(%rax,%r14,8), %xmm1, %xmm19 + +// CHECK: vcvtusi2sdl 508(%rdx), %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x77,0x08,0x7b,0x5a,0x7f] + vcvtusi2sdl 508(%rdx), %xmm1, %xmm19 + +// CHECK: 
vcvtusi2sdl 512(%rdx), %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x77,0x08,0x7b,0x9a,0x00,0x02,0x00,0x00] + vcvtusi2sdl 512(%rdx), %xmm1, %xmm19 + +// CHECK: vcvtusi2sdl -512(%rdx), %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x77,0x08,0x7b,0x5a,0x80] + vcvtusi2sdl -512(%rdx), %xmm1, %xmm19 + +// CHECK: vcvtusi2sdl -516(%rdx), %xmm1, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x77,0x08,0x7b,0x9a,0xfc,0xfd,0xff,0xff] + vcvtusi2sdl -516(%rdx), %xmm1, %xmm19 + +// CHECK: vcvtusi2sdq %rax, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x00,0x7b,0xf0] + vcvtusi2sd %rax, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %rax, {rn-sae}, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x10,0x7b,0xf0] + vcvtusi2sd %rax, {rn-sae}, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %rax, {ru-sae}, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x50,0x7b,0xf0] + vcvtusi2sd %rax, {ru-sae}, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %rax, {rd-sae}, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x30,0x7b,0xf0] + vcvtusi2sd %rax, {rd-sae}, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %rax, {rz-sae}, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x70,0x7b,0xf0] + vcvtusi2sd %rax, {rz-sae}, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %r8, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x51,0xaf,0x00,0x7b,0xf0] + vcvtusi2sd %r8, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %r8, {rn-sae}, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x51,0xaf,0x10,0x7b,0xf0] + vcvtusi2sd %r8, {rn-sae}, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %r8, {ru-sae}, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x51,0xaf,0x50,0x7b,0xf0] + vcvtusi2sd %r8, {ru-sae}, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %r8, {rd-sae}, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x51,0xaf,0x30,0x7b,0xf0] + vcvtusi2sd %r8, {rd-sae}, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq %r8, {rz-sae}, %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x51,0xaf,0x70,0x7b,0xf0] + vcvtusi2sd %r8, {rz-sae}, %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq (%rcx), %xmm26, %xmm14 +// 
CHECK: encoding: [0x62,0x71,0xaf,0x00,0x7b,0x31] + vcvtusi2sdq (%rcx), %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq 291(%rax,%r14,8), %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x31,0xaf,0x00,0x7b,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtusi2sdq 291(%rax,%r14,8), %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq 1016(%rdx), %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x00,0x7b,0x72,0x7f] + vcvtusi2sdq 1016(%rdx), %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq 1024(%rdx), %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x00,0x7b,0xb2,0x00,0x04,0x00,0x00] + vcvtusi2sdq 1024(%rdx), %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq -1024(%rdx), %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x00,0x7b,0x72,0x80] + vcvtusi2sdq -1024(%rdx), %xmm26, %xmm14 + +// CHECK: vcvtusi2sdq -1032(%rdx), %xmm26, %xmm14 +// CHECK: encoding: [0x62,0x71,0xaf,0x00,0x7b,0xb2,0xf8,0xfb,0xff,0xff] + vcvtusi2sdq -1032(%rdx), %xmm26, %xmm14 + +// CHECK: vcvtusi2ssl %eax, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x00,0x7b,0xe8] + vcvtusi2ss %eax, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %eax, {rn-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x10,0x7b,0xe8] + vcvtusi2ss %eax, {rn-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %eax, {ru-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x50,0x7b,0xe8] + vcvtusi2ss %eax, {ru-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %eax, {rd-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x30,0x7b,0xe8] + vcvtusi2ss %eax, {rd-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %eax, {rz-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x70,0x7b,0xe8] + vcvtusi2ss %eax, {rz-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %ebp, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x00,0x7b,0xed] + vcvtusi2ss %ebp, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %ebp, {rn-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x10,0x7b,0xed] + vcvtusi2ss %ebp, {rn-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %ebp, {ru-sae}, %xmm26, %xmm5 +// CHECK: encoding: 
[0x62,0xf1,0x2e,0x50,0x7b,0xed] + vcvtusi2ss %ebp, {ru-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %ebp, {rd-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x30,0x7b,0xed] + vcvtusi2ss %ebp, {rd-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %ebp, {rz-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x70,0x7b,0xed] + vcvtusi2ss %ebp, {rz-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %r13d, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xd1,0x2e,0x00,0x7b,0xed] + vcvtusi2ss %r13d, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %r13d, {rn-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xd1,0x2e,0x10,0x7b,0xed] + vcvtusi2ss %r13d, {rn-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %r13d, {ru-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xd1,0x2e,0x50,0x7b,0xed] + vcvtusi2ss %r13d, {ru-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %r13d, {rd-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xd1,0x2e,0x30,0x7b,0xed] + vcvtusi2ss %r13d, {rd-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl %r13d, {rz-sae}, %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xd1,0x2e,0x70,0x7b,0xed] + vcvtusi2ss %r13d, {rz-sae}, %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl (%rcx), %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x00,0x7b,0x29] + vcvtusi2ssl (%rcx), %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl 291(%rax,%r14,8), %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xb1,0x2e,0x00,0x7b,0xac,0xf0,0x23,0x01,0x00,0x00] + vcvtusi2ssl 291(%rax,%r14,8), %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl 508(%rdx), %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x00,0x7b,0x6a,0x7f] + vcvtusi2ssl 508(%rdx), %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl 512(%rdx), %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x00,0x7b,0xaa,0x00,0x02,0x00,0x00] + vcvtusi2ssl 512(%rdx), %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl -512(%rdx), %xmm26, %xmm5 +// CHECK: encoding: [0x62,0xf1,0x2e,0x00,0x7b,0x6a,0x80] + vcvtusi2ssl -512(%rdx), %xmm26, %xmm5 + +// CHECK: vcvtusi2ssl -516(%rdx), %xmm26, %xmm5 +// CHECK: encoding: 
[0x62,0xf1,0x2e,0x00,0x7b,0xaa,0xfc,0xfd,0xff,0xff] + vcvtusi2ssl -516(%rdx), %xmm26, %xmm5 + +// CHECK: vcvtusi2ssq %rax, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x00,0x7b,0xf0] + vcvtusi2ss %rax, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %rax, {rn-sae}, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x10,0x7b,0xf0] + vcvtusi2ss %rax, {rn-sae}, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %rax, {ru-sae}, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x50,0x7b,0xf0] + vcvtusi2ss %rax, {ru-sae}, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %rax, {rd-sae}, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x30,0x7b,0xf0] + vcvtusi2ss %rax, {rd-sae}, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %rax, {rz-sae}, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x70,0x7b,0xf0] + vcvtusi2ss %rax, {rz-sae}, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %r8, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x51,0xce,0x00,0x7b,0xf0] + vcvtusi2ss %r8, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %r8, {rn-sae}, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x51,0xce,0x10,0x7b,0xf0] + vcvtusi2ss %r8, {rn-sae}, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %r8, {ru-sae}, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x51,0xce,0x50,0x7b,0xf0] + vcvtusi2ss %r8, {ru-sae}, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %r8, {rd-sae}, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x51,0xce,0x30,0x7b,0xf0] + vcvtusi2ss %r8, {rd-sae}, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq %r8, {rz-sae}, %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x51,0xce,0x70,0x7b,0xf0] + vcvtusi2ss %r8, {rz-sae}, %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq (%rcx), %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x00,0x7b,0x31] + vcvtusi2ssq (%rcx), %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq 291(%rax,%r14,8), %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x31,0xce,0x00,0x7b,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtusi2ssq 291(%rax,%r14,8), %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq 1016(%rdx), %xmm22, %xmm14 +// CHECK: encoding: 
[0x62,0x71,0xce,0x00,0x7b,0x72,0x7f] + vcvtusi2ssq 1016(%rdx), %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq 1024(%rdx), %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x00,0x7b,0xb2,0x00,0x04,0x00,0x00] + vcvtusi2ssq 1024(%rdx), %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq -1024(%rdx), %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x00,0x7b,0x72,0x80] + vcvtusi2ssq -1024(%rdx), %xmm22, %xmm14 + +// CHECK: vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14 +// CHECK: encoding: [0x62,0x71,0xce,0x00,0x7b,0xb2,0xf8,0xfb,0xff,0xff] + vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14