diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index af24ecea0c27..459463003c72 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -1,6491 +1,6520 @@
//===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the X86-specific intrinsics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Interrupt traps
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
}

//===----------------------------------------------------------------------===//
// SEH intrinsics for Windows
let TargetPrefix = "x86" in {
  def int_x86_seh_lsda : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>;

  // Marks the EH registration node created in LLVM IR prior to code generation.
  def int_x86_seh_ehregnode : Intrinsic<[], [llvm_ptr_ty], []>;

  // Marks the EH guard slot node created in LLVM IR prior to code generation.
  def int_x86_seh_ehguard : Intrinsic<[], [llvm_ptr_ty], []>;

  // Given a pointer to the end of an EH registration object, returns the true
  // parent frame address that can be used with llvm.localrecover.
  def int_x86_seh_recoverfp : Intrinsic<[llvm_ptr_ty],
                                        [llvm_ptr_ty, llvm_ptr_ty],
                                        [IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// FLAGS.
let TargetPrefix = "x86" in {
  def int_x86_flags_read_u32 : GCCBuiltin<"__builtin_ia32_readeflags_u32">,
      Intrinsic<[llvm_i32_ty], [], []>;
  def int_x86_flags_read_u64 : GCCBuiltin<"__builtin_ia32_readeflags_u64">,
      Intrinsic<[llvm_i64_ty], [], []>;
  def int_x86_flags_write_u32 : GCCBuiltin<"__builtin_ia32_writeeflags_u32">,
      Intrinsic<[], [llvm_i32_ty], []>;
  def int_x86_flags_write_u64 : GCCBuiltin<"__builtin_ia32_writeeflags_u64">,
      Intrinsic<[], [llvm_i64_ty], []>;
}

//===----------------------------------------------------------------------===//
// Read Time Stamp Counter.
let TargetPrefix = "x86" in {
  def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
      Intrinsic<[llvm_i64_ty], [], []>;
  def int_x86_rdtscp : GCCBuiltin<"__builtin_ia32_rdtscp">,
      Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrArgMemOnly]>;
}

// Read Performance-Monitoring Counter.
let TargetPrefix = "x86" in { def int_x86_rdpmc : GCCBuiltin<"__builtin_ia32_rdpmc">, Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; } +//===----------------------------------------------------------------------===// +// CET SS +let TargetPrefix = "x86" in { + def int_x86_incsspd : GCCBuiltin<"__builtin_ia32_incsspd">, + Intrinsic<[], [llvm_i32_ty], []>; + def int_x86_incsspq : GCCBuiltin<"__builtin_ia32_incsspq">, + Intrinsic<[], [llvm_i64_ty], []>; + def int_x86_rdsspd : GCCBuiltin<"__builtin_ia32_rdsspd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + def int_x86_rdsspq : GCCBuiltin<"__builtin_ia32_rdsspq">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; + def int_x86_saveprevssp : GCCBuiltin<"__builtin_ia32_saveprevssp">, + Intrinsic<[], [], []>; + def int_x86_rstorssp : GCCBuiltin<"__builtin_ia32_rstorssp">, + Intrinsic<[], [llvm_ptr_ty], []>; + def int_x86_wrssd : GCCBuiltin<"__builtin_ia32_wrssd">, + Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>; + def int_x86_wrssq : GCCBuiltin<"__builtin_ia32_wrssq">, + Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], []>; + def int_x86_wrussd : GCCBuiltin<"__builtin_ia32_wrussd">, + Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>; + def int_x86_wrussq : GCCBuiltin<"__builtin_ia32_wrussq">, + Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty], []>; + def int_x86_setssbsy : GCCBuiltin<"__builtin_ia32_setssbsy">, + Intrinsic<[], [], []>; + def int_x86_clrssbsy : GCCBuiltin<"__builtin_ia32_clrssbsy">, + Intrinsic<[], [llvm_ptr_ty], []>; +} + //===----------------------------------------------------------------------===// // 3DNow! let TargetPrefix = "x86" in { def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfrsqit1 : 
GCCBuiltin<"__builtin_ia32_pfrsqit1">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // 3DNow! extensions let TargetPrefix = "x86" in { def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_3dnowa_pswapd : Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // SSE1 // Arithmetic ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_sqrt_ps : GCCBuiltin<"__builtin_ia32_sqrtps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_rsqrt_ps : GCCBuiltin<"__builtin_ia32_rsqrtps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_min_ss : GCCBuiltin<"__builtin_ia32_minss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_min_ps : GCCBuiltin<"__builtin_ia32_minps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_max_ss : GCCBuiltin<"__builtin_ia32_maxss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_max_ps : GCCBuiltin<"__builtin_ia32_maxps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; } // Comparison ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
  def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse_cmp_ps :
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_comilt_ss : GCCBuiltin<"__builtin_ia32_comilt">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_comile_ss : GCCBuiltin<"__builtin_ia32_comile">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_comigt_ss : GCCBuiltin<"__builtin_ia32_comigt">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_comige_ss : GCCBuiltin<"__builtin_ia32_comige">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_comineq_ss : GCCBuiltin<"__builtin_ia32_comineq">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_ucomieq_ss : GCCBuiltin<"__builtin_ia32_ucomieq">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_ucomilt_ss : GCCBuiltin<"__builtin_ia32_ucomilt">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_ucomile_ss : GCCBuiltin<"__builtin_ia32_ucomile">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_ucomigt_ss : GCCBuiltin<"__builtin_ia32_ucomigt">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_ucomige_ss : GCCBuiltin<"__builtin_ia32_ucomige">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_ucomineq_ss : GCCBuiltin<"__builtin_ia32_ucomineq">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
}

// Conversion ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse_cvtss2si : GCCBuiltin<"__builtin_ia32_cvtss2si">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_cvtss2si64 : GCCBuiltin<"__builtin_ia32_cvtss2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_cvttss2si : GCCBuiltin<"__builtin_ia32_cvttss2si">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_cvtsi2ss : // TODO: Remove this intrinsic.
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse_cvtsi642ss : // TODO: Remove this intrinsic.
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_cvttps2pi : GCCBuiltin<"__builtin_ia32_cvttps2pi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_x86mmx_ty], [IntrNoMem]>;
}

// Cacheability support ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">,
      Intrinsic<[], [], []>;
}

// Control register.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse_stmxcsr : Intrinsic<[], [llvm_ptr_ty], []>;
  def int_x86_sse_ldmxcsr : Intrinsic<[], [llvm_ptr_ty], []>;
}

// Misc.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse_movmsk_ps : GCCBuiltin<"__builtin_ia32_movmskps">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// SSE2

// FP arithmetic ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_sqrt_pd : GCCBuiltin<"__builtin_ia32_sqrtpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_max_pd : GCCBuiltin<"__builtin_ia32_maxpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
}

// FP comparison ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse2_cmp_pd :
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_comilt_sd : GCCBuiltin<"__builtin_ia32_comisdlt">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_comile_sd : GCCBuiltin<"__builtin_ia32_comisdle">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_comigt_sd : GCCBuiltin<"__builtin_ia32_comisdgt">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_comige_sd : GCCBuiltin<"__builtin_ia32_comisdge">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_comineq_sd : GCCBuiltin<"__builtin_ia32_comisdneq">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_ucomieq_sd : GCCBuiltin<"__builtin_ia32_ucomisdeq">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_ucomilt_sd : GCCBuiltin<"__builtin_ia32_ucomisdlt">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_ucomile_sd : GCCBuiltin<"__builtin_ia32_ucomisdle">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_ucomigt_sd : GCCBuiltin<"__builtin_ia32_ucomisdgt">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_ucomige_sd : GCCBuiltin<"__builtin_ia32_ucomisdge">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_ucomineq_sd : GCCBuiltin<"__builtin_ia32_ucomisdneq">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
}

// Integer arithmetic ops.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem, Commutative]>;
}

// Integer shift ops.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_sse2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
}

// Conversion ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvttsd2si : GCCBuiltin<"__builtin_ia32_cvttsd2si">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">,
      Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtsi2sd : // TODO: Remove this intrinsic.
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtsi642sd : // TODO: Remove this intrinsic.
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_cvtss2sd : // TODO: Remove this intrinsic.
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse_cvttpd2pi : GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse_cvtpi2pd : GCCBuiltin<"__builtin_ia32_cvtpi2pd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}

// Misc.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_sse2_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,
      Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">,
      Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
      Intrinsic<[], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_ptr_ty], []>;
  def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
      Intrinsic<[], [llvm_ptr_ty], []>;
  def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
      Intrinsic<[], [], []>;
  def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
      Intrinsic<[], [], []>;
  def int_x86_sse2_pause : GCCBuiltin<"__builtin_ia32_pause">,
      Intrinsic<[], [], []>;
}

//===----------------------------------------------------------------------===//
// SSE3

// Addition / subtraction ops.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse3_addsub_ps : GCCBuiltin<"__builtin_ia32_addsubps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse3_addsub_pd : GCCBuiltin<"__builtin_ia32_addsubpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
}

// Horizontal ops.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse3_hadd_ps : GCCBuiltin<"__builtin_ia32_haddps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse3_hadd_pd : GCCBuiltin<"__builtin_ia32_haddpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse3_hsub_ps : GCCBuiltin<"__builtin_ia32_hsubps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
  def int_x86_sse3_hsub_pd : GCCBuiltin<"__builtin_ia32_hsubpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
}

// Specialized unaligned load.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse3_ldu_dq : GCCBuiltin<"__builtin_ia32_lddqu">,
      Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
}

// Thread synchronization ops.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
  def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
}

//===----------------------------------------------------------------------===//
// SSSE3

// Horizontal arithmetic ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_phsub_w_128 : GCCBuiltin<"__builtin_ia32_phsubw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_ssse3_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_phsub_d_128 : GCCBuiltin<"__builtin_ia32_phsubd128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_ssse3_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_phsub_sw_128 : GCCBuiltin<"__builtin_ia32_phsubsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
}

// Packed multiply high with round and scale
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
  def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, Commutative]>;
}

// Shuffle ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_ssse3_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>;
}

// Sign ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_ssse3_psign_b : GCCBuiltin<"__builtin_ia32_psignb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_psign_b_128 : GCCBuiltin<"__builtin_ia32_psignb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_ssse3_psign_w : GCCBuiltin<"__builtin_ia32_psignw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_psign_w_128 : GCCBuiltin<"__builtin_ia32_psignw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_ssse3_psign_d : GCCBuiltin<"__builtin_ia32_psignd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_psign_d_128 : GCCBuiltin<"__builtin_ia32_psignd128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
}

// Absolute value ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
  def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">,
      Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// SSE4.1

// FP rounding ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
}

// Vector min element
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
}

// Advanced Encryption Standard (AES) Instructions
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesenc : GCCBuiltin<"__builtin_ia32_aesenc128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesenc_256 : GCCBuiltin<"__builtin_ia32_aesenc256">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesenc_512 : GCCBuiltin<"__builtin_ia32_aesenc512">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesenclast : GCCBuiltin<"__builtin_ia32_aesenclast128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesenclast_256 : GCCBuiltin<"__builtin_ia32_aesenclast256">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesenclast_512 : GCCBuiltin<"__builtin_ia32_aesenclast512">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesdec : GCCBuiltin<"__builtin_ia32_aesdec128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesdec_256 : GCCBuiltin<"__builtin_ia32_aesdec256">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesdec_512 : GCCBuiltin<"__builtin_ia32_aesdec512">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesdeclast_256 : GCCBuiltin<"__builtin_ia32_aesdeclast256">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aesdeclast_512 : GCCBuiltin<"__builtin_ia32_aesdeclast512">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>;
  def int_x86_aesni_aeskeygenassist :
      GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
}

// PCLMUL instructions
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_pclmulqdq_256 : GCCBuiltin<"__builtin_ia32_pclmulqdq256">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_pclmulqdq_512 : GCCBuiltin<"__builtin_ia32_pclmulqdq512">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}

// Vector pack
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
}

// Vector multiply
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem, Commutative]>;
}

// Vector insert
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
}

// Vector blend
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
  def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
}

// Vector dot product
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, Commutative]>;
  def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, Commutative]>;
}

// Vector sum of absolute differences
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem, Commutative]>;
}

// Test instruction with bitwise comparison.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
      Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
      Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
      Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// SSE4.2

// Miscellaneous
// CRC Instruction
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse42_crc32_32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse42_crc32_32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_sse42_crc32_32_32 : GCCBuiltin<"__builtin_ia32_crc32si">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_sse42_crc32_64_64 : GCCBuiltin<"__builtin_ia32_crc32di">,
      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
}

// String/text processing ops.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
      Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
      Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
      Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
      Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
      Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
      Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
      Intrinsic<[llvm_v16i8_ty],
                [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
                [IntrNoMem]>;
  def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
      Intrinsic<[llvm_i32_ty],
                [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
                [IntrNoMem]>;
  def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
      Intrinsic<[llvm_i32_ty],
                [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
                [IntrNoMem]>;
  def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
      Intrinsic<[llvm_i32_ty],
                [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
                [IntrNoMem]>;
  def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
      Intrinsic<[llvm_i32_ty],
                [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
                [IntrNoMem]>;
  def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
      Intrinsic<[llvm_i32_ty],
                [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
                [IntrNoMem]>;
  def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
      Intrinsic<[llvm_i32_ty],
                [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty],
                [IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// SSE4A
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// AVX

// Arithmetic ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx_addsub_pd_256 : GCCBuiltin<"__builtin_ia32_addsubpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
  def int_x86_avx_addsub_ps_256 : GCCBuiltin<"__builtin_ia32_addsubps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_avx_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
  def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
  def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
  def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
}

// Horizontal ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx_hadd_pd_256 : GCCBuiltin<"__builtin_ia32_haddpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
  def int_x86_avx_hsub_ps_256 : GCCBuiltin<"__builtin_ia32_hsubps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
  def int_x86_avx_hsub_pd_256 : GCCBuiltin<"__builtin_ia32_hsubpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
  def int_x86_avx_hadd_ps_256 : GCCBuiltin<"__builtin_ia32_haddps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
}

// Vector permutation
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx_vpermilvar_pd : GCCBuiltin<"__builtin_ia32_vpermilvarpd">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2i64_ty], [IntrNoMem]>;
  def int_x86_avx_vpermilvar_ps : GCCBuiltin<"__builtin_ia32_vpermilvarps">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_avx_vpermilvar_pd_256 :
      GCCBuiltin<"__builtin_ia32_vpermilvarpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>;
  def int_x86_avx_vpermilvar_ps_256 :
      GCCBuiltin<"__builtin_ia32_vpermilvarps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_d_128 :
      GCCBuiltin<"__builtin_ia32_vpermi2vard128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_d_256 :
      GCCBuiltin<"__builtin_ia32_vpermi2vard256_mask">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_d_512 :
      GCCBuiltin<"__builtin_ia32_vpermi2vard512_mask">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_hi_128 :
      GCCBuiltin<"__builtin_ia32_vpermi2varhi128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_hi_256 :
      GCCBuiltin<"__builtin_ia32_vpermi2varhi256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_hi_512 :
      GCCBuiltin<"__builtin_ia32_vpermi2varhi512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_pd_128 :
      GCCBuiltin<"__builtin_ia32_vpermi2varpd128_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_pd_256 :
      GCCBuiltin<"__builtin_ia32_vpermi2varpd256_mask">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_pd_512 :
      GCCBuiltin<"__builtin_ia32_vpermi2varpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_ps_128 :
      GCCBuiltin<"__builtin_ia32_vpermi2varps128_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_ps_256 :
      GCCBuiltin<"__builtin_ia32_vpermi2varps256_mask">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_ps_512 :
      GCCBuiltin<"__builtin_ia32_vpermi2varps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_q_128 :
      GCCBuiltin<"__builtin_ia32_vpermi2varq128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_q_256 :
      GCCBuiltin<"__builtin_ia32_vpermi2varq256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_q_512 :
      GCCBuiltin<"__builtin_ia32_vpermi2varq512_mask">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_d_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2vard512_mask">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_q_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varq512_mask">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_ps_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_pd_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_d_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2vard128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_d_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2vard128_maskz">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_d_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2vard256_mask">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_d_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2vard256_maskz">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_d_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2vard512_maskz">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_hi_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varhi128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_hi_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varhi128_maskz">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_hi_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varhi256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_hi_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varhi256_maskz">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_hi_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varhi512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_hi_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varhi512_maskz">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_pd_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varpd128_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_pd_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varpd128_maskz">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_pd_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varpd256_mask">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_pd_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varpd256_maskz">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_pd_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varpd512_maskz">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_ps_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varps128_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_ps_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varps128_maskz">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_ps_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varps256_mask">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_ps_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varps256_maskz">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_ps_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varps512_maskz">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_q_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varq128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_q_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varq128_maskz">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_q_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varq256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_q_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varq256_maskz">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_q_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varq512_maskz">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_qi_128 :
      GCCBuiltin<"__builtin_ia32_vpermi2varqi128_mask">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_qi_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varqi128_mask">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_qi_128 :
      GCCBuiltin<"__builtin_ia32_vpermt2varqi128_maskz">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_qi_256 :
      GCCBuiltin<"__builtin_ia32_vpermi2varqi256_mask">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_qi_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varqi256_mask">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_qi_256 :
      GCCBuiltin<"__builtin_ia32_vpermt2varqi256_maskz">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermi2var_qi_512 :
      GCCBuiltin<"__builtin_ia32_vpermi2varqi512_mask">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpermt2var_qi_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varqi512_mask">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpermt2var_qi_512 :
      GCCBuiltin<"__builtin_ia32_vpermt2varqi512_maskz">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_avx512_vpermilvar_pd_512 :
      GCCBuiltin<"__builtin_ia32_vpermilvarpd512">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty], [IntrNoMem]>;
  def int_x86_avx512_vpermilvar_ps_512 :
      GCCBuiltin<"__builtin_ia32_vpermilvarps512">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty], [IntrNoMem]>;
  def int_x86_avx512_pshuf_b_512 : GCCBuiltin<"__builtin_ia32_pshufb512">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
}

// GFNI Instructions
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_vgf2p8affineinvqb_128 :
      GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_vgf2p8affineinvqb_256 :
      GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_vgf2p8affineinvqb_512 :
      GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_vgf2p8affineqb_128 :
      GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_vgf2p8affineqb_256 :
      GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_vgf2p8affineqb_512 :
      GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_vgf2p8mulb_128 : GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_vgf2p8mulb_256 : GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v32qi">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
  def int_x86_vgf2p8mulb_512 : GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v64qi">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem]>;
}

// Vector blend
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
  def int_x86_avx_blendv_ps_256 : GCCBuiltin<"__builtin_ia32_blendvps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
}

// Vector dot product
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, Commutative]>;
}

// Vector compare
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
def int_x86_avx_cmp_pd_256 : Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx_cmp_ps_256 : Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; } // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; } // Vector bit test let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_vtestz_pd : GCCBuiltin<"__builtin_ia32_vtestzpd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx_vtestc_pd : GCCBuiltin<"__builtin_ia32_vtestcpd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx_vtestnzc_pd : GCCBuiltin<"__builtin_ia32_vtestnzcpd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx_vtestz_ps : GCCBuiltin<"__builtin_ia32_vtestzps">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx_vtestc_ps : GCCBuiltin<"__builtin_ia32_vtestcps">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx_vtestnzc_ps : GCCBuiltin<"__builtin_ia32_vtestnzcps">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx_vtestz_pd_256 : GCCBuiltin<"__builtin_ia32_vtestzpd256">, Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_vtestc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestcpd256">, Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_vtestnzc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestnzcpd256">, Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_vtestz_ps_256 : GCCBuiltin<"__builtin_ia32_vtestzps256">, Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx_vtestc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestcps256">, Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx_vtestnzc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestnzcps256">, Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx_ptestz_256 : GCCBuiltin<"__builtin_ia32_ptestz256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx_ptestc_256 : GCCBuiltin<"__builtin_ia32_ptestc256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">, Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_pd_128 : GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def 
int_x86_avx512_mask_fpclass_pd_256 : GCCBuiltin<"__builtin_ia32_fpclasspd256_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_pd_512 : GCCBuiltin<"__builtin_ia32_fpclasspd512_mask">, Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_ps_128 : GCCBuiltin<"__builtin_ia32_fpclassps128_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_ps_256 : GCCBuiltin<"__builtin_ia32_fpclassps256_mask">, Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_ps_512 : GCCBuiltin<"__builtin_ia32_fpclassps512_mask">, Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_sd : GCCBuiltin<"__builtin_ia32_fpclasssd_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_fpclass_ss : GCCBuiltin<"__builtin_ia32_fpclassss_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; } // Vector extract sign mask let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_movmsk_pd_256 : GCCBuiltin<"__builtin_ia32_movmskpd256">, Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx_movmsk_ps_256 : GCCBuiltin<"__builtin_ia32_movmskps256">, Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; } // Vector zero let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_vzeroall : GCCBuiltin<"__builtin_ia32_vzeroall">, Intrinsic<[], [], []>; def int_x86_avx_vzeroupper : GCCBuiltin<"__builtin_ia32_vzeroupper">, Intrinsic<[], [], []>; } // SIMD load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">, Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>; } // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">, Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">, Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem, IntrArgMemOnly]>; } // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
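  // Note: the maskstore operand order below is (pointer, mask, data); only
  // lanes whose integer mask element has its sign bit set are written, and
  // all other memory is left untouched. Usage sketch from C (illustrative
  // only, not part of this file; assumes <immintrin.h> and -mavx):
  //   _mm256_maskstore_pd(p, m, v);  // llvm.x86.avx.maskstore.pd.256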
def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty], [IntrArgMemOnly]>; def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty], [IntrArgMemOnly]>; def int_x86_avx_maskstore_pd_256 : GCCBuiltin<"__builtin_ia32_maskstorepd256">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty], [IntrArgMemOnly]>; def int_x86_avx_maskstore_ps_256 : GCCBuiltin<"__builtin_ia32_maskstoreps256">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_store_ss : GCCBuiltin<"__builtin_ia32_storess_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrArgMemOnly]>; } // BITALG bits shuffle let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_vpshufbitqmb_128 : GCCBuiltin<"__builtin_ia32_vpshufbitqmb128_mask">, Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshufbitqmb_256 : GCCBuiltin<"__builtin_ia32_vpshufbitqmb256_mask">, Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshufbitqmb_512 : GCCBuiltin<"__builtin_ia32_vpshufbitqmb512_mask">, Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // AVX2 // Integer arithmetic ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_pmul_dq : GCCBuiltin<"__builtin_ia32_pmuldq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem, Commutative]>; def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, 
Commutative]>; def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">, Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem, Commutative]>; } // Integer shift ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_psrai_q_128 : GCCBuiltin<"__builtin_ia32_psraqi128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psrai_q_256 : GCCBuiltin<"__builtin_ia32_psraqi256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx512_psll_d_512 : GCCBuiltin<"__builtin_ia32_pslld512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_psll_q_512 : GCCBuiltin<"__builtin_ia32_psllq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_psrl_w_512 : 
GCCBuiltin<"__builtin_ia32_psrlw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx512_psrl_d_512 : GCCBuiltin<"__builtin_ia32_psrld512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_psrl_q_512 : GCCBuiltin<"__builtin_ia32_psrlq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx512_psra_d_512 : GCCBuiltin<"__builtin_ia32_psrad512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_psra_q_512 : GCCBuiltin<"__builtin_ia32_psraq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_pslli_w_512 : GCCBuiltin<"__builtin_ia32_psllwi512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_pslli_d_512 : GCCBuiltin<"__builtin_ia32_pslldi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_pslli_q_512 : GCCBuiltin<"__builtin_ia32_psllqi512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psrli_w_512 : GCCBuiltin<"__builtin_ia32_psrlwi512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psrli_d_512 : GCCBuiltin<"__builtin_ia32_psrldi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psrli_q_512 : GCCBuiltin<"__builtin_ia32_psrlqi512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psrai_w_512 : GCCBuiltin<"__builtin_ia32_psrawi512">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psrai_d_512 : GCCBuiltin<"__builtin_ia32_psradi512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psrai_q_512 : GCCBuiltin<"__builtin_ia32_psraqi512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_128: GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_256: GCCBuiltin<"__builtin_ia32_vpmultishiftqb256_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_512: GCCBuiltin<"__builtin_ia32_vpmultishiftqb512_mask">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; } // Pack ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; } // Horizontal arithmetic ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; } // Sign ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; } // Packed multiply high with round and scale let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem, Commutative]>; def int_x86_avx512_mask_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmul_hr_sw_256 : GCCBuiltin<"__builtin_ia32_pmulhrsw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmul_hr_sw_512 : GCCBuiltin<"__builtin_ia32_pmulhrsw512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; } // Vector blend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; } // Vector permutation let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; } // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
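  // Note: as with the AVX floating-point forms above, the integer maskloads
  // below read only the lanes whose mask element has its sign bit set and
  // zero the remaining lanes. Usage sketch from C (illustrative only, not
  // part of this file; assumes <immintrin.h> and -mavx2):
  //   __m256i v = _mm256_maskload_epi32(p, m); // llvm.x86.avx2.maskload.d.256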
def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">, Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">, Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">, Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">, Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem, IntrArgMemOnly]>; } // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrArgMemOnly]>; def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrArgMemOnly]>; def int_x86_avx2_maskstore_d_256 : GCCBuiltin<"__builtin_ia32_maskstored256">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrArgMemOnly]>; def int_x86_avx2_maskstore_q_256 : GCCBuiltin<"__builtin_ia32_maskstoreq256">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrArgMemOnly]>; } // Variable bit shift ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_psllv_d_512 : GCCBuiltin<"__builtin_ia32_psllv16si">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_psllv_q_512 : GCCBuiltin<"__builtin_ia32_psllv8di">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_psrlv_d_512 : GCCBuiltin<"__builtin_ia32_psrlv16si">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_psrlv_q_512 : GCCBuiltin<"__builtin_ia32_psrlv8di">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_psrav_d_512 : GCCBuiltin<"__builtin_ia32_psrav16si">, 
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_psrav_q_128 : GCCBuiltin<"__builtin_ia32_psravq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_psrav_q_256 : GCCBuiltin<"__builtin_ia32_psravq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_psrav_q_512 : GCCBuiltin<"__builtin_ia32_psrav8di">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty], [IntrNoMem]>; def int_x86_avx512_psllv_w_128 : GCCBuiltin<"__builtin_ia32_psllv8hi">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx512_psllv_w_256 : GCCBuiltin<"__builtin_ia32_psllv16hi">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx512_psllv_w_512 : GCCBuiltin<"__builtin_ia32_psllv32hi">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; def int_x86_avx512_psrlv_w_128 : GCCBuiltin<"__builtin_ia32_psrlv8hi">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx512_psrlv_w_256 : GCCBuiltin<"__builtin_ia32_psrlv16hi">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx512_psrlv_w_512 : GCCBuiltin<"__builtin_ia32_psrlv32hi">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; def int_x86_avx512_psrav_w_128 : GCCBuiltin<"__builtin_ia32_psrav8hi">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx512_psrav_w_256 : GCCBuiltin<"__builtin_ia32_psrav16hi">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128_mask">, Intrinsic<[llvm_v4i32_ty] , [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256_mask">, Intrinsic<[llvm_v8i32_ty] , [llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512_mask">, Intrinsic<[llvm_v16i32_ty] , [llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_prol_q_128 : 
GCCBuiltin<"__builtin_ia32_prolq128_mask">, Intrinsic<[llvm_v2i64_ty] , [llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256_mask">, Intrinsic<[llvm_v4i64_ty] , [llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512_mask">, Intrinsic<[llvm_v8i64_ty] , [llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Gather ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
      Intrinsic<[llvm_v2f64_ty],
        [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
      Intrinsic<[llvm_v4f64_ty],
        [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
      Intrinsic<[llvm_v2f64_ty],
        [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
      Intrinsic<[llvm_v4f64_ty],
        [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
      Intrinsic<[llvm_v4f32_ty],
        [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
      Intrinsic<[llvm_v8f32_ty],
        [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
      Intrinsic<[llvm_v4f32_ty],
        [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
      Intrinsic<[llvm_v4f32_ty],
        [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
      Intrinsic<[llvm_v2i64_ty],
        [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
      Intrinsic<[llvm_v4i64_ty],
        [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
      Intrinsic<[llvm_v2i64_ty],
        [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
      Intrinsic<[llvm_v4i64_ty],
        [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
      Intrinsic<[llvm_v4i32_ty],
        [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
      Intrinsic<[llvm_v8i32_ty],
        [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
      Intrinsic<[llvm_v4i32_ty],
        [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
      Intrinsic<[llvm_v4i32_ty],
        [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
        [IntrReadMem, IntrArgMemOnly]>;
}

// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
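  // Note: pmovmskb packs the sign bit of each byte into a scalar mask, and
  // pshufb selects bytes of its first operand by the low four bits of each
  // control byte (per 128-bit lane for these 256-bit forms); a control byte
  // with its high bit set zeroes that byte. Usage sketch from C (illustrative
  // only, not part of this file; assumes <immintrin.h> and -mavx2):
  //   int m     = _mm256_movemask_epi8(v);      // llvm.x86.avx2.pmovmskb
  //   __m256i s = _mm256_shuffle_epi8(a, ctl);  // llvm.x86.avx2.pshuf.b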
def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">, Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem, Commutative]>; } //===----------------------------------------------------------------------===// // FMA3 and FMA4 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_fma_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma4_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_fma_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_fma_vfmsub_ss : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfmsub_sd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfmsub_ps : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfmsub_pd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfmsub_ps_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_fma_vfmsub_pd_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_fma_vfnmadd_ss : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfnmadd_sd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfnmadd_ps : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfnmadd_pd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfnmadd_ps_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_fma_vfnmadd_pd_256 : // TODO: remove this 
intrinsic Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_fma_vfnmsub_ss : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfnmsub_sd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfnmsub_ps : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfnmsub_pd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfnmsub_ps_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_fma_vfnmsub_pd_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_fma_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_fma_vfmsubadd_ps : // TODO: remove this intrinsic Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_x86_fma_vfmsubadd_pd : // TODO: remove this intrinsic Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_fma_vfmsubadd_ps_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; def int_x86_fma_vfmsubadd_pd_256 : // TODO: remove this intrinsic Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddpd128_maskz">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask3">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_maskz">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, 
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask3">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_maskz">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_mask3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_maskz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_mask3">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_maskz">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_mask3">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_maskz">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_maskz">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask3">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_maskz">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], 
[IntrNoMem]>; def int_x86_avx512_mask3_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask3">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_maskz">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_maskz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask3">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_maskz">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask3">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_maskz">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd3_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss3_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd3_maskz">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss3_maskz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd3_mask3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss3_mask3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd3_mask3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, 
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss3_mask3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask3">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask3">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubps128_mask3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256_mask3">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512_mask3">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsubadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsubadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask3">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask3">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsubadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsubadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask3">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask3">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, 
llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd3_mask3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss3_mask3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfnmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask3">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask3">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask3">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfnmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask3">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask3">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask3_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask3">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpmadd52h_uq_128 : GCCBuiltin<"__builtin_ia32_vpmadd52huq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpmadd52h_uq_128 : GCCBuiltin<"__builtin_ia32_vpmadd52huq128_maskz">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpmadd52l_uq_128 : GCCBuiltin<"__builtin_ia32_vpmadd52luq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; 
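  // Note: the vpmadd52 definitions above and below implement IFMA:
  // vpmadd52luq multiplies the low 52 bits of each i64 lane of the two
  // multiplicands and adds the low 52 bits of the 104-bit product to the
  // accumulator; vpmadd52huq adds the high 52 bits instead. Usage sketch
  // from C (illustrative only, not part of this file; assumes <immintrin.h>
  // and -mavx512ifma):
  //   __m512i r = _mm512_madd52lo_epu64(acc, a, b);
  //   // llvm.x86.avx512.mask.vpmadd52l.uq.512 (with an all-ones mask)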
def int_x86_avx512_maskz_vpmadd52l_uq_128 :
      GCCBuiltin<"__builtin_ia32_vpmadd52luq128_maskz">,
      Intrinsic<[llvm_v2i64_ty],
        [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
  def int_x86_avx512_mask_vpmadd52h_uq_256 :
      GCCBuiltin<"__builtin_ia32_vpmadd52huq256_mask">,
      Intrinsic<[llvm_v4i64_ty],
        [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
  def int_x86_avx512_maskz_vpmadd52h_uq_256 :
      GCCBuiltin<"__builtin_ia32_vpmadd52huq256_maskz">,
      Intrinsic<[llvm_v4i64_ty],
        [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
  def int_x86_avx512_mask_vpmadd52l_uq_256 :
      GCCBuiltin<"__builtin_ia32_vpmadd52luq256_mask">,
      Intrinsic<[llvm_v4i64_ty],
        [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
  def int_x86_avx512_maskz_vpmadd52l_uq_256 :
      GCCBuiltin<"__builtin_ia32_vpmadd52luq256_maskz">,
      Intrinsic<[llvm_v4i64_ty],
        [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
  def int_x86_avx512_mask_vpmadd52h_uq_512 :
      GCCBuiltin<"__builtin_ia32_vpmadd52huq512_mask">,
      Intrinsic<[llvm_v8i64_ty],
        [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
  def int_x86_avx512_maskz_vpmadd52h_uq_512 :
      GCCBuiltin<"__builtin_ia32_vpmadd52huq512_maskz">,
      Intrinsic<[llvm_v8i64_ty],
        [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
  def int_x86_avx512_mask_vpmadd52l_uq_512 :
      GCCBuiltin<"__builtin_ia32_vpmadd52luq512_mask">,
      Intrinsic<[llvm_v8i64_ty],
        [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
  def int_x86_avx512_maskz_vpmadd52l_uq_512 :
      GCCBuiltin<"__builtin_ia32_vpmadd52luq512_maskz">,
      Intrinsic<[llvm_v8i64_ty],
        [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
        [IntrNoMem]>;
}

// VNNI
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
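  // Note: vpdpbusd multiplies unsigned bytes of one source by the
  // corresponding signed bytes of the other and accumulates each group of
  // four products into the matching i32 lane; the *s variants saturate
  // instead of wrapping, and vpdpwssd/vpdpwssds do the same for signed i16
  // pairs. Usage sketch from C (illustrative only, not part of this file;
  // assumes <immintrin.h> and -mavx512vnni):
  //   __m512i r = _mm512_dpbusd_epi32(acc, a, b);
  //   // llvm.x86.avx512.mask.vpdpbusd.512 (with an all-ones mask)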
def int_x86_avx512_mask_vpdpbusd_128 : GCCBuiltin<"__builtin_ia32_vpdpbusd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpbusd_128 : GCCBuiltin<"__builtin_ia32_vpdpbusd128_maskz">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpbusd_256 : GCCBuiltin<"__builtin_ia32_vpdpbusd256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpbusd_256 : GCCBuiltin<"__builtin_ia32_vpdpbusd256_maskz">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpbusd_512 : GCCBuiltin<"__builtin_ia32_vpdpbusd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpbusd_512 : GCCBuiltin<"__builtin_ia32_vpdpbusd512_maskz">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpbusds_128 : GCCBuiltin<"__builtin_ia32_vpdpbusds128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpbusds_128 : GCCBuiltin<"__builtin_ia32_vpdpbusds128_maskz">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpbusds_256 : GCCBuiltin<"__builtin_ia32_vpdpbusds256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpbusds_256 : GCCBuiltin<"__builtin_ia32_vpdpbusds256_maskz">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpbusds_512 : GCCBuiltin<"__builtin_ia32_vpdpbusds512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpbusds_512 : GCCBuiltin<"__builtin_ia32_vpdpbusds512_maskz">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpwssd_128 : GCCBuiltin<"__builtin_ia32_vpdpwssd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpwssd_128 : GCCBuiltin<"__builtin_ia32_vpdpwssd128_maskz">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpwssd_256 : GCCBuiltin<"__builtin_ia32_vpdpwssd256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpwssd_256 : GCCBuiltin<"__builtin_ia32_vpdpwssd256_maskz">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpwssd_512 : GCCBuiltin<"__builtin_ia32_vpdpwssd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpwssd_512 : GCCBuiltin<"__builtin_ia32_vpdpwssd512_maskz">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpwssds_128 : GCCBuiltin<"__builtin_ia32_vpdpwssds128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, 
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpwssds_128 : GCCBuiltin<"__builtin_ia32_vpdpwssds128_maskz">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpwssds_256 : GCCBuiltin<"__builtin_ia32_vpdpwssds256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpwssds_256 : GCCBuiltin<"__builtin_ia32_vpdpwssds256_maskz">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpdpwssds_512 : GCCBuiltin<"__builtin_ia32_vpdpwssds512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpdpwssds_512 : GCCBuiltin<"__builtin_ia32_vpdpwssds512_maskz">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // XOP let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpermil2pd_256 : GCCBuiltin<"__builtin_ia32_vpermil2pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpermil2ps_256 : GCCBuiltin<"__builtin_ia32_vpermil2ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpcomw : GCCBuiltin<"__builtin_ia32_vpcomw">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpcomd : GCCBuiltin<"__builtin_ia32_vpcomd">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpcomq : GCCBuiltin<"__builtin_ia32_vpcomq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpcomub : GCCBuiltin<"__builtin_ia32_vpcomub">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpcomuw : GCCBuiltin<"__builtin_ia32_vpcomuw">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpcomud : 
GCCBuiltin<"__builtin_ia32_vpcomud">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpcomuq : GCCBuiltin<"__builtin_ia32_vpcomuq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vphaddbd : GCCBuiltin<"__builtin_ia32_vphaddbd">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphaddbq : GCCBuiltin<"__builtin_ia32_vphaddbq">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphaddbw : GCCBuiltin<"__builtin_ia32_vphaddbw">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphadddq : GCCBuiltin<"__builtin_ia32_vphadddq">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vphaddubd : GCCBuiltin<"__builtin_ia32_vphaddubd">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphaddubq : GCCBuiltin<"__builtin_ia32_vphaddubq">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphaddubw : GCCBuiltin<"__builtin_ia32_vphaddubw">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphaddudq : GCCBuiltin<"__builtin_ia32_vphaddudq">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vphadduwd : GCCBuiltin<"__builtin_ia32_vphadduwd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vphadduwq : GCCBuiltin<"__builtin_ia32_vphadduwq">, Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vphaddwd : GCCBuiltin<"__builtin_ia32_vphaddwd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vphaddwq : GCCBuiltin<"__builtin_ia32_vphaddwq">, Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vphsubbw : GCCBuiltin<"__builtin_ia32_vphsubbw">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vphsubdq : GCCBuiltin<"__builtin_ia32_vphsubdq">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vphsubwd : GCCBuiltin<"__builtin_ia32_vphsubwd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vpmacsdd : GCCBuiltin<"__builtin_ia32_vpmacsdd">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpmacsdqh : GCCBuiltin<"__builtin_ia32_vpmacsdqh">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vpmacsdql : GCCBuiltin<"__builtin_ia32_vpmacsdql">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vpmacssdd : GCCBuiltin<"__builtin_ia32_vpmacssdd">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpmacssdqh : GCCBuiltin<"__builtin_ia32_vpmacssdqh">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vpmacssdql : GCCBuiltin<"__builtin_ia32_vpmacssdql">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vpmacsswd : GCCBuiltin<"__builtin_ia32_vpmacsswd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpmacssww : GCCBuiltin<"__builtin_ia32_vpmacssww">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vpmacswd : GCCBuiltin<"__builtin_ia32_vpmacswd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; def 
int_x86_xop_vpmacsww : GCCBuiltin<"__builtin_ia32_vpmacsww">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vpmadcsswd : GCCBuiltin<"__builtin_ia32_vpmadcsswd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpmadcswd : GCCBuiltin<"__builtin_ia32_vpmadcswd">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpperm : GCCBuiltin<"__builtin_ia32_vpperm">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vprotb : GCCBuiltin<"__builtin_ia32_vprotb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vprotd : GCCBuiltin<"__builtin_ia32_vprotd">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vprotq : GCCBuiltin<"__builtin_ia32_vprotq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vprotw : GCCBuiltin<"__builtin_ia32_vprotw">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vprotbi : GCCBuiltin<"__builtin_ia32_vprotbi">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vprotdi : GCCBuiltin<"__builtin_ia32_vprotdi">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vprotqi : GCCBuiltin<"__builtin_ia32_vprotqi">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vprotwi : GCCBuiltin<"__builtin_ia32_vprotwi">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_xop_vpshab : GCCBuiltin<"__builtin_ia32_vpshab">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vpshad : GCCBuiltin<"__builtin_ia32_vpshad">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpshaq : GCCBuiltin<"__builtin_ia32_vpshaq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vpshaw : GCCBuiltin<"__builtin_ia32_vpshaw">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_xop_vpshlb : GCCBuiltin<"__builtin_ia32_vpshlb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_xop_vpshld : GCCBuiltin<"__builtin_ia32_vpshld">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_xop_vpshlq : GCCBuiltin<"__builtin_ia32_vpshlq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_xop_vpshlw : GCCBuiltin<"__builtin_ia32_vpshlw">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // LWP let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
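// Usage sketch (illustrative comment, not part of the definitions): the
// LWPINS defs below return the instruction's carry-flag output as an i8,
// so C callers can test the result directly. A hedged example through the
// builtin named in the def; the helper name is hypothetical and -mlwp is
// assumed:
//
//   int try_lwp_insert(unsigned addr, unsigned data, unsigned flags) {
//     /* A nonzero result reports the carry flag set by LWPINS. */
//     return __builtin_ia32_lwpins32(addr, data, flags);
//   }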
def int_x86_llwpcb : GCCBuiltin<"__builtin_ia32_llwpcb">, Intrinsic<[], [llvm_ptr_ty], []>; def int_x86_slwpcb : GCCBuiltin<"__builtin_ia32_slwpcb">, Intrinsic<[llvm_ptr_ty], [], []>; def int_x86_lwpins32 : GCCBuiltin<"__builtin_ia32_lwpins32">, Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_lwpins64 : GCCBuiltin<"__builtin_ia32_lwpins64">, Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_lwpval32 : GCCBuiltin<"__builtin_ia32_lwpval32">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_lwpval64 : GCCBuiltin<"__builtin_ia32_lwpval64">, Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; } //===----------------------------------------------------------------------===// // MMX // Empty MMX state op. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_mmx_emms : GCCBuiltin<"__builtin_ia32_emms">, Intrinsic<[], [], []>; def int_x86_mmx_femms : GCCBuiltin<"__builtin_ia32_femms">, Intrinsic<[], [], []>; } // Integer arithmetic ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Addition def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Subtraction def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; // Multiplication def int_x86_mmx_pmulh_w : 
GCCBuiltin<"__builtin_ia32_pmulhw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Bitwise operations def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Averages def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Maximum def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Minimum def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; // Packed sum of absolute differences def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; } // Integer shift ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
// Shift left logical def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; } // Permute let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
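// Semantics note (illustrative, not part of the definitions): the masked
// permvar defs below fill each destination lane from the source lane named
// by the matching index element, keeping the passthru lane where the mask
// bit is clear. A hedged scalar C model, assuming the index is reduced
// modulo the lane count n (hypothetical helper):
//
//   void permvar_mask(int *dst, const int *src, const unsigned *idx,
//                     const int *pass, unsigned long long mask, int n) {
//     for (int i = 0; i < n; i++)
//       dst[i] = (mask >> i) & 1 ? src[idx[i] % (unsigned)n] : pass[i];
//   }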
def int_x86_avx512_mask_permvar_df_256 : GCCBuiltin<"__builtin_ia32_permvardf256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_df_512 : GCCBuiltin<"__builtin_ia32_permvardf512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_di_256 : GCCBuiltin<"__builtin_ia32_permvardi256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_di_512 : GCCBuiltin<"__builtin_ia32_permvardi512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_hi_128 : GCCBuiltin<"__builtin_ia32_permvarhi128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_hi_256 : GCCBuiltin<"__builtin_ia32_permvarhi256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_hi_512 : GCCBuiltin<"__builtin_ia32_permvarhi512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_qi_128 : GCCBuiltin<"__builtin_ia32_permvarqi128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_qi_256 : GCCBuiltin<"__builtin_ia32_permvarqi256_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_qi_512 : GCCBuiltin<"__builtin_ia32_permvarqi512_mask">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_sf_256 : GCCBuiltin<"__builtin_ia32_permvarsf256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_sf_512 : GCCBuiltin<"__builtin_ia32_permvarsf512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_si_256 : GCCBuiltin<"__builtin_ia32_permvarsi256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_permvar_si_512 : GCCBuiltin<"__builtin_ia32_permvarsi512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; } // Pack ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_mmx_packsswb : GCCBuiltin<"__builtin_ia32_packsswb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_packssdw : GCCBuiltin<"__builtin_ia32_packssdw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_packuswb : GCCBuiltin<"__builtin_ia32_packuswb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; } // Unpacking ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
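// Semantics note (illustrative, not part of the definitions): the unpack
// defs below interleave lanes of the two operands; "l" forms draw from the
// low halves, "h" forms from the high halves. A hedged scalar C model of
// punpcklbw over 8-byte vectors (hypothetical helper):
//
//   void punpcklbw_model(unsigned char dst[8], const unsigned char a[8],
//                        const unsigned char b[8]) {
//     for (int i = 0; i < 4; i++) {
//       dst[2 * i]     = a[i];  /* low bytes of a ... */
//       dst[2 * i + 1] = b[i];  /* ... interleaved with low bytes of b */
//     }
//   }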
def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; } // Integer comparison ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; } // Misc. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">, Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>; def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">, Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">, Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>; def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">, Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // BMI let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
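// Usage sketch (illustrative, not part of the definitions): BEXTR's second
// operand below packs the bit-field start in bits 7:0 and its length in
// bits 15:8. A hedged C call through the builtin named in the def; assumes
// -mbmi, and the helper name is hypothetical:
//
//   unsigned extract_field(unsigned v, unsigned start, unsigned len) {
//     return __builtin_ia32_bextr_u32(v, start | (len << 8));
//   }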
def int_x86_bmi_bextr_32 : GCCBuiltin<"__builtin_ia32_bextr_u32">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_bmi_bextr_64 : GCCBuiltin<"__builtin_ia32_bextr_u64">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_x86_bmi_bzhi_32 : GCCBuiltin<"__builtin_ia32_bzhi_si">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // FS/GS Base let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">, Intrinsic<[llvm_i32_ty], []>; def int_x86_rdgsbase_32 : GCCBuiltin<"__builtin_ia32_rdgsbase32">, Intrinsic<[llvm_i32_ty], []>; def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">, Intrinsic<[llvm_i64_ty], []>; def int_x86_rdgsbase_64 : GCCBuiltin<"__builtin_ia32_rdgsbase64">, Intrinsic<[llvm_i64_ty], []>; def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">, Intrinsic<[], [llvm_i32_ty]>; def int_x86_wrgsbase_32 : GCCBuiltin<"__builtin_ia32_wrgsbase32">, Intrinsic<[], [llvm_i32_ty]>; def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">, Intrinsic<[], [llvm_i64_ty]>; def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">, Intrinsic<[], [llvm_i64_ty]>; } //===----------------------------------------------------------------------===// // FXSR let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_fxrstor : GCCBuiltin<"__builtin_ia32_fxrstor">, Intrinsic<[], [llvm_ptr_ty], []>; def int_x86_fxrstor64 : GCCBuiltin<"__builtin_ia32_fxrstor64">, Intrinsic<[], [llvm_ptr_ty], []>; def int_x86_fxsave : GCCBuiltin<"__builtin_ia32_fxsave">, Intrinsic<[], [llvm_ptr_ty], []>; def int_x86_fxsave64 : GCCBuiltin<"__builtin_ia32_fxsave64">, Intrinsic<[], [llvm_ptr_ty], []>; } //===----------------------------------------------------------------------===// // XSAVE let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
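// Semantics note (illustrative, not part of the definitions): the
// XSAVE-family defs below take the 64-bit state-component bitmap as two
// i32 operands, matching the EDX:EAX pair the instructions consume. A
// hedged C sketch of the split a frontend would perform (hypothetical
// helper):
//
//   void split_xsave_mask(unsigned long long mask,
//                         unsigned *hi, unsigned *lo) {
//     *hi = (unsigned)(mask >> 32);         /* EDX half */
//     *lo = (unsigned)(mask & 0xffffffffu); /* EAX half */
//   }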
def int_x86_xsave : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xsave64 : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xrstor : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xrstor64 : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xsaveopt : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xsaveopt64 : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xrstors : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xrstors64 : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xsavec : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xsavec64 : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xsaves : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xsaves64 : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_x86_xgetbv : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; def int_x86_xsetbv : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; } //===----------------------------------------------------------------------===// // CLFLUSHOPT and CLWB let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_clflushopt : GCCBuiltin<"__builtin_ia32_clflushopt">, Intrinsic<[], [llvm_ptr_ty], []>; def int_x86_clwb : GCCBuiltin<"__builtin_ia32_clwb">, Intrinsic<[], [llvm_ptr_ty], []>; } //===----------------------------------------------------------------------===// // Protection key (PKU) support let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_rdpkru : GCCBuiltin<"__builtin_ia32_rdpkru">, Intrinsic<[llvm_i32_ty], [], []>; def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">, Intrinsic<[], [llvm_i32_ty], []>; } //===----------------------------------------------------------------------===// // Half float conversion let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
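// Usage sketch (illustrative, not part of the definitions): in the
// vcvtps2ph defs below the i32 operand is the rounding-control immediate.
// A hedged C round trip through the 128-bit forms, written with the
// <immintrin.h> wrappers over the builtins named in the defs; assumes
// -mf16c, and immediate 4 selects the current MXCSR rounding mode:
//
//   #include <immintrin.h>
//   __m128 roundtrip_f16(__m128 x) {
//     return _mm_cvtph_ps(_mm_cvtps_ph(x, 4));
//   }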
def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">, Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">, Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // TBM let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // RDRAND intrinsics - Return a random value and whether it is valid. // RDSEED intrinsics - Return a NIST SP800-90B & C compliant random value and // whether it is valid. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // These are declared side-effecting so they don't get eliminated by CSE or // LICM. def int_x86_rdrand_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>; def int_x86_rdrand_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>; def int_x86_rdrand_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>; def int_x86_rdseed_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>; def int_x86_rdseed_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>; def int_x86_rdseed_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>; } //===----------------------------------------------------------------------===// // ADX let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
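// Usage sketch (illustrative, not part of the definitions): the ADX defs
// below pass the carry in and out as an i8, which is what lets wide adds
// chain. A hedged C sketch of a 128-bit in-place add using the builtin
// named in the def; assumes x86-64 with -madx, helper name hypothetical:
//
//   unsigned char add128(unsigned long long a[2],
//                        const unsigned long long b[2]) {
//     unsigned char c = __builtin_ia32_addcarryx_u64(0, a[0], b[0], &a[0]);
//     return __builtin_ia32_addcarryx_u64(c, a[1], b[1], &a[1]);
//   }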
def int_x86_addcarryx_u32: GCCBuiltin<"__builtin_ia32_addcarryx_u32">, Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>; def int_x86_addcarryx_u64: GCCBuiltin<"__builtin_ia32_addcarryx_u64">, Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty], [IntrArgMemOnly]>; def int_x86_addcarry_u32: GCCBuiltin<"__builtin_ia32_addcarry_u32">, Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>; def int_x86_addcarry_u64: GCCBuiltin<"__builtin_ia32_addcarry_u64">, Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty], [IntrArgMemOnly]>; def int_x86_subborrow_u32: GCCBuiltin<"__builtin_ia32_subborrow_u32">, Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>; def int_x86_subborrow_u64: GCCBuiltin<"__builtin_ia32_subborrow_u64">, Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty], [IntrArgMemOnly]>; } //===----------------------------------------------------------------------===// // RTM intrinsics. Transactional Memory support. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_xbegin : GCCBuiltin<"__builtin_ia32_xbegin">, Intrinsic<[llvm_i32_ty], [], []>; def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">, Intrinsic<[], [], []>; def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">, Intrinsic<[], [llvm_i8_ty], []>; def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">, Intrinsic<[llvm_i32_ty], [], []>; } //===----------------------------------------------------------------------===// // AVX512 // Mask ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Mask instructions // 16-bit mask def int_x86_avx512_kand_w : GCCBuiltin<"__builtin_ia32_kandhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_kandn_w : GCCBuiltin<"__builtin_ia32_kandnhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_knot_w : GCCBuiltin<"__builtin_ia32_knothi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_kor_w : GCCBuiltin<"__builtin_ia32_korhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_kxor_w : GCCBuiltin<"__builtin_ia32_kxorhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_kxnor_w : GCCBuiltin<"__builtin_ia32_kxnorhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; } // Conversion ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
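// Semantics note (illustrative, not part of the definitions): the cvtt*
// defs below truncate toward zero; their trailing i32 operand carries only
// SAE/rounding control, not data. A hedged scalar C model of cvttss2si
// (hardware additionally returns the integer indefinite value for
// out-of-range inputs, which plain C does not promise):
//
//   int cvttss2si_model(float x) {
//     return (int)x; /* float-to-int casts in C truncate toward zero */
//   }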
def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcvtss2usi32 : GCCBuiltin<"__builtin_ia32_vcvtss2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcvtss2usi64 : GCCBuiltin<"__builtin_ia32_vcvtss2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcvtss2si32 : GCCBuiltin<"__builtin_ia32_vcvtss2si32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcvtss2si64 : GCCBuiltin<"__builtin_ia32_vcvtss2si64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcvtsd2usi32 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcvtsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcvtsd2si32 : GCCBuiltin<"__builtin_ia32_vcvtsd2si32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcvtsd2si64 : GCCBuiltin<"__builtin_ia32_vcvtsd2si64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtb2mask_128 : GCCBuiltin<"__builtin_ia32_cvtb2mask128">, Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; def 
int_x86_avx512_cvtb2mask_256 : GCCBuiltin<"__builtin_ia32_cvtb2mask256">, Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx512_cvtb2mask_512 : GCCBuiltin<"__builtin_ia32_cvtb2mask512">, Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>; def int_x86_avx512_cvtw2mask_128 : GCCBuiltin<"__builtin_ia32_cvtw2mask128">, Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_avx512_cvtw2mask_256 : GCCBuiltin<"__builtin_ia32_cvtw2mask256">, Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx512_cvtw2mask_512 : GCCBuiltin<"__builtin_ia32_cvtw2mask512">, Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>; def int_x86_avx512_cvtd2mask_128 : GCCBuiltin<"__builtin_ia32_cvtd2mask128">, Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtd2mask_256 : GCCBuiltin<"__builtin_ia32_cvtd2mask256">, Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtd2mask_512 : GCCBuiltin<"__builtin_ia32_cvtd2mask512">, Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtq2mask_128 : GCCBuiltin<"__builtin_ia32_cvtq2mask128">, Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_cvtq2mask_256 : GCCBuiltin<"__builtin_ia32_cvtq2mask256">, Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_cvtq2mask_512 : GCCBuiltin<"__builtin_ia32_cvtq2mask512">, Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>; } // Pack ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_packsswb_512 : GCCBuiltin<"__builtin_ia32_packsswb512">, Intrinsic<[llvm_v64i8_ty], [llvm_v32i16_ty,llvm_v32i16_ty], [IntrNoMem]>; def int_x86_avx512_packssdw_512 : GCCBuiltin<"__builtin_ia32_packssdw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_packuswb_512 : GCCBuiltin<"__builtin_ia32_packuswb512">, Intrinsic<[llvm_v64i8_ty], [llvm_v32i16_ty,llvm_v32i16_ty], [IntrNoMem]>; def int_x86_avx512_packusdw_512 : GCCBuiltin<"__builtin_ia32_packusdw512">, Intrinsic<[llvm_v32i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; } // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
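// Semantics note (illustrative, not part of the definitions): the _mask
// conversions below merge each converted lane with the passthru vector
// operand under the k-mask; the _maskz flavor substitutes zero for
// unselected lanes instead. A hedged scalar C model of the merge, with
// cvtdq2ps standing in for the per-lane operation:
//
//   void cvtdq2ps_mask(float *dst, const int *src, const float *pass,
//                      unsigned mask, int n) {
//     for (int i = 0; i < n; i++)
//       dst[i] = (mask >> i) & 1 ? (float)src[i] : pass[i];
//   }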
def int_x86_avx512_mask_cvtdq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtdq2ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtdq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2dq_128 : GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2dq_512 : GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2ps_512 : GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtsd2ss_round : GCCBuiltin<"__builtin_ia32_cvtsd2ss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtss2sd_round : GCCBuiltin<"__builtin_ia32_cvtss2sd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2qq_128 : GCCBuiltin<"__builtin_ia32_cvtpd2qq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2qq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2qq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2qq_512 : GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2udq_128 : GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2udq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2udq256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2udq_512 : GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2uqq_128 : GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2uqq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2uqq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtpd2uqq_512 : GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, 
llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2dq_128 : GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2dq_512 : GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2pd_128 : GCCBuiltin<"__builtin_ia32_cvtps2pd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2pd_512 : GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2qq_128 : GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2qq_256 : GCCBuiltin<"__builtin_ia32_cvtps2qq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2qq_512 : GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2udq_128 : GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2udq_256 : GCCBuiltin<"__builtin_ia32_cvtps2udq256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2udq_512 : GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2uqq_128 : GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2uqq_256 : GCCBuiltin<"__builtin_ia32_cvtps2uqq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtps2uqq_512 : GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtqq2pd_128 : GCCBuiltin<"__builtin_ia32_cvtqq2pd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtqq2pd_256 : GCCBuiltin<"__builtin_ia32_cvtqq2pd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtqq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtqq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtqq2ps_256 : GCCBuiltin<"__builtin_ia32_cvtqq2ps256_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def 
int_x86_avx512_mask_cvtqq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtqq2ps512_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2dq_128 : GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2dq_512 : GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2qq_128 : GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2qq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2qq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2qq_512 : GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2udq_128 : GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2udq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2udq256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2udq_512 : GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2uqq_128 : GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2uqq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2uqq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttpd2uqq_512 : GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2dq_128 : GCCBuiltin<"__builtin_ia32_cvttps2dq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2dq_512 : GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2qq_128 : GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2qq_256 : GCCBuiltin<"__builtin_ia32_cvttps2qq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2qq_512 : GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2udq_128 : GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], 
[IntrNoMem]>; def int_x86_avx512_mask_cvttps2udq_256 : GCCBuiltin<"__builtin_ia32_cvttps2udq256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2udq_512 : GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2uqq_128 : GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2uqq_256 : GCCBuiltin<"__builtin_ia32_cvttps2uqq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvttps2uqq_512 : GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtudq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtudq2ps_256 : GCCBuiltin<"__builtin_ia32_cvtudq2ps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtudq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtuqq2pd_128 : GCCBuiltin<"__builtin_ia32_cvtuqq2pd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtuqq2pd_256 : GCCBuiltin<"__builtin_ia32_cvtuqq2pd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtuqq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtuqq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtuqq2ps_256 : GCCBuiltin<"__builtin_ia32_cvtuqq2ps256_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cvtuqq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtuqq2ps512_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_rndscale_ps_512 : 
GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; } // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
// Vector load with broadcast
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // TODO: Remove the broadcast intrinsics with no gcc builtin and autoupgrade
  def int_x86_avx512_vbroadcast_ss_512 :
      Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_vbroadcast_sd_512 :
      Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_broadcastmw_512 : GCCBuiltin<"__builtin_ia32_broadcastmw512">,
      Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_broadcastmw_256 : GCCBuiltin<"__builtin_ia32_broadcastmw256">,
      Intrinsic<[llvm_v8i32_ty], [llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_broadcastmw_128 : GCCBuiltin<"__builtin_ia32_broadcastmw128">,
      Intrinsic<[llvm_v4i32_ty], [llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_broadcastmb_512 : GCCBuiltin<"__builtin_ia32_broadcastmb512">,
      Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_broadcastmb_256 : GCCBuiltin<"__builtin_ia32_broadcastmb256">,
      Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_broadcastmb_128 : GCCBuiltin<"__builtin_ia32_broadcastmb128">,
      Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>;
}

// Arithmetic ops
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx512_mask_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_round_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_round_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_round_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_round_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_round_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_round_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_round_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_round_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_round_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_round_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_round_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
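
  // Illustrative IR sketch for the scalar *_round forms above (%a, %b,
  // %passthru and %mask are placeholders): element 0 is computed, or taken
  // from the passthru when the mask bit is clear, and the upper elements
  // are copied from the first source operand:
  //   %r = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(
  //            <4 x float> %a, <4 x float> %b, <4 x float> %passthru,
  //            i8 %mask, i32 8) ; 8 = round-to-nearest with exceptions suppressed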
GCCBuiltin<"__builtin_ia32_reducesd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_pd_256 : GCCBuiltin<"__builtin_ia32_scalefpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],[IntrNoMem]>; def int_x86_avx512_mask_scalef_pd_512 : GCCBuiltin<"__builtin_ia32_scalefpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_ps_256 : GCCBuiltin<"__builtin_ia32_scalefps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_ps_128 : GCCBuiltin<"__builtin_ia32_sqrtps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_fixupimm_pd_128 : GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_fixupimm_pd_128 : GCCBuiltin<"__builtin_ia32_fixupimmpd128_maskz">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], 
  def int_x86_avx512_mask_fixupimm_pd_256 : GCCBuiltin<"__builtin_ia32_fixupimmpd256_mask">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_pd_256 : GCCBuiltin<"__builtin_ia32_fixupimmpd256_maskz">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_pd_512 : GCCBuiltin<"__builtin_ia32_fixupimmpd512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_pd_512 : GCCBuiltin<"__builtin_ia32_fixupimmpd512_maskz">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_ps_128 : GCCBuiltin<"__builtin_ia32_fixupimmps128_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_ps_128 : GCCBuiltin<"__builtin_ia32_fixupimmps128_maskz">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_ps_256 : GCCBuiltin<"__builtin_ia32_fixupimmps256_mask">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_ps_256 : GCCBuiltin<"__builtin_ia32_fixupimmps256_maskz">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_ps_512 : GCCBuiltin<"__builtin_ia32_fixupimmps512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_ps_512 : GCCBuiltin<"__builtin_ia32_fixupimmps512_maskz">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_sd : GCCBuiltin<"__builtin_ia32_fixupimmsd_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_sd : GCCBuiltin<"__builtin_ia32_fixupimmsd_maskz">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_fixupimm_ss : GCCBuiltin<"__builtin_ia32_fixupimmss_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_fixupimm_ss : GCCBuiltin<"__builtin_ia32_fixupimmss_maskz">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
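
  // Illustrative IR sketch for the fixupimm family above (%a, %b, %tbl and
  // %mask are placeholders): the integer vector operand is a per-element
  // table of 4-bit response tokens for special values, the i32 immediate
  // selects the fixup behavior, and the mask/maskz variants differ only in
  // whether unselected lanes keep the first operand or are zeroed:
  //   %r = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(
  //            <2 x double> %a, <2 x double> %b, <2 x i64> %tbl, i32 0,
  //            i8 %mask, i32 4) ; trailing 4 = no static SAE override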
GCCBuiltin<"__builtin_ia32_getexpps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_getexp_ps_256 : GCCBuiltin<"__builtin_ia32_getexpps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss128_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd128_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_pd_128 : GCCBuiltin<"__builtin_ia32_getmantpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_pd_256 : GCCBuiltin<"__builtin_ia32_getmantpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_pd_512 : GCCBuiltin<"__builtin_ia32_getmantpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty,llvm_i32_ty ], [IntrNoMem]>; def int_x86_avx512_mask_getmant_ps_128 : GCCBuiltin<"__builtin_ia32_getmantps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_ps_256 : GCCBuiltin<"__builtin_ia32_getmantps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_ps_512 : GCCBuiltin<"__builtin_ia32_getmantps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,llvm_i32_ty, llvm_v16f32_ty,llvm_i16_ty,llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_ss : GCCBuiltin<"__builtin_ia32_getmantss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_sd : GCCBuiltin<"__builtin_ia32_getmantsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_pd_128 : GCCBuiltin<"__builtin_ia32_rsqrt14pd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_pd_256 : GCCBuiltin<"__builtin_ia32_rsqrt14pd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_ps_128 : GCCBuiltin<"__builtin_ia32_rsqrt14ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_ps_256 : 
GCCBuiltin<"__builtin_ia32_rsqrt14ps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_pd_128 : GCCBuiltin<"__builtin_ia32_rcp14pd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_pd_256 : GCCBuiltin<"__builtin_ia32_rcp14pd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_ps_128 : GCCBuiltin<"__builtin_ia32_rcp14ps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_ps_256 : GCCBuiltin<"__builtin_ia32_rcp14ps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_exp2_ps : GCCBuiltin<"__builtin_ia32_exp2ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_exp2_pd : GCCBuiltin<"__builtin_ia32_exp2pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">, 
  def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem, Commutative]>;
}

// Integer arithmetic ops
let TargetPrefix = "x86" in {
  def int_x86_avx512_mask_padds_b_128 : GCCBuiltin<"__builtin_ia32_paddsb128_mask">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_padds_b_256 : GCCBuiltin<"__builtin_ia32_paddsb256_mask">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512_mask">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_padds_w_128 : GCCBuiltin<"__builtin_ia32_paddsw128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_padds_w_256 : GCCBuiltin<"__builtin_ia32_paddsw256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_paddus_b_128 : GCCBuiltin<"__builtin_ia32_paddusb128_mask">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_paddus_b_256 : GCCBuiltin<"__builtin_ia32_paddusb256_mask">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_paddus_b_512 : GCCBuiltin<"__builtin_ia32_paddusb512_mask">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_paddus_w_128 : GCCBuiltin<"__builtin_ia32_paddusw128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_paddus_w_256 : GCCBuiltin<"__builtin_ia32_paddusw256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_psubs_b_128 : GCCBuiltin<"__builtin_ia32_psubsb128_mask">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_psubs_b_256 : GCCBuiltin<"__builtin_ia32_psubsb256_mask">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512_mask">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_psubs_w_128 : GCCBuiltin<"__builtin_ia32_psubsw128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_psubs_w_256 : GCCBuiltin<"__builtin_ia32_psubsw256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
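
  // Illustrative IR sketch for the padds/paddus/psubs/psubus families in
  // this block (%a, %b, %passthru and %mask are placeholders): these are
  // saturating byte/word adds and subtracts, where the integer mask selects
  // per lane between the computed result and the passthru operand:
  //   %r = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(
  //            <64 x i8> %a, <64 x i8> %b, <64 x i8> %passthru, i64 %mask)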
GCCBuiltin<"__builtin_ia32_psubusb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_psubus_b_256 : GCCBuiltin<"__builtin_ia32_psubusb256_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_psubus_b_512 : GCCBuiltin<"__builtin_ia32_psubusb512_mask">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; def int_x86_avx512_mask_psubus_w_128 : GCCBuiltin<"__builtin_ia32_psubusw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_psubus_w_256 : GCCBuiltin<"__builtin_ia32_psubusw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmulh_w_512 : GCCBuiltin<"__builtin_ia32_pmulhw512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmulhu_w_128 : GCCBuiltin<"__builtin_ia32_pmulhuw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmulhu_w_256 : GCCBuiltin<"__builtin_ia32_pmulhuw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmulh_w_128 : GCCBuiltin<"__builtin_ia32_pmulhw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmulh_w_256 : GCCBuiltin<"__builtin_ia32_pmulhw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmaddw_d_128 : GCCBuiltin<"__builtin_ia32_pmaddwd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmaddw_d_256 : GCCBuiltin<"__builtin_ia32_pmaddwd256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmaddw_d_512 : GCCBuiltin<"__builtin_ia32_pmaddwd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmaddubs_w_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmaddubs_w_256 : GCCBuiltin<"__builtin_ia32_pmaddubsw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmaddubs_w_512 : GCCBuiltin<"__builtin_ia32_pmaddubsw512_mask">, 
  def int_x86_avx512_mask_pmaddubs_w_512 : GCCBuiltin<"__builtin_ia32_pmaddubsw512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_dbpsadbw_128 : GCCBuiltin<"__builtin_ia32_dbpsadbw128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_dbpsadbw_256 : GCCBuiltin<"__builtin_ia32_dbpsadbw256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_dbpsadbw_512 : GCCBuiltin<"__builtin_ia32_dbpsadbw512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
}

// Gather and Scatter ops
let TargetPrefix = "x86" in {
  def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gathersiv16sf">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherdiv8df">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherdiv16sf">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gathersiv8di">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_gather_dpi_512 : GCCBuiltin<"__builtin_ia32_gathersiv16si">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_gather_qpq_512 : GCCBuiltin<"__builtin_ia32_gatherdiv8di">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_gather_qpi_512 : GCCBuiltin<"__builtin_ia32_gatherdiv16si">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
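
  // Illustrative IR sketch for the gathers (%passthru, %base, %index and
  // %mask are placeholders): each gather takes a passthru vector, a base
  // pointer, an index vector, a mask and an immediate scale (1, 2, 4 or 8);
  // masked-off lanes keep the passthru value:
  //   %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(
  //            <16 x float> %passthru, i8* %base, <16 x i32> %index,
  //            i16 %mask, i32 4)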
GCCBuiltin<"__builtin_ia32_gather3div4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3div8_sf : GCCBuiltin<"__builtin_ia32_gather3div8sf">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3div8_si : GCCBuiltin<"__builtin_ia32_gather3div8si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3siv2_df : GCCBuiltin<"__builtin_ia32_gather3siv2df">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3siv2_di : GCCBuiltin<"__builtin_ia32_gather3siv2di">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3siv4_df : GCCBuiltin<"__builtin_ia32_gather3siv4df">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3siv4_di : GCCBuiltin<"__builtin_ia32_gather3siv4di">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3siv4_sf : GCCBuiltin<"__builtin_ia32_gather3siv4sf">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3siv4_si : GCCBuiltin<"__builtin_ia32_gather3siv4si">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3siv8_sf : GCCBuiltin<"__builtin_ia32_gather3siv8sf">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; def int_x86_avx512_gather3siv8_si : GCCBuiltin<"__builtin_ia32_gather3siv8si">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; // scatter def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">, Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatter_dps_512 : GCCBuiltin<"__builtin_ia32_scattersiv16sf">, Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatter_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterdiv8df">, Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatter_qps_512 : GCCBuiltin<"__builtin_ia32_scatterdiv16sf">, Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatter_dpq_512 : GCCBuiltin<"__builtin_ia32_scattersiv8di">, Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatter_dpi_512 : GCCBuiltin<"__builtin_ia32_scattersiv16si">, Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatter_qpq_512 : GCCBuiltin<"__builtin_ia32_scatterdiv8di">, Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], 
  def int_x86_avx512_scatter_qpi_512 : GCCBuiltin<"__builtin_ia32_scatterdiv16si">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scatterdiv2_df : GCCBuiltin<"__builtin_ia32_scatterdiv2df">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scatterdiv2_di : GCCBuiltin<"__builtin_ia32_scatterdiv2di">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scatterdiv4_df : GCCBuiltin<"__builtin_ia32_scatterdiv4df">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scatterdiv4_di : GCCBuiltin<"__builtin_ia32_scatterdiv4di">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scatterdiv4_sf : GCCBuiltin<"__builtin_ia32_scatterdiv4sf">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scatterdiv4_si : GCCBuiltin<"__builtin_ia32_scatterdiv4si">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scatterdiv8_sf : GCCBuiltin<"__builtin_ia32_scatterdiv8sf">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scatterdiv8_si : GCCBuiltin<"__builtin_ia32_scatterdiv8si">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scattersiv2_df : GCCBuiltin<"__builtin_ia32_scattersiv2df">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scattersiv2_di : GCCBuiltin<"__builtin_ia32_scattersiv2di">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scattersiv4_df : GCCBuiltin<"__builtin_ia32_scattersiv4df">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scattersiv4_di : GCCBuiltin<"__builtin_ia32_scattersiv4di">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scattersiv4_sf : GCCBuiltin<"__builtin_ia32_scattersiv4sf">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scattersiv4_si : GCCBuiltin<"__builtin_ia32_scattersiv4si">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scattersiv8_sf : GCCBuiltin<"__builtin_ia32_scattersiv8sf">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_scattersiv8_si : GCCBuiltin<"__builtin_ia32_scattersiv8si">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
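
  // Illustrative IR sketch (%base, %mask, %index and %val are placeholders):
  // the scatters mirror the gathers but return nothing; the mask gates
  // which lanes are stored through base + index * scale:
  //   call void @llvm.x86.avx512.scatter.dps.512(
  //            i8* %base, i16 %mask, <16 x i32> %index, <16 x float> %val,
  //            i32 4)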
GCCBuiltin<"__builtin_ia32_gatherpfqpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_gatherpf_qps_512 : GCCBuiltin<"__builtin_ia32_gatherpfqps">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; // scatter prefetch def int_x86_avx512_scatterpf_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfdpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatterpf_dps_512 : GCCBuiltin<"__builtin_ia32_scatterpfdps">, Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatterpf_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfqpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_scatterpf_qps_512 : GCCBuiltin<"__builtin_ia32_scatterpfqps">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; } // AVX-512 conflict detection instruction // Instructions that count the number of leading zero bits let TargetPrefix = "x86" in { def int_x86_avx512_mask_conflict_d_128 : GCCBuiltin<"__builtin_ia32_vpconflictsi_128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_conflict_d_256 : GCCBuiltin<"__builtin_ia32_vpconflictsi_256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_conflict_d_512 : GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_conflict_q_128 : GCCBuiltin<"__builtin_ia32_vpconflictdi_128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_conflict_q_256 : GCCBuiltin<"__builtin_ia32_vpconflictdi_256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_conflict_q_512 : GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Compares let TargetPrefix = "x86" in { // 512-bit def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; } // Compress, Expand let TargetPrefix = "x86" in { def int_x86_avx512_mask_compress_ps_512 : GCCBuiltin<"__builtin_ia32_compresssf512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_compress_pd_512 : GCCBuiltin<"__builtin_ia32_compressdf512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_compress_ps_256 : GCCBuiltin<"__builtin_ia32_compresssf256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_compress_pd_256 : GCCBuiltin<"__builtin_ia32_compressdf256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_compress_ps_128 : GCCBuiltin<"__builtin_ia32_compresssf128_mask">, Intrinsic<[llvm_v4f32_ty], 

// Compress, Expand
let TargetPrefix = "x86" in {
  def int_x86_avx512_mask_compress_ps_512 : GCCBuiltin<"__builtin_ia32_compresssf512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_pd_512 : GCCBuiltin<"__builtin_ia32_compressdf512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_ps_256 : GCCBuiltin<"__builtin_ia32_compresssf256_mask">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_pd_256 : GCCBuiltin<"__builtin_ia32_compressdf256_mask">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_ps_128 : GCCBuiltin<"__builtin_ia32_compresssf128_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_pd_128 : GCCBuiltin<"__builtin_ia32_compressdf128_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_store_ps_512 : GCCBuiltin<"__builtin_ia32_compressstoresf512_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_pd_512 : GCCBuiltin<"__builtin_ia32_compressstoredf512_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_ps_256 : GCCBuiltin<"__builtin_ia32_compressstoresf256_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_pd_256 : GCCBuiltin<"__builtin_ia32_compressstoredf256_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_ps_128 : GCCBuiltin<"__builtin_ia32_compressstoresf128_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_pd_128 : GCCBuiltin<"__builtin_ia32_compressstoredf128_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_d_512 : GCCBuiltin<"__builtin_ia32_compresssi512_mask">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_q_512 : GCCBuiltin<"__builtin_ia32_compressdi512_mask">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_d_256 : GCCBuiltin<"__builtin_ia32_compresssi256_mask">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_q_256 : GCCBuiltin<"__builtin_ia32_compressdi256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_d_128 : GCCBuiltin<"__builtin_ia32_compresssi128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_q_128 : GCCBuiltin<"__builtin_ia32_compressdi128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_store_d_512 : GCCBuiltin<"__builtin_ia32_compressstoresi512_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_q_512 : GCCBuiltin<"__builtin_ia32_compressstoredi512_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_d_256 : GCCBuiltin<"__builtin_ia32_compressstoresi256_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_q_256 : GCCBuiltin<"__builtin_ia32_compressstoredi256_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_d_128 : GCCBuiltin<"__builtin_ia32_compressstoresi128_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_q_128 : GCCBuiltin<"__builtin_ia32_compressstoredi128_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>;
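
  // Illustrative IR sketch (%a, %passthru and %mask are placeholders):
  // compress packs the mask-selected lanes of the source contiguously into
  // the low lanes of the result (the store forms write them to memory),
  // while expand performs the inverse:
  //   %r = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(
  //            <16 x i32> %a, <16 x i32> %passthru, i16 %mask)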
  def int_x86_avx512_mask_compress_b_512 : GCCBuiltin<"__builtin_ia32_compressqi512_mask">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_w_512 : GCCBuiltin<"__builtin_ia32_compresshi512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_b_256 : GCCBuiltin<"__builtin_ia32_compressqi256_mask">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_w_256 : GCCBuiltin<"__builtin_ia32_compresshi256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_b_128 : GCCBuiltin<"__builtin_ia32_compressqi128_mask">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_w_128 : GCCBuiltin<"__builtin_ia32_compresshi128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_compress_store_b_512 : GCCBuiltin<"__builtin_ia32_compressstoreqi512_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_w_512 : GCCBuiltin<"__builtin_ia32_compressstorehi512_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_b_256 : GCCBuiltin<"__builtin_ia32_compressstoreqi256_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_w_256 : GCCBuiltin<"__builtin_ia32_compressstorehi256_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_b_128 : GCCBuiltin<"__builtin_ia32_compressstoreqi128_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrArgMemOnly]>;
  def int_x86_avx512_mask_compress_store_w_128 : GCCBuiltin<"__builtin_ia32_compressstorehi128_mask">,
      Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrArgMemOnly]>;

  // expand
  def int_x86_avx512_mask_expand_ps_512 : GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_pd_512 : GCCBuiltin<"__builtin_ia32_expanddf512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_ps_256 : GCCBuiltin<"__builtin_ia32_expandsf256_mask">,
      Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_pd_256 : GCCBuiltin<"__builtin_ia32_expanddf256_mask">,
      Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_ps_128 : GCCBuiltin<"__builtin_ia32_expandsf128_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_pd_128 : GCCBuiltin<"__builtin_ia32_expanddf128_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_load_ps_512 : GCCBuiltin<"__builtin_ia32_expandloadsf512_mask">,
      Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_pd_512 : GCCBuiltin<"__builtin_ia32_expandloaddf512_mask">,
      Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_ps_256 : GCCBuiltin<"__builtin_ia32_expandloadsf256_mask">,
      Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_pd_256 : GCCBuiltin<"__builtin_ia32_expandloaddf256_mask">,
      Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_ps_128 : GCCBuiltin<"__builtin_ia32_expandloadsf128_mask">,
      Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_pd_128 : GCCBuiltin<"__builtin_ia32_expandloaddf128_mask">,
      Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_d_512 : GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_q_512 : GCCBuiltin<"__builtin_ia32_expanddi512_mask">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_d_256 : GCCBuiltin<"__builtin_ia32_expandsi256_mask">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_q_256 : GCCBuiltin<"__builtin_ia32_expanddi256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_d_128 : GCCBuiltin<"__builtin_ia32_expandsi128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_q_128 : GCCBuiltin<"__builtin_ia32_expanddi128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_load_d_512 : GCCBuiltin<"__builtin_ia32_expandloadsi512_mask">,
      Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_q_512 : GCCBuiltin<"__builtin_ia32_expandloaddi512_mask">,
      Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_d_256 : GCCBuiltin<"__builtin_ia32_expandloadsi256_mask">,
      Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_q_256 : GCCBuiltin<"__builtin_ia32_expandloaddi256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_d_128 : GCCBuiltin<"__builtin_ia32_expandloadsi128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_q_128 : GCCBuiltin<"__builtin_ia32_expandloaddi128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_b_512 : GCCBuiltin<"__builtin_ia32_expandqi512_mask">,
      Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_w_512 : GCCBuiltin<"__builtin_ia32_expandhi512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_b_256 : GCCBuiltin<"__builtin_ia32_expandqi256_mask">,
      Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_w_256 : GCCBuiltin<"__builtin_ia32_expandhi256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_b_128 : GCCBuiltin<"__builtin_ia32_expandqi128_mask">,
      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_w_128 : GCCBuiltin<"__builtin_ia32_expandhi128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_expand_load_b_512 : GCCBuiltin<"__builtin_ia32_expandloadqi512_mask">,
      Intrinsic<[llvm_v64i8_ty], [llvm_ptr_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_w_512 : GCCBuiltin<"__builtin_ia32_expandloadhi512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_b_256 : GCCBuiltin<"__builtin_ia32_expandloadqi256_mask">,
      Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_w_256 : GCCBuiltin<"__builtin_ia32_expandloadhi256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_b_128 : GCCBuiltin<"__builtin_ia32_expandloadqi128_mask">,
      Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
  def int_x86_avx512_mask_expand_load_w_128 : GCCBuiltin<"__builtin_ia32_expandloadhi128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
}

// VBMI2 Concat & Shift
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx512_mask_vpshld_q_512 : GCCBuiltin<"__builtin_ia32_vpshldq512_mask">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshld_q_256 : GCCBuiltin<"__builtin_ia32_vpshldq256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshld_q_128 : GCCBuiltin<"__builtin_ia32_vpshldq128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshld_d_512 : GCCBuiltin<"__builtin_ia32_vpshldd512_mask">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshld_d_256 : GCCBuiltin<"__builtin_ia32_vpshldd256_mask">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshld_d_128 : GCCBuiltin<"__builtin_ia32_vpshldd128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshld_w_512 : GCCBuiltin<"__builtin_ia32_vpshldw512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshld_w_256 : GCCBuiltin<"__builtin_ia32_vpshldw256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshld_w_128 : GCCBuiltin<"__builtin_ia32_vpshldw128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
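
  // Illustrative IR sketch (%a, %b, %passthru and %mask are placeholders):
  // vpshld/vpshrd concatenate each lane of the two sources into a
  // double-width value and shift it left or right by the i32 immediate,
  // keeping one half; the vpshldv/vpshrdv forms further below take per-lane
  // shift amounts instead of an immediate:
  //   %r = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(
  //            <16 x i32> %a, <16 x i32> %b, i32 7, <16 x i32> %passthru,
  //            i16 %mask)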
  def int_x86_avx512_mask_vpshrd_q_512 : GCCBuiltin<"__builtin_ia32_vpshrdq512_mask">,
      Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshrd_q_256 : GCCBuiltin<"__builtin_ia32_vpshrdq256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshrd_q_128 : GCCBuiltin<"__builtin_ia32_vpshrdq128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshrd_d_512 : GCCBuiltin<"__builtin_ia32_vpshrdd512_mask">,
      Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshrd_d_256 : GCCBuiltin<"__builtin_ia32_vpshrdd256_mask">,
      Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshrd_d_128 : GCCBuiltin<"__builtin_ia32_vpshrdd128_mask">,
      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshrd_w_512 : GCCBuiltin<"__builtin_ia32_vpshrdw512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshrd_w_256 : GCCBuiltin<"__builtin_ia32_vpshrdw256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshrd_w_128 : GCCBuiltin<"__builtin_ia32_vpshrdw128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshldv_w_128 : GCCBuiltin<"__builtin_ia32_vpshldvw128_mask">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpshldv_w_128 : GCCBuiltin<"__builtin_ia32_vpshldvw128_maskz">,
      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshldv_w_256 : GCCBuiltin<"__builtin_ia32_vpshldvw256_mask">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpshldv_w_256 : GCCBuiltin<"__builtin_ia32_vpshldvw256_maskz">,
      Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshldv_w_512 : GCCBuiltin<"__builtin_ia32_vpshldvw512_mask">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpshldv_w_512 : GCCBuiltin<"__builtin_ia32_vpshldvw512_maskz">,
      Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshldv_q_128 : GCCBuiltin<"__builtin_ia32_vpshldvq128_mask">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpshldv_q_128 : GCCBuiltin<"__builtin_ia32_vpshldvq128_maskz">,
      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_vpshldv_q_256 : GCCBuiltin<"__builtin_ia32_vpshldvq256_mask">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
  def int_x86_avx512_maskz_vpshldv_q_256 : GCCBuiltin<"__builtin_ia32_vpshldvq256_maskz">,
      Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshldv_q_512 : GCCBuiltin<"__builtin_ia32_vpshldvq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshldv_q_512 : GCCBuiltin<"__builtin_ia32_vpshldvq512_maskz">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshldv_d_128 : GCCBuiltin<"__builtin_ia32_vpshldvd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshldv_d_128 : GCCBuiltin<"__builtin_ia32_vpshldvd128_maskz">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshldv_d_256 : GCCBuiltin<"__builtin_ia32_vpshldvd256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshldv_d_256 : GCCBuiltin<"__builtin_ia32_vpshldvd256_maskz">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshldv_d_512 : GCCBuiltin<"__builtin_ia32_vpshldvd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshldv_d_512 : GCCBuiltin<"__builtin_ia32_vpshldvd512_maskz">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_w_128 : GCCBuiltin<"__builtin_ia32_vpshrdvw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_w_128 : GCCBuiltin<"__builtin_ia32_vpshrdvw128_maskz">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_w_256 : GCCBuiltin<"__builtin_ia32_vpshrdvw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_w_256 : GCCBuiltin<"__builtin_ia32_vpshrdvw256_maskz">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_w_512 : GCCBuiltin<"__builtin_ia32_vpshrdvw512_mask">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_w_512 : GCCBuiltin<"__builtin_ia32_vpshrdvw512_maskz">, Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_q_128 : GCCBuiltin<"__builtin_ia32_vpshrdvq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_q_128 : GCCBuiltin<"__builtin_ia32_vpshrdvq128_maskz">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_q_256 : GCCBuiltin<"__builtin_ia32_vpshrdvq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_q_256 : GCCBuiltin<"__builtin_ia32_vpshrdvq256_maskz">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_q_512 : 
GCCBuiltin<"__builtin_ia32_vpshrdvq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_q_512 : GCCBuiltin<"__builtin_ia32_vpshrdvq512_maskz">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_d_128 : GCCBuiltin<"__builtin_ia32_vpshrdvd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_d_128 : GCCBuiltin<"__builtin_ia32_vpshrdvd128_maskz">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_d_256 : GCCBuiltin<"__builtin_ia32_vpshrdvd256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_d_256 : GCCBuiltin<"__builtin_ia32_vpshrdvd256_maskz">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vpshrdv_d_512 : GCCBuiltin<"__builtin_ia32_vpshrdvd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_maskz_vpshrdv_d_512 : GCCBuiltin<"__builtin_ia32_vpshrdvd512_maskz">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; } // truncate let TargetPrefix = "x86" in { def int_x86_avx512_mask_pmov_qb_128 : GCCBuiltin<"__builtin_ia32_pmovqb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_128 : GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_128 : GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_128 : GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_128 : GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_128 : GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qb_256 : GCCBuiltin<"__builtin_ia32_pmovqb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_256 : GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_256 : GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_256 : GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_256 : GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_256 : GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def 
int_x86_avx512_mask_pmov_qb_512 : GCCBuiltin<"__builtin_ia32_pmovqb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_512 : GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_512 : GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_512 : GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_512 : GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_512 : GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_128 : GCCBuiltin<"__builtin_ia32_pmovqw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_128 : GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_128 : GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_128 : GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_128 : GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_128 : GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_256 : GCCBuiltin<"__builtin_ia32_pmovqw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_256 : GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_256 : GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_256 : GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_256 : GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_256 : GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_512 : GCCBuiltin<"__builtin_ia32_pmovqw512_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_512 : GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_512 : GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def 
int_x86_avx512_mask_pmovs_qw_mem_512 : GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_512 : GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_512 : GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_128 : GCCBuiltin<"__builtin_ia32_pmovqd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qd_mem_128 : GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_128 : GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_128 : GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_128 : GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_128 : GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_256 : GCCBuiltin<"__builtin_ia32_pmovqd256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qd_mem_256 : GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_256 : GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_256 : GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_256 : GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_256 : GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_512 : GCCBuiltin<"__builtin_ia32_pmovqd512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_qd_mem_512 : GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_512 : GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_512 : GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_512 : GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_512 : GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrArgMemOnly]>; def 
int_x86_avx512_mask_pmov_db_128 : GCCBuiltin<"__builtin_ia32_pmovdb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_128 : GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_128 : GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_128 : GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_128 : GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_128 : GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_256 : GCCBuiltin<"__builtin_ia32_pmovdb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_256 : GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_256 : GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_256 : GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_256 : GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_256 : GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_512 : GCCBuiltin<"__builtin_ia32_pmovdb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_512 : GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_512 : GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_512 : GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_512 : GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_512 : GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_128 : GCCBuiltin<"__builtin_ia32_pmovdw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_128 : GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_128 : GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def 
int_x86_avx512_mask_pmovs_dw_mem_128 : GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_128 : GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_128 : GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_256 : GCCBuiltin<"__builtin_ia32_pmovdw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_256 : GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_256 : GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_256 : GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_256 : GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_256 : GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_512 : GCCBuiltin<"__builtin_ia32_pmovdw512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_512 : GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_512 : GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_512 : GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_512 : GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_512 : GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_128 : GCCBuiltin<"__builtin_ia32_pmovwb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_wb_mem_128 : GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_128 : GCCBuiltin<"__builtin_ia32_pmovswb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_128 : GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_128 : GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_128 : GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrArgMemOnly]>; 
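// Note on the truncating-move family above and below: each VPMOV variant is
// defined twice, once as a register form that takes a source vector, a
// passthru vector, and a mask (pure, hence [IntrNoMem]), and once as a
// "_mem_" form that performs a masked truncating store through its pointer
// operand (hence [IntrArgMemOnly]). The "s" and "us" infixes select signed
// and unsigned saturation; the plain form simply drops the high bits. As a
// hedged illustration (not part of this patch), the 128-bit word-to-byte
// store form corresponds to IR along the lines of:
//   call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %p, <8 x i16> %v, i8 %k)
// which writes only the byte lanes selected by the mask %k.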
def int_x86_avx512_mask_pmov_wb_256 : GCCBuiltin<"__builtin_ia32_pmovwb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_wb_mem_256 : GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_256 : GCCBuiltin<"__builtin_ia32_pmovswb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_256 : GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_256 : GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_256 : GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_512 : GCCBuiltin<"__builtin_ia32_pmovwb512_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmov_wb_mem_512 : GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_512 : GCCBuiltin<"__builtin_ia32_pmovswb512_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_512 : GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_512 : GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_512 : GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrArgMemOnly]>; } // Bitwise ternary logic let TargetPrefix = "x86" in { def int_x86_avx512_mask_pternlog_d_128 : GCCBuiltin<"__builtin_ia32_pternlogd128_mask">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_pternlog_d_128 : GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pternlog_d_256 : GCCBuiltin<"__builtin_ia32_pternlogd256_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_pternlog_d_256 : GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pternlog_d_512 : GCCBuiltin<"__builtin_ia32_pternlogd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_maskz_pternlog_d_512 : GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_i16_ty], [IntrNoMem]>; def int_x86_avx512_mask_pternlog_q_128 : GCCBuiltin<"__builtin_ia32_pternlogq128_mask">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def 
int_x86_avx512_maskz_pternlog_q_128 : GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pternlog_q_256 : GCCBuiltin<"__builtin_ia32_pternlogq256_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_pternlog_q_256 : GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pternlog_q_512 : GCCBuiltin<"__builtin_ia32_pternlogq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_maskz_pternlog_q_512 : GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; } // Misc. let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">, Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">, Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cmp_ps_256 : GCCBuiltin<"__builtin_ia32_cmpps256_mask">, Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cmp_pd_256 : GCCBuiltin<"__builtin_ia32_cmppd256_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cmp_ps_128 : GCCBuiltin<"__builtin_ia32_cmpps128_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cmp_pd_128 : GCCBuiltin<"__builtin_ia32_cmppd128_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // SHA intrinsics let TargetPrefix = "x86" in { def int_x86_sha1rnds4 : GCCBuiltin<"__builtin_ia32_sha1rnds4">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_sha1nexte : GCCBuiltin<"__builtin_ia32_sha1nexte">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sha1msg1 : GCCBuiltin<"__builtin_ia32_sha1msg1">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sha1msg2 : GCCBuiltin<"__builtin_ia32_sha1msg2">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sha256rnds2 : GCCBuiltin<"__builtin_ia32_sha256rnds2">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sha256msg1 : GCCBuiltin<"__builtin_ia32_sha256msg1">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sha256msg2 : GCCBuiltin<"__builtin_ia32_sha256msg2">, 
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// Thread synchronization ops with timer.
let TargetPrefix = "x86" in {
  def int_x86_monitorx : GCCBuiltin<"__builtin_ia32_monitorx">,
      Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty ], []>;
  def int_x86_mwaitx : GCCBuiltin<"__builtin_ia32_mwaitx">,
      Intrinsic<[], [ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], []>;
}

//===----------------------------------------------------------------------===//
// Cache-line zero
let TargetPrefix = "x86" in {
  def int_x86_clzero : GCCBuiltin<"__builtin_ia32_clzero">,
      Intrinsic<[], [llvm_ptr_ty], []>;
}
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 3ce636ffcda4..e307335f8bb9 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1,1317 +1,1320 @@
//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the operating system Host concept.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/Host.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <assert.h>
#include <string.h>

// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Host.inc"
#endif
#ifdef LLVM_ON_WIN32
#include "Windows/Host.inc"
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
#if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
#include <mach/host_info.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
#include <mach/machine.h>
#endif

#define DEBUG_TYPE "host-detection"

//===----------------------------------------------------------------------===//
//
//  Implementations of the CPU detection routines
//
//===----------------------------------------------------------------------===//

using namespace llvm;

static std::unique_ptr<llvm::MemoryBuffer>
    LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*Text);
}

StringRef sys::detail::getHostCPUNameForPowerPC(
    const StringRef &ProcCpuinfoContent) {
  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
  // and so we must use an operating-system interface to determine the current
  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
  const char *generic = "generic";

  // The cpu line is second (after the 'processor: 0' line), so if this
  // buffer is too small then something has changed (or is wrong).
  StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
  StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();

  StringRef::const_iterator CIP = CPUInfoStart;

  StringRef::const_iterator CPUStart = 0;
  size_t CPULen = 0;

  // We need to find the first line which starts with cpu, spaces, and a colon.
  // After the colon, there may be some additional spaces and then the cpu type.
  while (CIP < CPUInfoEnd && CPUStart == 0) {
    if (CIP < CPUInfoEnd && *CIP == '\n')
      ++CIP;

    if (CIP < CPUInfoEnd && *CIP == 'c') {
      ++CIP;
      if (CIP < CPUInfoEnd && *CIP == 'p') {
        ++CIP;
        if (CIP < CPUInfoEnd && *CIP == 'u') {
          ++CIP;
          while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
            ++CIP;

          if (CIP < CPUInfoEnd && *CIP == ':') {
            ++CIP;
            while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
              ++CIP;

            if (CIP < CPUInfoEnd) {
              CPUStart = CIP;
              while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
                                          *CIP != ',' && *CIP != '\n'))
                ++CIP;
              CPULen = CIP - CPUStart;
            }
          }
        }
      }
    }

    if (CPUStart == 0)
      while (CIP < CPUInfoEnd && *CIP != '\n')
        ++CIP;
  }

  if (CPUStart == 0)
    return generic;

  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
      .Case("604e", "604e")
      .Case("604", "604")
      .Case("7400", "7400")
      .Case("7410", "7400")
      .Case("7447", "7400")
      .Case("7455", "7450")
      .Case("G4", "g4")
      .Case("POWER4", "970")
      .Case("PPC970FX", "970")
      .Case("PPC970MP", "970")
      .Case("G5", "g5")
      .Case("POWER5", "g5")
      .Case("A2", "a2")
      .Case("POWER6", "pwr6")
      .Case("POWER7", "pwr7")
      .Case("POWER8", "pwr8")
      .Case("POWER8E", "pwr8")
      .Case("POWER8NVL", "pwr8")
      .Case("POWER9", "pwr9")
      .Default(generic);
}

StringRef sys::detail::getHostCPUNameForARM(
    const StringRef &ProcCpuinfoContent) {
  // The cpuid register on arm is not accessible from user space. On Linux,
  // it is exposed through the /proc/cpuinfo file.

  // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
  // in all cases.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for the CPU implementer line.
  StringRef Implementer;
  StringRef Hardware;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].startswith("CPU implementer"))
      Implementer = Lines[I].substr(15).ltrim("\t :");
    if (Lines[I].startswith("Hardware"))
      Hardware = Lines[I].substr(8).ltrim("\t :");
  }

  if (Implementer == "0x41") { // ARM Ltd.
    // MSM8992/8994 may give cpu part for the core that the kernel is running
    // on, which is nondeterministic and wrong. Always return cortex-a53 for
    // these SoC.
    if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
      return "cortex-a53";

    // Look for the CPU part line.
    for (unsigned I = 0, E = Lines.size(); I != E; ++I)
      if (Lines[I].startswith("CPU part"))
        // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
        // values correspond to the "Part number" in the CP15/c0 register. The
        // contents are specified in the various processor manuals.
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
            .Case("0x926", "arm926ej-s")
            .Case("0xb02", "mpcore")
            .Case("0xb36", "arm1136j-s")
            .Case("0xb56", "arm1156t2-s")
            .Case("0xb76", "arm1176jz-s")
            .Case("0xc08", "cortex-a8")
            .Case("0xc09", "cortex-a9")
            .Case("0xc0f", "cortex-a15")
            .Case("0xc20", "cortex-m0")
            .Case("0xc23", "cortex-m3")
            .Case("0xc24", "cortex-m4")
            .Case("0xd04", "cortex-a35")
            .Case("0xd03", "cortex-a53")
            .Case("0xd07", "cortex-a57")
            .Case("0xd08", "cortex-a72")
            .Case("0xd09", "cortex-a73")
            .Default("generic");
  }

  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
    // Look for the CPU part line.
    for (unsigned I = 0, E = Lines.size(); I != E; ++I)
      if (Lines[I].startswith("CPU part"))
        // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
        // values correspond to the "Part number" in the CP15/c0 register. The
        // contents are specified in the various processor manuals.
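        // For illustration (values not taken from this patch): a Kryo-based
        // MSM8996 typically reports
        //   CPU implementer : 0x51
        //   CPU part        : 0x205
        // and the switch below maps part 0x205 to the "kryo" CPU name.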
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
            .Case("0x06f", "krait") // APQ8064
            .Case("0x201", "kryo")
            .Case("0x205", "kryo")
            .Case("0x211", "kryo")
            .Case("0x800", "cortex-a73")
            .Case("0x801", "cortex-a73")
            .Case("0xc00", "falkor")
            .Case("0xc01", "saphira")
            .Default("generic");

  return "generic";
}

StringRef sys::detail::getHostCPUNameForS390x(
    const StringRef &ProcCpuinfoContent) {
  // STIDP is a privileged operation, so use /proc/cpuinfo instead.

  // The "processor 0:" line comes after a fair amount of other information,
  // including a cache breakdown, but this should be plenty.
  SmallVector<StringRef, 32> Lines;
  ProcCpuinfoContent.split(Lines, "\n");

  // Look for the CPU features.
  SmallVector<StringRef, 32> CPUFeatures;
  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
    if (Lines[I].startswith("features")) {
      size_t Pos = Lines[I].find(":");
      if (Pos != StringRef::npos) {
        Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
        break;
      }
    }

  // We need to check for the presence of vector support independently of
  // the machine type, since we may only use the vector register set when
  // supported by the kernel (and hypervisor).
  bool HaveVectorSupport = false;
  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
    if (CPUFeatures[I] == "vx")
      HaveVectorSupport = true;
  }

  // Now check the processor machine type.
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
    if (Lines[I].startswith("processor ")) {
      size_t Pos = Lines[I].find("machine = ");
      if (Pos != StringRef::npos) {
        Pos += sizeof("machine = ") - 1;
        unsigned int Id;
        if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
          if (Id >= 3906 && HaveVectorSupport)
            return "z14";
          if (Id >= 2964 && HaveVectorSupport)
            return "z13";
          if (Id >= 2827)
            return "zEC12";
          if (Id >= 2817)
            return "z196";
        }
      }
      break;
    }
  }

  return "generic";
}

StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
  return "generic";
#else
  uint8_t insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  struct bpf_prog_load_attr {
    uint32_t prog_type;
    uint32_t insn_cnt;
    uint64_t insns;
    uint64_t license;
    uint32_t log_level;
    uint32_t log_size;
    uint64_t log_buf;
    uint32_t kern_version;
    uint32_t prog_flags;
  } attr = {};
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)insns;
  attr.license = (uint64_t)"DUMMY";

  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
                   sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v2";
  }
  return "v1";
#endif
}

#if defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64__) || defined(_M_X64)

enum VendorSignatures {
  SIG_INTEL = 0x756e6547 /* Genu */,
  SIG_AMD = 0x68747541 /* Auth */
};

// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g.
libstdcxx static bool isCpuIdSupported() { #if defined(__GNUC__) || defined(__clang__) #if defined(__i386__) int __cpuid_supported; __asm__(" pushfl\n" " popl %%eax\n" " movl %%eax,%%ecx\n" " xorl $0x00200000,%%eax\n" " pushl %%eax\n" " popfl\n" " pushfl\n" " popl %%eax\n" " movl $0,%0\n" " cmpl %%eax,%%ecx\n" " je 1f\n" " movl $1,%0\n" "1:" : "=r"(__cpuid_supported) : : "eax", "ecx"); if (!__cpuid_supported) return false; #endif return true; #endif return true; } /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in /// the specified arguments. If we can't run cpuid on the host, return true. static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) #if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? __asm__("movq\t%%rbx, %%rsi\n\t" "cpuid\n\t" "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); return false; #elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); return false; #else return true; #endif #elif defined(_MSC_VER) // The MSVC intrinsic is portable across x86 and x64. int registers[4]; __cpuid(registers, value); *rEAX = registers[0]; *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; return false; #else return true; #endif } /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return /// the 4 values in the specified arguments. If we can't run cpuid on the host, /// return true. static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) #if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? __asm__("movq\t%%rbx, %%rsi\n\t" "cpuid\n\t" "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); return false; #elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); return false; #else return true; #endif #elif defined(_MSC_VER) int registers[4]; __cpuidex(registers, value, subleaf); *rEAX = registers[0]; *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; return false; #else return true; #endif } // Read control register 0 (XCR0). Used to detect features such as AVX. static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) // Check xgetbv; this uses a .byte sequence instead of the instruction // directly because older assemblers do not include support for xgetbv and // there is no easy way to conditionally compile based on the assembler used. 
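  // The byte sequence 0x0f, 0x01, 0xd0 below is the machine encoding of
  // XGETBV. With ECX = 0 it reads XCR0, the register that records which
  // state components the OS saves on context switch; the low 32 bits land
  // in EAX and the high 32 bits in EDX, which the asm constraints bind to
  // *rEAX and *rEDX.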
__asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); return false; #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); *rEAX = Result; *rEDX = Result >> 32; return false; #else return true; #endif } static void detectX86FamilyModel(unsigned EAX, unsigned *Family, unsigned *Model) { *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 if (*Family == 6 || *Family == 0xf) { if (*Family == 0xf) // Examine extended family ID if family ID is F. *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 // Examine extended model ID if family ID is 6 or F. *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 } } static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, unsigned Brand_id, unsigned Features, unsigned Features2, unsigned *Type, unsigned *Subtype) { if (Brand_id != 0) return; switch (Family) { case 3: *Type = X86::INTEL_i386; break; case 4: *Type = X86::INTEL_i486; break; case 5: if (Features & (1 << X86::FEATURE_MMX)) { *Type = X86::INTEL_PENTIUM_MMX; break; } *Type = X86::INTEL_PENTIUM; break; case 6: switch (Model) { case 0x01: // Pentium Pro processor *Type = X86::INTEL_PENTIUM_PRO; break; case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, // model 03 case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, // model 05, and Intel Celeron processor, model 05 case 0x06: // Celeron processor, model 06 *Type = X86::INTEL_PENTIUM_II; break; case 0x07: // Pentium III processor, model 07, and Pentium III Xeon // processor, model 07 case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, // model 08, and Celeron processor, model 08 case 0x0a: // Pentium III Xeon processor, model 0Ah case 0x0b: // Pentium III processor, model 0Bh *Type = X86::INTEL_PENTIUM_III; break; case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model // 0Dh. All processors are manufactured using the 90 nm process. case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 // Integrated Processor with Intel QuickAssist Technology *Type = X86::INTEL_PENTIUM_M; break; case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model // 0Eh. All processors are manufactured using the 65 nm process. *Type = X86::INTEL_CORE_DUO; break; // yonah case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile // processor, Intel Core 2 Quad processor, Intel Core 2 Quad // mobile processor, Intel Core 2 Extreme processor, Intel // Pentium Dual-Core processor, Intel Xeon processor, model // 0Fh. All processors are manufactured using the 65 nm process. case 0x16: // Intel Celeron processor model 16h. All processors are // manufactured using the 65 nm process *Type = X86::INTEL_CORE2; // "core2" *Subtype = X86::INTEL_CORE2_65; break; case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. // // 45nm: Penryn , Wolfdale, Yorkfield (XE) case 0x1d: // Intel Xeon processor MP. All processors are manufactured using // the 45 nm process. *Type = X86::INTEL_CORE2; // "penryn" *Subtype = X86::INTEL_CORE2_45; break; case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. 
case 0x1f: case 0x2e: // Nehalem EX *Type = X86::INTEL_COREI7; // "nehalem" *Subtype = X86::INTEL_COREI7_NEHALEM; break; case 0x25: // Intel Core i7, laptop version. case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. case 0x2f: // Westmere EX *Type = X86::INTEL_COREI7; // "westmere" *Subtype = X86::INTEL_COREI7_WESTMERE; break; case 0x2a: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. case 0x2d: *Type = X86::INTEL_COREI7; //"sandybridge" *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: case 0x3e: // Ivy Bridge EP *Type = X86::INTEL_COREI7; // "ivybridge" *Subtype = X86::INTEL_COREI7_IVYBRIDGE; break; // Haswell: case 0x3c: case 0x3f: case 0x45: case 0x46: *Type = X86::INTEL_COREI7; // "haswell" *Subtype = X86::INTEL_COREI7_HASWELL; break; // Broadwell: case 0x3d: case 0x47: case 0x4f: case 0x56: *Type = X86::INTEL_COREI7; // "broadwell" *Subtype = X86::INTEL_COREI7_BROADWELL; break; // Skylake: case 0x4e: // Skylake mobile case 0x5e: // Skylake desktop case 0x8e: // Kaby Lake mobile case 0x9e: // Kaby Lake desktop *Type = X86::INTEL_COREI7; // "skylake" *Subtype = X86::INTEL_COREI7_SKYLAKE; break; // Skylake Xeon: case 0x55: *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" break; // Cannonlake: case 0x66: *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake" break; case 0x1c: // Most 45 nm Intel Atom processors case 0x26: // 45 nm Atom Lincroft case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview *Type = X86::INTEL_BONNELL; break; // "bonnell" // Atom Silvermont codes from the Intel software optimization guide. case 0x37: case 0x4a: case 0x4d: case 0x5a: case 0x5d: case 0x4c: // really airmont *Type = X86::INTEL_SILVERMONT; break; // "silvermont" // Goldmont: case 0x5c: // Apollo Lake case 0x5f: // Denverton case 0x7a: // Gemini Lake *Type = X86::INTEL_GOLDMONT; break; // "goldmont" case 0x57: *Type = X86::INTEL_KNL; // knl break; case 0x85: *Type = X86::INTEL_KNM; // knm break; default: // Unknown family 6 CPU, try to guess. 
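      // The checks below are ordered from newest ISA extension to oldest, so
      // an unrecognized family 6 model is classified as the newest
      // microarchitecture whose signature feature it reports (AVX512VBMI
      // implies Cannonlake, AVX512VL implies Skylake-AVX512, and so on down
      // to MMX).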
if (Features & (1 << X86::FEATURE_AVX512VBMI)) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_CANNONLAKE; break; } if (Features & (1 << X86::FEATURE_AVX512VL)) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; break; } if (Features & (1 << X86::FEATURE_AVX512ER)) { *Type = X86::INTEL_KNL; // knl break; } if (Features2 & (1 << (X86::FEATURE_CLFLUSHOPT - 32))) { if (Features2 & (1 << (X86::FEATURE_SHA - 32))) { *Type = X86::INTEL_GOLDMONT; } else { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SKYLAKE; } break; } if (Features2 & (1 << (X86::FEATURE_ADX - 32))) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_BROADWELL; break; } if (Features & (1 << X86::FEATURE_AVX2)) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_HASWELL; break; } if (Features & (1 << X86::FEATURE_AVX)) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; break; } if (Features & (1 << X86::FEATURE_SSE4_2)) { if (Features2 & (1 << (X86::FEATURE_MOVBE - 32))) { *Type = X86::INTEL_SILVERMONT; } else { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_NEHALEM; } break; } if (Features & (1 << X86::FEATURE_SSE4_1)) { *Type = X86::INTEL_CORE2; // "penryn" *Subtype = X86::INTEL_CORE2_45; break; } if (Features & (1 << X86::FEATURE_SSSE3)) { if (Features2 & (1 << (X86::FEATURE_MOVBE - 32))) { *Type = X86::INTEL_BONNELL; // "bonnell" } else { *Type = X86::INTEL_CORE2; // "core2" *Subtype = X86::INTEL_CORE2_65; } break; } if (Features2 & (1 << (X86::FEATURE_EM64T - 32))) { *Type = X86::INTEL_CORE2; // "core2" *Subtype = X86::INTEL_CORE2_65; break; } if (Features & (1 << X86::FEATURE_SSE3)) { *Type = X86::INTEL_CORE_DUO; break; } if (Features & (1 << X86::FEATURE_SSE2)) { *Type = X86::INTEL_PENTIUM_M; break; } if (Features & (1 << X86::FEATURE_SSE)) { *Type = X86::INTEL_PENTIUM_III; break; } if (Features & (1 << X86::FEATURE_MMX)) { *Type = X86::INTEL_PENTIUM_II; break; } *Type = X86::INTEL_PENTIUM_PRO; break; } break; case 15: { if (Features2 & (1 << (X86::FEATURE_EM64T - 32))) { *Type = X86::INTEL_NOCONA; break; } if (Features & (1 << X86::FEATURE_SSE3)) { *Type = X86::INTEL_PRESCOTT; break; } *Type = X86::INTEL_PENTIUM_IV; break; } default: break; /*"generic"*/ } } static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, unsigned Features, unsigned *Type, unsigned *Subtype) { // FIXME: this poorly matches the generated SubtargetFeatureKV table. There // appears to be no way to generate the wide variety of AMD-specific targets // from the information returned from CPUID. 
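  // The family numbers below are the decimal form of the usual hex names:
  // 21 is family 15h (Bulldozer through Excavator, split by model range),
  // 22 is family 16h (Jaguar-class "btver2"), and 23 is family 17h (Zen).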
switch (Family) { case 4: *Type = X86::AMD_i486; break; case 5: *Type = X86::AMDPENTIUM; switch (Model) { case 6: case 7: *Subtype = X86::AMDPENTIUM_K6; break; // "k6" case 8: *Subtype = X86::AMDPENTIUM_K62; break; // "k6-2" case 9: case 13: *Subtype = X86::AMDPENTIUM_K63; break; // "k6-3" case 10: *Subtype = X86::AMDPENTIUM_GEODE; break; // "geode" } break; case 6: if (Features & (1 << X86::FEATURE_SSE)) { *Type = X86::AMD_ATHLON_XP; break; // "athlon-xp" } *Type = X86::AMD_ATHLON; break; // "athlon" case 15: if (Features & (1 << X86::FEATURE_SSE3)) { *Type = X86::AMD_K8SSE3; break; // "k8-sse3" } *Type = X86::AMD_K8; break; // "k8" case 16: *Type = X86::AMDFAM10H; // "amdfam10" switch (Model) { case 2: *Subtype = X86::AMDFAM10H_BARCELONA; break; case 4: *Subtype = X86::AMDFAM10H_SHANGHAI; break; case 8: *Subtype = X86::AMDFAM10H_ISTANBUL; break; } break; case 20: *Type = X86::AMD_BTVER1; break; // "btver1"; case 21: *Type = X86::AMDFAM15H; if (Model >= 0x60 && Model <= 0x7f) { *Subtype = X86::AMDFAM15H_BDVER4; break; // "bdver4"; 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { *Subtype = X86::AMDFAM15H_BDVER3; break; // "bdver3"; 30h-3Fh: Steamroller } if (Model >= 0x10 && Model <= 0x1f) { *Subtype = X86::AMDFAM15H_BDVER2; break; // "bdver2"; 10h-1Fh: Piledriver } if (Model <= 0x0f) { *Subtype = X86::AMDFAM15H_BDVER1; break; // "bdver1"; 00h-0Fh: Bulldozer } break; case 22: *Type = X86::AMD_BTVER2; break; // "btver2" case 23: *Type = X86::AMDFAM17H; *Subtype = X86::AMDFAM17H_ZNVER1; break; default: break; // "generic" } } static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, unsigned *FeaturesOut, unsigned *Features2Out) { unsigned Features = 0; unsigned Features2 = 0; unsigned EAX, EBX; if ((EDX >> 15) & 1) Features |= 1 << X86::FEATURE_CMOV; if ((EDX >> 23) & 1) Features |= 1 << X86::FEATURE_MMX; if ((EDX >> 25) & 1) Features |= 1 << X86::FEATURE_SSE; if ((EDX >> 26) & 1) Features |= 1 << X86::FEATURE_SSE2; if ((ECX >> 0) & 1) Features |= 1 << X86::FEATURE_SSE3; if ((ECX >> 1) & 1) Features |= 1 << X86::FEATURE_PCLMUL; if ((ECX >> 9) & 1) Features |= 1 << X86::FEATURE_SSSE3; if ((ECX >> 12) & 1) Features |= 1 << X86::FEATURE_FMA; if ((ECX >> 19) & 1) Features |= 1 << X86::FEATURE_SSE4_1; if ((ECX >> 20) & 1) Features |= 1 << X86::FEATURE_SSE4_2; if ((ECX >> 23) & 1) Features |= 1 << X86::FEATURE_POPCNT; if ((ECX >> 25) & 1) Features |= 1 << X86::FEATURE_AES; if ((ECX >> 22) & 1) Features2 |= 1 << (X86::FEATURE_MOVBE - 32); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. 
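  // XCR0 bit 1 (XMM state) and bit 2 (YMM state) must both be set, which is
  // the (EAX & 0x6) == 0x6 test below; AVX-512 additionally requires bits
  // 5-7 (opmask, upper ZMM halves, high ZMM registers), i.e. the
  // (EAX & 0xe0) == 0xe0 test.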
const unsigned AVXBits = (1 << 27) | (1 << 28); bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); if (HasAVX) Features |= 1 << X86::FEATURE_AVX; bool HasLeaf7 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); if (HasLeaf7 && ((EBX >> 3) & 1)) Features |= 1 << X86::FEATURE_BMI; if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) Features |= 1 << X86::FEATURE_AVX2; if (HasLeaf7 && ((EBX >> 9) & 1)) Features |= 1 << X86::FEATURE_BMI2; if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512F; if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512DQ; if (HasLeaf7 && ((EBX >> 19) & 1)) Features2 |= 1 << (X86::FEATURE_ADX - 32); if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512IFMA; if (HasLeaf7 && ((EBX >> 23) & 1)) Features2 |= 1 << (X86::FEATURE_CLFLUSHOPT - 32); if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512PF; if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512ER; if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512CD; if (HasLeaf7 && ((EBX >> 29) & 1)) Features2 |= 1 << (X86::FEATURE_SHA - 32); if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512BW; if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512VL; if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512VBMI; if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX512VPOPCNTDQ; if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX5124VNNIW; if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) Features |= 1 << X86::FEATURE_AVX5124FMAPS; unsigned MaxExtLevel; getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); if (HasExtLeaf1 && ((ECX >> 6) & 1)) Features |= 1 << X86::FEATURE_SSE4_A; if (HasExtLeaf1 && ((ECX >> 11) & 1)) Features |= 1 << X86::FEATURE_XOP; if (HasExtLeaf1 && ((ECX >> 16) & 1)) Features |= 1 << X86::FEATURE_FMA4; if (HasExtLeaf1 && ((EDX >> 29) & 1)) Features2 |= 1 << (X86::FEATURE_EM64T - 32); *FeaturesOut = Features; *Features2Out = Features2; } StringRef sys::getHostCPUName() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; unsigned MaxLeaf, Vendor; #if defined(__GNUC__) || defined(__clang__) //FIXME: include cpuid.h from clang or copy __get_cpuid_max here // and simplify it to not invoke __cpuid (like cpu_model.c in // compiler-rt/lib/builtins/cpu_model.c? // Opting for the second option. if(!isCpuIdSupported()) return "generic"; #endif if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) return "generic"; getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); unsigned Brand_id = EBX & 0xff; unsigned Family = 0, Model = 0; unsigned Features = 0, Features2 = 0; detectX86FamilyModel(EAX, &Family, &Model); getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); unsigned Type = 0; unsigned Subtype = 0; if (Vendor == SIG_INTEL) { getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, Features2, &Type, &Subtype); } else if (Vendor == SIG_AMD) { getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); } // Check subtypes first since those are more specific. 
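// X86TargetParser.def is an X-macro table: defining X86_CPU_SUBTYPE (and,
// symmetrically, X86_CPU_TYPE) before including it expands to one guarded
// return statement per known CPU, translating the detected enum value back
// to its -march= spelling without a hand-written switch.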
#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \
  if (Subtype == X86::ENUM) \
    return ARCHNAME;
#include "llvm/Support/X86TargetParser.def"

  // Now check types.
#define X86_CPU_TYPE(ARCHNAME, ENUM) \
  if (Type == X86::ENUM) \
    return ARCHNAME;
#include "llvm/Support/X86TargetParser.def"

  return "generic";
}

#elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
StringRef sys::getHostCPUName() {
  host_basic_info_data_t hostInfo;
  mach_msg_type_number_t infoCount;

  infoCount = HOST_BASIC_INFO_COUNT;
  host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
            &infoCount);

  if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
    return "generic";

  switch (hostInfo.cpu_subtype) {
  case CPU_SUBTYPE_POWERPC_601:
    return "601";
  case CPU_SUBTYPE_POWERPC_602:
    return "602";
  case CPU_SUBTYPE_POWERPC_603:
    return "603";
  case CPU_SUBTYPE_POWERPC_603e:
    return "603e";
  case CPU_SUBTYPE_POWERPC_603ev:
    return "603ev";
  case CPU_SUBTYPE_POWERPC_604:
    return "604";
  case CPU_SUBTYPE_POWERPC_604e:
    return "604e";
  case CPU_SUBTYPE_POWERPC_620:
    return "620";
  case CPU_SUBTYPE_POWERPC_750:
    return "750";
  case CPU_SUBTYPE_POWERPC_7400:
    return "7400";
  case CPU_SUBTYPE_POWERPC_7450:
    return "7450";
  case CPU_SUBTYPE_POWERPC_970:
    return "970";
  default:;
  }

  return "generic";
}
#elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
StringRef sys::getHostCPUName() {
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
  const StringRef& Content = P ? P->getBuffer() : "";
  return detail::getHostCPUNameForPowerPC(Content);
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
StringRef sys::getHostCPUName() {
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
  const StringRef& Content = P ? P->getBuffer() : "";
  return detail::getHostCPUNameForARM(Content);
}
#elif defined(__linux__) && defined(__s390x__)
StringRef sys::getHostCPUName() {
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
  const StringRef& Content = P ? P->getBuffer() : "";
  return detail::getHostCPUNameForS390x(Content);
}
#else
StringRef sys::getHostCPUName() { return "generic"; }
#endif

#if defined(__linux__) && defined(__x86_64__)

// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
static int computeHostNumPhysicalCores() {
  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
  // mmapped because it appears to have 0 size.
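  // For illustration (not from this patch): on a two-socket machine with two
  // cores per socket and SMT enabled, /proc/cpuinfo repeats blocks such as
  //   physical id : 0
  //   core id     : 1
  // and the loop below counts the distinct (physical id, core id) pairs,
  // four in that example, so hyperthreaded siblings are not double-counted.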
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Can't read "
                 << "/proc/cpuinfo: " << EC.message() << "\n";
    return -1;
  }

  SmallVector<StringRef, 32> strs;
  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
                             /*KeepEmpty=*/false);
  int CurPhysicalId = -1;
  int CurCoreId = -1;
  SmallSet<std::pair<int, int>, 32> UniqueItems;
  for (auto &Line : strs) {
    Line = Line.trim();
    if (!Line.startswith("physical id") && !Line.startswith("core id"))
      continue;
    std::pair<StringRef, StringRef> Data = Line.split(':');
    auto Name = Data.first.trim();
    auto Val = Data.second.trim();
    if (Name == "physical id") {
      assert(CurPhysicalId == -1 &&
             "Expected a core id before seeing another physical id");
      Val.getAsInteger(10, CurPhysicalId);
    }
    if (Name == "core id") {
      assert(CurCoreId == -1 &&
             "Expected a physical id before seeing another core id");
      Val.getAsInteger(10, CurCoreId);
    }
    if (CurPhysicalId != -1 && CurCoreId != -1) {
      UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId));
      CurPhysicalId = -1;
      CurCoreId = -1;
    }
  }
  return UniqueItems.size();
}
#elif defined(__APPLE__) && defined(__x86_64__)
#include <sys/param.h>
#include <sys/sysctl.h>

// Gets the number of *physical cores* on the machine.
static int computeHostNumPhysicalCores() {
  uint32_t count;
  size_t len = sizeof(count);
  sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
  if (count < 1) {
    int nm[2];
    nm[0] = CTL_HW;
    nm[1] = HW_AVAILCPU;
    sysctl(nm, 2, &count, &len, NULL, 0);
    if (count < 1)
      return -1;
  }
  return count;
}
#else
// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() { return -1; }
#endif

int sys::getHostNumPhysicalCores() {
  static int NumCores = computeHostNumPhysicalCores();
  return NumCores;
}

#if defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64__) || defined(_M_X64)
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  unsigned MaxLevel;
  union {
    unsigned u[3];
    char c[12];
  } text;

  if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) ||
      MaxLevel < 1)
    return false;

  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);

  Features["cmov"] = (EDX >> 15) & 1;
  Features["mmx"] = (EDX >> 23) & 1;
  Features["sse"] = (EDX >> 25) & 1;
  Features["sse2"] = (EDX >> 26) & 1;
  Features["sse3"] = (ECX >> 0) & 1;
  Features["pclmul"] = (ECX >> 1) & 1;
  Features["ssse3"] = (ECX >> 9) & 1;
  Features["cx16"] = (ECX >> 13) & 1;
  Features["sse4.1"] = (ECX >> 19) & 1;
  Features["sse4.2"] = (ECX >> 20) & 1;
  Features["movbe"] = (ECX >> 22) & 1;
  Features["popcnt"] = (ECX >> 23) & 1;
  Features["aes"] = (ECX >> 25) & 1;
  Features["rdrnd"] = (ECX >> 30) & 1;

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
                    !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);

  Features["avx"] = HasAVXSave;
  Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave;
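  // For reference, getX86XCR0 is essentially a wrapper around the XGETBV
  // instruction with ECX = 0; a minimal sketch (GCC/Clang inline asm, only
  // valid once OSXSAVE support has been confirmed via CPUID):
  //
  //   static bool readXCR0(unsigned *rEAX, unsigned *rEDX) {
  //     __asm__("xgetbv" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  //     return false; // mirrors the "false on success" convention used here
  //   }
  //
  // In XCR0, bits 1-2 (mask 0x6) cover SSE and AVX (YMM) state, and bits 5-7
  // (mask 0xe0) cover the opmask and upper-ZMM state needed for AVX-512.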
Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; unsigned MaxExtLevel; getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); bool HasLeaf7 = MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); // AVX2 is only supported if we have the OS save support from AVX. Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); // AVX512 is only supported if the OS supports the context save for it. Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; + Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; + Features["ibt"] = HasLeaf7 && ((EDX >> 20) & 1); + bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); // Only enable XSAVE if OS has enabled support for saving YMM state. 
Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; return true; } #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) bool sys::getHostCPUFeatures(StringMap &Features) { std::unique_ptr P = getProcCpuinfoContent(); if (!P) return false; SmallVector Lines; P->getBuffer().split(Lines, "\n"); SmallVector CPUFeatures; // Look for the CPU features. for (unsigned I = 0, E = Lines.size(); I != E; ++I) if (Lines[I].startswith("Features")) { Lines[I].split(CPUFeatures, ' '); break; } #if defined(__aarch64__) // Keep track of which crypto features we have seen enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; uint32_t crypto = 0; #endif for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { StringRef LLVMFeatureStr = StringSwitch(CPUFeatures[I]) #if defined(__aarch64__) .Case("asimd", "neon") .Case("fp", "fp-armv8") .Case("crc32", "crc") #else .Case("half", "fp16") .Case("neon", "neon") .Case("vfpv3", "vfp3") .Case("vfpv3d16", "d16") .Case("vfpv4", "vfp4") .Case("idiva", "hwdiv-arm") .Case("idivt", "hwdiv") #endif .Default(""); #if defined(__aarch64__) // We need to check crypto separately since we need all of the crypto // extensions to enable the subtarget feature if (CPUFeatures[I] == "aes") crypto |= CAP_AES; else if (CPUFeatures[I] == "pmull") crypto |= CAP_PMULL; else if (CPUFeatures[I] == "sha1") crypto |= CAP_SHA1; else if (CPUFeatures[I] == "sha2") crypto |= CAP_SHA2; #endif if (LLVMFeatureStr != "") Features[LLVMFeatureStr] = true; } #if defined(__aarch64__) // If we have all crypto bits we can add the feature if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) Features["crypto"] = true; #endif return true; } #else bool sys::getHostCPUFeatures(StringMap &Features) { return false; } #endif std::string sys::getProcessTriple() { std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); Triple PT(Triple::normalize(TargetTripleString)); if (sizeof(void *) == 8 && PT.isArch32Bit()) PT = PT.get64BitArchVariant(); if (sizeof(void *) == 4 && PT.isArch64Bit()) PT = PT.get32BitArchVariant(); return PT.str(); } diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 8c1136341dec..f1e57091b0df 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1,1025 +1,1029 @@ //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This is a target description file for the Intel i386 architecture, referred // to here as the "X86" architecture. // //===----------------------------------------------------------------------===// // Get the target-independent interfaces which we are implementing... 
// include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // X86 Subtarget state // def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", "64-bit mode (x86_64)">; def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true", "32-bit mode (80386)">; def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true", "16-bit mode (i8086)">; //===----------------------------------------------------------------------===// // X86 Subtarget features //===----------------------------------------------------------------------===// def FeatureX87 : SubtargetFeature<"x87","HasX87", "true", "Enable X87 float instructions">; def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", "Support POPCNT instruction">; def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true", "Support fxsave/fxrestore instructions">; def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true", "Support xsave instructions">; def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true", "Support xsaveopt instructions">; def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true", "Support xsavec instructions">; def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true", "Support xsaves instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions", // SSE codegen depends on cmovs, and all // SSE1+ processors support them. [FeatureCMOV]>; def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions", [FeatureSSE1]>; def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3", "Enable SSE3 instructions", [FeatureSSE2]>; def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3", "Enable SSSE3 instructions", [FeatureSSE3]>; def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41", "Enable SSE 4.1 instructions", [FeatureSSSE3]>; def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", [FeatureSSE41]>; // The MMX subtarget feature is separate from the rest of the SSE features // because it's important (for odd compatibility reasons) to be able to // turn it off explicitly while allowing SSE+ to be on. def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX", "Enable MMX instructions">; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions", [FeatureMMX]>; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", "Enable 3DNow! Athlon instructions", [Feature3DNow]>; // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied // feature, because SSE2 can be disabled (e.g. for compiling OS kernels) // without disabling 64-bit mode. def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions", [FeatureCMOV]>; def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", "64-bit with cmpxchg16b", [Feature64Bit]>; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", "PMULLD instruction is slow">; // FIXME: This should not apply to CPUs that do not have SSE. 
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16", "IsUAMem16Slow", "true", "Slow unaligned 16-byte memory access">; def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", "IsUAMem32Slow", "true", "Slow unaligned 32-byte memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", "Enable AVX instructions", [FeatureSSE42]>; def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions", [FeatureAVX]>; def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", "Enable three-operand fused multiple-add", [FeatureAVX]>; def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions", [FeatureAVX]>; def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F", "Enable AVX-512 instructions", [FeatureAVX2, FeatureFMA, FeatureF16C]>; def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", "Enable AVX-512 Exponential and Reciprocal Instructions", [FeatureAVX512]>; def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions", [FeatureAVX512]>; def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true", "Enable AVX-512 Population Count Instructions", [FeatureAVX512]>; def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1", "true", "Prefetch with Intent to Write and T1 Hint">; def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", "Enable AVX-512 Doubleword and Quadword Instructions", [FeatureAVX512]>; def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", "Enable AVX-512 Byte and Word Instructions", [FeatureAVX512]>; def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", "Enable AVX-512 Vector Length eXtensions", [FeatureAVX512]>; def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", "Enable AVX-512 Vector Byte Manipulation Instructions", [FeatureBWI]>; def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true", "Enable AVX-512 further Vector Byte Manipulation Instructions", [FeatureBWI]>; def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true", "Enable AVX-512 Integer Fused Multiple-Add", [FeatureAVX512]>; def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", "Enable protection keys">; def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true", "Enable AVX-512 Vector Neural Network Instructions", [FeatureAVX512]>; def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true", "Enable AVX-512 Bit Algorithms", [FeatureBWI]>; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true", "Enable Galois Field Arithmetic Instructions", [FeatureSSE2]>; def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true", "Enable vpclmulqdq instructions", [FeatureAVX, FeaturePCLMUL]>; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add", [FeatureAVX, FeatureSSE4A]>; def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions", [FeatureFMA4]>; def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", 
"true", "Allow unaligned memory operands with SSE instructions">; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions", [FeatureSSE2]>; def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true", "Promote selected AES instructions to AVX512/AVX registers", [FeatureAVX, FeatureAES]>; def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true", "Enable TBM instructions">; def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true", "Enable LWP instructions">; def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", "Support RDRAND instruction">; def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true", "Support FS/GS Base instructions">; def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", "Support LZCNT instruction">; def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", "Support BMI instructions">; def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">; def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true", "Enable SHA instructions", [FeatureSSE2]>; +def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true", + "Support CET Shadow-Stack instructions">; +def FeatureIBT : SubtargetFeature<"ibt", "HasIBT", "true", + "Support CET Indirect-Branch-Tracking instructions">; def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true", "Support LAHF and SAHF instructions">; def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true", "Enable MONITORX/MWAITX timer functionality">; def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true", "Enable Cache Line Zero">; def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", "Support MPX instructions">; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true", "Use 8-bit divide for positive values less than 256">; def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true", "Use 32-bit divide for positive values less than 2^32">; def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", "Enable Software Guard Extensions">; def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true", "Flush A Cache Line Optimized">; def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">; // On some processors, instructions that implicitly take two memory operands are // slow. In practice, this means that CALL, PUSH, and POP with memory operands // should be avoided in favor of a MOV + register CALL/PUSH/POP. 
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true", "Two memory operand instructions are slow">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true", "LEA instruction with 3 ops or certain registers is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software floating point features.">; // On some X86 processors, there is no performance hazard to writing only the // lower parts of a YMM or ZMM register without clearing the upper part. def FeatureFastPartialYMMorZMMWrite : SubtargetFeature<"fast-partial-ymm-or-zmm-write", "HasFastPartialYMMorZMMWrite", "true", "Partial writes to YMM/ZMM registers are fast">; // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if // vector FSQRT has higher throughput than the corresponding NR code. // The idea is that throughput bound code is likely to be vectorized, so for // vectorized code we should care about the throughput of SQRT operations. // But if the code is scalar that probably means that the code has some kind of // dependency and we should care more about reducing the latency. def FeatureFastScalarFSQRT : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT", "true", "Scalar SQRT is fast (disable Newton-Raphson)">; def FeatureFastVectorFSQRT : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT", "true", "Vector SQRT is fast (disable Newton-Raphson)">; // If lzcnt has equivalent latency/throughput to most simple integer ops, it can // be used to replace test/set sequences. def FeatureFastLZCNT : SubtargetFeature< "fast-lzcnt", "HasFastLZCNT", "true", "LZCNT instructions are as fast as most simple integer ops">; // Sandy Bridge and newer processors can use SHLD with the same source on both // inputs to implement rotate to avoid the partial flag update of the normal // rotate instructions. def FeatureFastSHLDRotate : SubtargetFeature< "fast-shld-rotate", "HasFastSHLDRotate", "true", "SHLD can be used as a faster rotate">; // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka // "string operations"). See "REP String Enhancement" in the Intel Software // Development Manual. This feature essentially means that REP MOVSB will copy // using the largest available size instead of copying bytes one by one, making // it at least as fast as REPMOVS{W,D,Q}. def FeatureERMSB : SubtargetFeature< "ermsb", "HasERMSB", "true", "REP MOVS/STOS are fast">; // Sandy Bridge and newer processors have many instructions that can be // fused with conditional branches and pass through the CPU as a single // operation. def FeatureMacroFusion : SubtargetFeature<"macrofusion", "HasMacroFusion", "true", "Various instructions can be fused with conditional branches">; // Gather is available since Haswell (AVX2 set). So technically, we can // generate Gathers on all AVX2 processors. But the overhead on HSW is high. // Skylake Client processor has faster Gathers than HSW and performance is // similar to Skylake Server (AVX-512). 
def FeatureHasFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", "Indicates if gather is reasonably fast.">; //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// include "X86Schedule.td" def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", "Intel Silvermont processors">; def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM", "Intel Goldmont processors">; def ProcIntelHSW : SubtargetFeature<"haswell", "X86ProcFamily", "IntelHaswell", "Intel Haswell processors">; def ProcIntelBDW : SubtargetFeature<"broadwell", "X86ProcFamily", "IntelBroadwell", "Intel Broadwell processors">; def ProcIntelSKL : SubtargetFeature<"skylake", "X86ProcFamily", "IntelSkylake", "Intel Skylake processors">; def ProcIntelKNL : SubtargetFeature<"knl", "X86ProcFamily", "IntelKNL", "Intel Knights Landing processors">; def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", "IntelSKX", "Intel Skylake Server processors">; def ProcIntelCNL : SubtargetFeature<"cannonlake", "X86ProcFamily", "IntelCannonlake", "Intel Cannonlake processors">; def ProcIntelICL : SubtargetFeature<"icelake", "X86ProcFamily", "IntelIcelake", "Intel Icelake processors">; class Proc Features> : ProcessorModel; def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; foreach P = ["i686", "pentiumpro"] in { def : Proc; } def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureCMOV, FeatureFXSR]>; foreach P = ["pentium3", "pentium3m"] in { def : Proc; } // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. // The intent is to enable it for pentium4 which is the current default // processor in a vanilla 32-bit clang compilation when no specific // architecture is specified. This generally gives a nice performance // increase on silvermont, with largely neutral behavior on other // contemporary large core processors. // pentium-m, pentium4m, prescott and nocona are included as a preventative // measure to avoid performance surprises, in case clang's default cpu // changes slightly. def : ProcessorModel<"pentium-m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureFXSR]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcessorModel; } // Intel Quark. def : Proc<"lakemont", []>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR]>; // NetBurst. def : ProcessorModel<"prescott", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureCMPXCHG16B ]>; // Intel Core 2 Solo/Duo. 
def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSSE3, FeatureFXSR, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion ]>; def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE41, FeatureFXSR, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Atom CPUs. class BonnellProc : ProcessorModel; def : BonnellProc<"bonnell">; def : BonnellProc<"atom">; // Pin the generic name to the baseline. class SilvermontProc : ProcessorModel; def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. class GoldmontProc : ProcessorModel; def : GoldmontProc<"goldmont">; // "Arrandale" along with corei3 and corei5 class NehalemProc : ProcessorModel; def : NehalemProc<"nehalem">; def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge class WestmereProc : ProcessorModel; def : WestmereProc<"westmere">; class ProcessorFeatures Inherited, list NewFeatures> { list Value = !listconcat(Inherited, NewFeatures); } class ProcModel ProcFeatures, list OtherFeatures> : ProcessorModel; // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. def SNBFeatures : ProcessorFeatures<[], [ FeatureX87, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, FeatureSlowDivide64, FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF, FeatureSlow3OpsLEA, FeatureFastScalarFSQRT, FeatureFastSHLDRotate, FeatureSlowIncDec, FeatureMacroFusion ]>; class SandyBridgeProc : ProcModel; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. def IVBFeatures : ProcessorFeatures; class IvyBridgeProc : ProcModel; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. def HSWFeatures : ProcessorFeatures; class HaswellProc : ProcModel; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. def BDWFeatures : ProcessorFeatures; class BroadwellProc : ProcModel; def : BroadwellProc<"broadwell">; def SKLFeatures : ProcessorFeatures; class SkylakeClientProc : ProcModel; def : SkylakeClientProc<"skylake">; def KNLFeatures : ProcessorFeatures; // FIXME: define KNL model class KnightsLandingProc : ProcModel; def : KnightsLandingProc<"knl">; class KnightsMillProc : ProcModel; def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features def SKXFeatures : ProcessorFeatures; class SkylakeServerProc : ProcModel; def : SkylakeServerProc<"skylake-avx512">; def : SkylakeServerProc<"skx">; // Legacy alias. def CNLFeatures : ProcessorFeatures; class CannonlakeProc : ProcModel; def : CannonlakeProc<"cannonlake">; def ICLFeatures : ProcessorFeatures; class IcelakeProc : ProcModel; def : IcelakeProc<"icelake">; // AMD CPUs. 
def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; foreach P = ["athlon", "athlon-tbird"] in { def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { def : Proc; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { def : Proc; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { def : Proc; } foreach P = ["amdfam10", "barcelona"] in { def : Proc; } // Bobcat def : Proc<"btver1", [ FeatureX87, FeatureMMX, FeatureSSSE3, FeatureSSE4A, FeatureFXSR, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF ]>; // Jaguar def : ProcessorModel<"btver2", BtVer2Model, [ FeatureX87, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C, FeatureMOVBE, FeatureLZCNT, FeatureFastLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFastPartialYMMorZMMWrite ]>; // Bulldozer def : Proc<"bdver1", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureLWP, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Piledriver def : Proc<"bdver2", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureTBM, FeatureLWP, FeatureFMA, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Steamroller def : Proc<"bdver3", [ FeatureX87, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureMMX, FeatureAVX, FeatureFXSR, FeatureSSE4A, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureTBM, FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, FeatureMacroFusion ]>; // Excavator def : Proc<"bdver4", [ FeatureX87, FeatureMMX, FeatureAVX2, FeatureFXSR, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, FeatureBMI, FeatureBMI2, FeatureTBM, FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, FeatureFSGSBase, FeatureLAHFSAHF, FeatureMWAITX, FeatureMacroFusion ]>; // Znver1 def: ProcessorModel<"znver1", Znver1Model, [ FeatureADX, FeatureAES, FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMPXCHG16B, FeatureF16C, FeatureFMA, FeatureFSGSBase, FeatureFXSR, FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, FeatureMacroFusion, FeatureMMX, FeatureMOVBE, FeatureMWAITX, FeaturePCLMUL, FeaturePOPCNT, FeaturePRFCHW, FeatureRDRAND, FeatureRDSEED, FeatureSHA, FeatureSSE4A, FeatureSlowSHLD, FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES]>; def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling 
instruction set encodings past the // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and // modern 64-bit x86 chip, and enables features that are generally beneficial. // // We currently use the Sandy Bridge model as the default scheduling model as // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which // covers a huge swath of x86 processors. If there are specific scheduling // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. def : ProcessorModel<"x86-64", SandyBridgeModel, [ FeatureX87, FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit, FeatureSlow3OpsLEA, FeatureSlowIncDec, FeatureMacroFusion ]>; //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// include "X86RegisterInfo.td" include "X86RegisterBanks.td" //===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// include "X86InstrInfo.td" def X86InstrInfo : InstrInfo; //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// include "X86CallingConv.td" //===----------------------------------------------------------------------===// // Assembly Parser //===----------------------------------------------------------------------===// def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; // Variant name. string Name = "att"; // Discard comments in assembly strings. string CommentDelimiter = "#"; // Recognize hard coded registers. string RegisterPrefix = "%"; } def IntelAsmParserVariant : AsmParserVariant { int Variant = 1; // Variant name. string Name = "intel"; // Discard comments in assembly strings. string CommentDelimiter = ";"; // Recognize hard coded registers. string RegisterPrefix = ""; } //===----------------------------------------------------------------------===// // Assembly Printers //===----------------------------------------------------------------------===// // The X86 target supports two different syntaxes for emitting machine code. // This is controlled by the -x86-asm-syntax={att|intel} def ATTAsmWriter : AsmWriter { string AsmWriterClassName = "ATTInstPrinter"; int Variant = 0; } def IntelAsmWriter : AsmWriter { string AsmWriterClassName = "IntelInstPrinter"; int Variant = 1; } def X86 : Target { // Information about the instructions... let InstructionSet = X86InstrInfo; let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; } diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index b941050c9f79..82885687bb42 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1,1983 +1,1983 @@ //===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the various pseudo instructions used by the compiler, // as well as Pat patterns used during instruction selection. 
// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Pattern Matching Support def GetLo32XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; def GetLo8XForm : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; //===----------------------------------------------------------------------===// // Random Pseudo Instructions. // PIC base construction. This expands to code that looks like this: // call $next_inst // popl %destreg" -let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in +let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP, SSP] in def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label), "", []>; // ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into // a stack adjustment and the codegen must know that they may modify the stack // pointer before prolog-epilog rewriting occurs. // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. -let Defs = [ESP, EFLAGS], Uses = [ESP] in { +let Defs = [ESP, EFLAGS, SSP], Uses = [ESP, SSP] in { def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3), "#ADJCALLSTACKDOWN", []>, Requires<[NotLP64]>; def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", [(X86callseq_end timm:$amt1, timm:$amt2)]>, Requires<[NotLP64]>; } def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), (ADJCALLSTACKDOWN32 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[NotLP64]>; // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into // a stack adjustment and the codegen must know that they may modify the stack // pointer before prolog-epilog rewriting occurs. // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. -let Defs = [RSP, EFLAGS], Uses = [RSP] in { +let Defs = [RSP, EFLAGS, SSP], Uses = [RSP, SSP] in { def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3), "#ADJCALLSTACKDOWN", []>, Requires<[IsLP64]>; def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", [(X86callseq_end timm:$amt1, timm:$amt2)]>, Requires<[IsLP64]>; } def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), (ADJCALLSTACKDOWN64 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[IsLP64]>; // x86-64 va_start lowering magic. let usesCustomInserter = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, i64imm:$regsavefi, i64imm:$offset, variable_ops), "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", [(X86vastart_save_xmm_regs GR8:$al, imm:$regsavefi, imm:$offset), (implicit EFLAGS)]>; // The VAARG_64 pseudo-instruction takes the address of the va_list, // and places the address of the next argument into a register. let Defs = [EFLAGS] in def VAARG_64 : I<0, Pseudo, (outs GR64:$dst), (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align), "#VAARG_64 $dst, $ap, $size, $mode, $align", [(set GR64:$dst, (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)), (implicit EFLAGS)]>; // When using segmented stacks these are lowered into instructions which first // check if the current stacklet has enough free memory. If it does, memory is // allocated by bumping the stack pointer. Otherwise memory is allocated from // the heap. 
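// As a rough sketch, the custom inserter conceptually expands SEG_ALLOCA into
// a compare-and-branch sequence along these lines (32-bit case; the
// stacklet-limit location shown is illustrative, the real one is TCB- and
// target-specific):
//
//   movl %esp, %eax
//   subl $size, %eax               # candidate new stack pointer
//   cmpl %gs:STACKLET_LIMIT, %eax
//   jb   .LcallMoreStack           # not enough room: allocate from the heap
//   movl %eax, %esp                # enough room: just bump the stack pointer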
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size), "# variable sized alloca for segmented stacks", [(set GR32:$dst, (X86SegAlloca GR32:$size))]>, Requires<[NotLP64]>; let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), "# variable sized alloca for segmented stacks", [(set GR64:$dst, (X86SegAlloca GR64:$size))]>, Requires<[In64BitMode]>; } // Dynamic stack allocation yields a _chkstk or _alloca call for all Windows // targets. These calls are needed to probe the stack when allocating more than // 4k bytes in one go. Touching the stack at 4K increments is necessary to // ensure that the guard pages used by the OS virtual memory manager are // allocated in correct sequence. // The main point of having separate instruction are extra unmodelled effects // (compared to ordinary calls) like stack pointer change. let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def WIN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size), "# dynamic stack allocation", [(X86WinAlloca GR32:$size)]>, Requires<[NotLP64]>; let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size), "# dynamic stack allocation", [(X86WinAlloca GR64:$size)]>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // EH Pseudo Instructions // let SchedRW = [WriteSystem] in { let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1, isReturn = 1 in { def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>; // CATCHRET needs a custom inserter for SEH. let usesCustomInserter = 1 in def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from), "# CATCHRET", [(catchret bb:$dst, bb:$from)]>; } let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>; // This instruction is responsible for re-establishing stack pointers after an // exception has been caught and we are rejoining normal control flow in the // parent function or funclet. It generally sets ESP and EBP, and optionally // ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us // elsewhere. 
let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>; let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), "#EH_SJLJ_SETJMP32", [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, Requires<[Not64BitMode]>; def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf), "#EH_SJLJ_SETJMP64", [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, Requires<[In64BitMode]>; let isTerminator = 1 in { def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf), "#EH_SJLJ_LONGJMP32", [(X86eh_sjlj_longjmp addr:$buf)]>, Requires<[Not64BitMode]>; def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf), "#EH_SJLJ_LONGJMP64", [(X86eh_sjlj_longjmp addr:$buf)]>, Requires<[In64BitMode]>; } } } // SchedRW let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), "#EH_SjLj_Setup\t$dst", []>; } //===----------------------------------------------------------------------===// // Pseudo instructions used by unwind info. // let isPseudo = 1 in { def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg), "#SEH_PushReg $reg", []>; def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), "#SEH_SaveReg $reg, $dst", []>; def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), "#SEH_SaveXMM $reg, $dst", []>; def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size), "#SEH_StackAlloc $size", []>; def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset), "#SEH_SetFrame $reg, $offset", []>; def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode), "#SEH_PushFrame $mode", []>; def SEH_EndPrologue : I<0, Pseudo, (outs), (ins), "#SEH_EndPrologue", []>; def SEH_Epilogue : I<0, Pseudo, (outs), (ins), "#SEH_Epilogue", []>; } //===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. // // This is lowered into a RET instruction by MCInstLower. We need // this so that we don't have to have a MachineBasicBlock which ends // with a RET and also has successors. let isPseudo = 1 in { def MORESTACK_RET: I<0, Pseudo, (outs), (ins), "", []>; // This instruction is lowered to a RET followed by a MOV. The two // instructions are not generated on a higher level since then the // verifier sees a MachineBasicBlock ending with a non-terminator. def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>; } //===----------------------------------------------------------------------===// // Alias Instructions //===----------------------------------------------------------------------===// // Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, AddedComplexity = 10 in def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Other widths can also make use of the 32-bit xor, which may have a smaller // encoding and avoid partial register updates. 
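// For reference: on x86-64 a 32-bit "xorl %eax, %eax" implicitly zeroes the
// full 64-bit %rax, so the single two-byte 32-bit xor below can materialize
// zero for 8-, 16-, 32- and 64-bit uses via sub-register patterns, e.g.:
//
//   xorl %eax, %eax        # also clears %rax, %ax and %al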
let AddedComplexity = 10 in { def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>; def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>; def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>; } let Predicates = [OptForSize, Not64BitMode], AddedComplexity = 10 in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in { def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 1)]>; def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, -1)]>; } // MOV16ri is 4 bytes, so the instructions above are smaller. def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>; def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>; } let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 5 in { // AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1. // FIXME: Add itinerary class and Schedule. def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "", [(set GR32:$dst, i32immSExt8:$src)]>, Requires<[OptForMinSize, NotWin64WithoutFP]>; def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "", [(set GR64:$dst, i64immSExt8:$src)]>, Requires<[OptForMinSize, NotWin64WithoutFP]>; } // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, hasSideEffects = 0 in def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>; // This 64-bit pseudo-move can be used for both a 64-bit constant that is // actually the zero-extension of a 32-bit constant and for labels in the // x86-64 small code model. def mov64imm32 : ComplexPattern; let AddedComplexity = 1 in def : Pat<(i64 mov64imm32:$src), (SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>; // Use sbb to materialize carry bit. let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "", [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "", [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; } // isCodeGenOnly def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>; def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>; // We canonicalize 'setb' to "(and (sbb reg,reg), 1)" on the hope that the and // will be eliminated and that the sbb can be extended up to a wider type. When // this happens, it is great. However, if we are left with an 8-bit sbb and an // and, we might as well just match it as a setb. 
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), (SETBr)>; // (add OP, SETB) -> (adc OP, 0) def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op), (ADC8ri GR8:$op, 0)>; def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op), (ADC32ri8 GR32:$op, 0)>; def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op), (ADC64ri8 GR64:$op, 0)>; // (sub OP, SETB) -> (sbb OP, 0) def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)), (SBB8ri GR8:$op, 0)>; def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)), (SBB32ri8 GR32:$op, 0)>; def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)), (SBB64ri8 GR64:$op, 0)>; // (sub OP, SETCC_CARRY) -> (adc OP, 0) def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))), (ADC8ri GR8:$op, 0)>; def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))), (ADC32ri8 GR32:$op, 0)>; def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))), (ADC64ri8 GR64:$op, 0)>; //===----------------------------------------------------------------------===// // String Pseudo Instructions // let SchedRW = [WriteMicrocoded] in { let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)], IIC_REP_MOVS>, REP, Requires<[Not64BitMode]>; def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16, Requires<[Not64BitMode]>; def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", [(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32, Requires<[Not64BitMode]>; } let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in { def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)], IIC_REP_MOVS>, REP, Requires<[In64BitMode]>; def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize16, Requires<[In64BitMode]>; def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", [(X86rep_movs i32)], IIC_REP_MOVS>, REP, OpSize32, Requires<[In64BitMode]>; def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", [(X86rep_movs i64)], IIC_REP_MOVS>, REP, Requires<[In64BitMode]>; } // FIXME: Should use "(X86rep_stos AL)" as the pattern. 
let Defs = [ECX,EDI], isCodeGenOnly = 1 in { let Uses = [AL,ECX,EDI] in def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", [(X86rep_stos i8)], IIC_REP_STOS>, REP, Requires<[Not64BitMode]>; let Uses = [AX,ECX,EDI] in def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16, Requires<[Not64BitMode]>; let Uses = [EAX,ECX,EDI] in def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32, Requires<[Not64BitMode]>; } let Defs = [RCX,RDI], isCodeGenOnly = 1 in { let Uses = [AL,RCX,RDI] in def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", [(X86rep_stos i8)], IIC_REP_STOS>, REP, Requires<[In64BitMode]>; let Uses = [AX,RCX,RDI] in def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize16, Requires<[In64BitMode]>; let Uses = [RAX,RCX,RDI] in def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32, Requires<[In64BitMode]>; let Uses = [RAX,RCX,RDI] in def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)], IIC_REP_STOS>, REP, Requires<[In64BitMode]>; } } // SchedRW //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // // ELF TLS Support // All calls clobber the non-callee saved registers. ESP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - usesCustomInserter = 1, Uses = [ESP] in { + usesCustomInserter = 1, Uses = [ESP, SSP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, Requires<[Not64BitMode]>; def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_base_addr32", [(X86tlsbaseaddr tls32baseaddr:$sym)]>, Requires<[Not64BitMode]>; } // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - usesCustomInserter = 1, Uses = [RSP] in { + usesCustomInserter = 1, Uses = [RSP, SSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_base_addr64", [(X86tlsbaseaddr tls64baseaddr:$sym)]>, Requires<[In64BitMode]>; } // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the // address of the variable is in %eax. %ecx is trashed during the function // call. All other registers are preserved. 
let Defs = [EAX, ECX, EFLAGS], - Uses = [ESP], + Uses = [ESP, SSP], usesCustomInserter = 1 in def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLSCall_32", [(X86TLSCall addr:$sym)]>, Requires<[Not64BitMode]>; // For x86_64, the address of the thunk is passed in %rdi, but the // pseudo directly use the symbol, so do not add an implicit use of // %rdi. The lowering will do the right thing with RDI. // On return the address of the variable is in %rax. All other // registers are preserved. let Defs = [RAX, EFLAGS], - Uses = [RSP], + Uses = [RSP, SSP], usesCustomInserter = 1 in def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLSCall_64", [(X86TLSCall addr:$sym)]>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions // CMOV* - Used to implement the SELECT DAG operation. Expanded after // instruction selection into a branch sequence. multiclass CMOVrr_PSEUDO { def CMOV#NAME : I<0, Pseudo, (outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond), "#CMOV_"#NAME#" PSEUDO!", [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond, EFLAGS)))]>; } let usesCustomInserter = 1, Uses = [EFLAGS] in { // X86 doesn't have 8-bit conditional moves. Use a customInserter to // emit control flow. An alternative to this is to mark i8 SELECT as Promote, // however that requires promoting the operands, and can induce additional // i8 register pressure. defm _GR8 : CMOVrr_PSEUDO; let Predicates = [NoCMov] in { defm _GR32 : CMOVrr_PSEUDO; defm _GR16 : CMOVrr_PSEUDO; } // Predicates = [NoCMov] // fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no // SSE1/SSE2. let Predicates = [FPStackf32] in defm _RFP32 : CMOVrr_PSEUDO; let Predicates = [FPStackf64] in defm _RFP64 : CMOVrr_PSEUDO; defm _RFP80 : CMOVrr_PSEUDO; defm _FR32 : CMOVrr_PSEUDO; defm _FR64 : CMOVrr_PSEUDO; defm _FR128 : CMOVrr_PSEUDO; defm _V4F32 : CMOVrr_PSEUDO; defm _V2F64 : CMOVrr_PSEUDO; defm _V2I64 : CMOVrr_PSEUDO; defm _V8F32 : CMOVrr_PSEUDO; defm _V4F64 : CMOVrr_PSEUDO; defm _V4I64 : CMOVrr_PSEUDO; defm _V8I64 : CMOVrr_PSEUDO; defm _V8F64 : CMOVrr_PSEUDO; defm _V16F32 : CMOVrr_PSEUDO; defm _V8I1 : CMOVrr_PSEUDO; defm _V16I1 : CMOVrr_PSEUDO; defm _V32I1 : CMOVrr_PSEUDO; defm _V64I1 : CMOVrr_PSEUDO; } // usesCustomInserter = 1, Uses = [EFLAGS] //===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions //===----------------------------------------------------------------------===// // FIXME: Use normal instructions and add lock prefix dynamically. // Memory barriers // TODO: Get this to fold the constant into the instruction. 
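// For reference, OR32mrLocked below realizes a full barrier with the classic
// "lock; orl $0, (%esp)" idiom: the locked read-modify-write of the
// top-of-stack word is a no-op data-wise but globally orders memory accesses,
// and is often cheaper than MFENCE. Roughly (sketch, the source register
// holds zero):
//
//   lock orl %reg, (%esp)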
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked  : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
                      "or{l}\t{$zero, $dst|$dst, $zero}", [],
                      IIC_ALU_MEM>, Requires<[Not64BitMode]>, OpSize32, LOCK,
                    Sched<[WriteALULd, WriteRMW]>;

let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
                       "#MEMBARRIER",
                       [(X86MemBarrier)]>, Sched<[WriteLoad]>;

// RegOpc corresponds to the mr version of the instruction
// ImmOpc corresponds to the mi version of the instruction
// ImmOpc8 corresponds to the mi8 version of the instruction
// ImmMod corresponds to the instruction format of the mi and mi8 versions
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
                           Format ImmMod, SDNode Op, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
    SchedRW = [WriteALULd, WriteRMW] in {

def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                  RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
                 MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                 !strconcat(mnemonic, "{b}\t",
                            "{$src2, $dst|$dst, $src2}"),
                 [(set EFLAGS, (Op addr:$dst, GR8:$src2))],
                 IIC_ALU_NONMEM>, LOCK;

def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                   RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                  MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
                  !strconcat(mnemonic, "{w}\t",
                             "{$src2, $dst|$dst, $src2}"),
                  [(set EFLAGS, (Op addr:$dst, GR16:$src2))],
                  IIC_ALU_NONMEM>, OpSize16, LOCK;

def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                   RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                  MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
                  !strconcat(mnemonic, "{l}\t",
                             "{$src2, $dst|$dst, $src2}"),
                  [(set EFLAGS, (Op addr:$dst, GR32:$src2))],
                  IIC_ALU_NONMEM>, OpSize32, LOCK;

def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                    RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                   MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
                   !strconcat(mnemonic, "{q}\t",
                              "{$src2, $dst|$dst, $src2}"),
                   [(set EFLAGS, (Op addr:$dst, GR64:$src2))],
                   IIC_ALU_NONMEM>, LOCK;

def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                    ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
                   ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
                   !strconcat(mnemonic, "{b}\t",
                              "{$src2, $dst|$dst, $src2}"),
                   [(set EFLAGS, (Op addr:$dst, (i8 imm:$src2)))],
                   IIC_ALU_MEM>, LOCK;

def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                     ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
                     !strconcat(mnemonic, "{w}\t",
                                "{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, (i16 imm:$src2)))],
                     IIC_ALU_MEM>, OpSize16, LOCK;

def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                     ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
                     !strconcat(mnemonic, "{l}\t",
                                "{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, (i32 imm:$src2)))],
                     IIC_ALU_MEM>, OpSize32, LOCK;

def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                          ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                         ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
                         !strconcat(mnemonic, "{q}\t",
                                    "{$src2, $dst|$dst, $src2}"),
                         [(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))],
                         IIC_ALU_MEM>, LOCK;

def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                     ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
                     !strconcat(mnemonic, "{w}\t",
                                "{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))],
                     IIC_ALU_MEM>, OpSize16, LOCK;

def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                     ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
                     !strconcat(mnemonic, "{l}\t",
                                "{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))],
                     IIC_ALU_MEM>, OpSize32, LOCK;

def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                       ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                      ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
                      !strconcat(mnemonic, "{q}\t",
                                 "{$src2, $dst|$dst, $src2}"),
                      [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))],
                      IIC_ALU_MEM>, LOCK;
}

}

defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, X86lock_add, "add">;
defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, X86lock_sub, "sub">;
defm LOCK_OR  : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;

multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
                          string frag, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
    SchedRW = [WriteALULd, WriteRMW] in {

def NAME#8m  : I<Opc8, Form, (outs), (ins i8mem :$dst),
                 !strconcat(mnemonic, "{b}\t$dst"),
                 [(set EFLAGS, (!cast<PatFrag>(frag # "_8") addr:$dst))],
                 IIC_UNARY_MEM>, LOCK;
def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
                 !strconcat(mnemonic, "{w}\t$dst"),
                 [(set EFLAGS, (!cast<PatFrag>(frag # "_16") addr:$dst))],
                 IIC_UNARY_MEM>, OpSize16, LOCK;
def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
                 !strconcat(mnemonic, "{l}\t$dst"),
                 [(set EFLAGS, (!cast<PatFrag>(frag # "_32") addr:$dst))],
                 IIC_UNARY_MEM>, OpSize32, LOCK;
def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
                  !strconcat(mnemonic, "{q}\t$dst"),
                  [(set EFLAGS, (!cast<PatFrag>(frag # "_64") addr:$dst))],
                  IIC_UNARY_MEM>, LOCK;
}
}

multiclass unary_atomic_intrin<SDNode atomic_op> {
  def _8 : PatFrag<(ops node:$ptr),
                   (atomic_op node:$ptr), [{
    return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
  }]>;
  def _16 : PatFrag<(ops node:$ptr),
                    (atomic_op node:$ptr), [{
    return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
  }]>;
  def _32 : PatFrag<(ops node:$ptr),
                    (atomic_op node:$ptr), [{
    return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
  }]>;
  def _64 : PatFrag<(ops node:$ptr),
                    (atomic_op node:$ptr), [{
    return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
  }]>;
}

defm X86lock_inc : unary_atomic_intrin<X86lock_inc>;
defm X86lock_dec : unary_atomic_intrin<X86lock_dec>;

defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">;
defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">;

// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
                         SDPatternOperator frag, X86MemOperand x86memop,
                         InstrItinClass itin> {
let isCodeGenOnly = 1, usesCustomInserter = 1 in {
  def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
               !strconcat(mnemonic, "\t$ptr"),
               [(frag addr:$ptr)], itin>, TB, LOCK;
}
}

multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                          string mnemonic, SDPatternOperator frag,
                          InstrItinClass itin8, InstrItinClass itin> {
let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
  let Defs = [AL, EFLAGS], Uses = [AL] in
  def NAME#8  : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
                  !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
                  [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
  let Defs = [AX, EFLAGS], Uses = [AX] in
  def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
                  !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
                  [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;
  let Defs = [EAX, EFLAGS], Uses = [EAX] in
  def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
                  !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
                  [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;
  let Defs = [RAX, EFLAGS], Uses = [RAX] in
  def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
                   !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
                   [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
}
}

let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
    SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
                                X86cas8, i64mem,
                                IIC_CMPX_LOCK_8B>;
}

// This pseudo must be used when the frame uses RBX as
// the base pointer. Indeed, in such situation RBX is a reserved
// register and the register allocator will ignore any use/def of
// it. In other words, the register allocator will not fix the clobbering of
// RBX that will happen when setting the arguments for the instruction.
//
// Unlike the actual related instruction, we mark that this one
// defines EBX (instead of using EBX).
// The rationale is that we will define RBX during the expansion of
// the pseudo. The argument feeding EBX is ebx_input.
//
// The additional argument, $ebx_save, is a temporary register used to
// save the value of RBX across the actual instruction.
//
// To make sure the register assigned to $ebx_save does not interfere with
// the definition of the actual instruction, we use a definition $dst which
// is tied to $ebx_save. That way, the live-range of $ebx_save spans across
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
    SchedRW = [WriteALULd, WriteRMW], isCodeGenOnly = 1, isPseudo = 1,
    Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {
def LCMPXCHG8B_SAVE_EBX :
    I<0, Pseudo, (outs GR32:$dst),
      (ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save),
      !strconcat("cmpxchg8b", "\t$ptr"),
      [(set GR32:$dst,
            (X86cas8save_ebx addr:$ptr, GR32:$ebx_input, GR32:$ebx_save))],
      IIC_CMPX_LOCK_8B>;
}

let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
    Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
                                 X86cas16, i128mem,
                                 IIC_CMPX_LOCK_16B>, REX_W;
}

// Same as LCMPXCHG8B_SAVE_EBX but for the 16-byte variant.
let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
    Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW],
    isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
    usesCustomInserter = 1 in {
def LCMPXCHG16B_SAVE_RBX :
    I<0, Pseudo, (outs GR64:$dst),
      (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save),
      !strconcat("cmpxchg16b", "\t$ptr"),
      [(set GR64:$dst,
            (X86cas16save_rbx addr:$ptr, GR64:$rbx_input, GR64:$rbx_save))],
      IIC_CMPX_LOCK_16B>;
}

defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
                               X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>;

// Atomic exchange and add
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
                             string frag,
                             InstrItinClass itin8, InstrItinClass itin> {
  let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
      SchedRW = [WriteALULd, WriteRMW] in {
    def NAME#8  : I<opc8, MRMSrcMem, (outs GR8:$dst),
                    (ins GR8:$val, i8mem:$ptr),
                    !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
                    [(set GR8:$dst,
                          (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
                    itin8>;
    def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
                    (ins GR16:$val, i16mem:$ptr),
                    !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
                    [(set GR16:$dst,
                          (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
                    itin>, OpSize16;
    def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
                    (ins GR32:$val, i32mem:$ptr),
                    !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
                    [(set GR32:$dst,
                          (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
                    itin>, OpSize32;
    def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
                     (ins GR64:$val, i64mem:$ptr),
                     !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
                     [(set GR64:$dst,
                           (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
                     itin>;
  }
}

defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
                               IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
             TB, LOCK;

/* The following multiclass tries to make sure that in code like
 *  x.store (immediate op x.load(acquire), release)
 * and
 *  x.store (register op x.load(acquire), release)
 * an operation directly on memory is generated instead of wasting a register.
 * It is not automatic as atomic_store/load are only lowered to MOV instructions
 * extremely late to prevent them from being accidentally reordered in the backend
 * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
 */
multiclass RELEASE_BINOP_MI<SDNode op> {
    def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
        "#BINOP "#NAME#"8mi PSEUDO!",
        [(atomic_store_8 addr:$dst, (op
            (atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
    def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),
        "#BINOP "#NAME#"8mr PSEUDO!",
        [(atomic_store_8 addr:$dst, (op
            (atomic_load_8 addr:$dst), GR8:$src))]>;
    // NAME#16 is not generated as 16-bit arithmetic instructions are considered
    // costly and avoided as far as possible by this backend anyway
    def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
        "#BINOP "#NAME#"32mi PSEUDO!",
        [(atomic_store_32 addr:$dst, (op
            (atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
    def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
        "#BINOP "#NAME#"32mr PSEUDO!",
        [(atomic_store_32 addr:$dst, (op
            (atomic_load_32 addr:$dst), GR32:$src))]>;
    def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
        "#BINOP "#NAME#"64mi32 PSEUDO!",
        [(atomic_store_64 addr:$dst, (op
            (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
    def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
        "#BINOP "#NAME#"64mr PSEUDO!",
        [(atomic_store_64 addr:$dst, (op
            (atomic_load_64 addr:$dst), GR64:$src))]>;
}
let Defs = [EFLAGS] in {
  defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
  defm RELEASE_AND : RELEASE_BINOP_MI<and>;
  defm RELEASE_OR  : RELEASE_BINOP_MI<or>;
  defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
  // Note: we don't deal with sub, because subtractions of constants are
  // optimized into additions before this code can run.
}

// Same as above, but for floating-point.
// FIXME: imm version.
// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.
// FIXME: This could also handle SIMD operations with *ps and *pd instructions.
let usesCustomInserter = 1 in {
multiclass RELEASE_FP_BINOP_MI<SDNode op> {
    def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),
        "#BINOP "#NAME#"32mr PSEUDO!",
        [(atomic_store_32 addr:$dst,
           (i32 (bitconvert (op
             (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),
             FR32:$src))))]>, Requires<[HasSSE1]>;
    def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),
        "#BINOP "#NAME#"64mr PSEUDO!",
        [(atomic_store_64 addr:$dst,
           (i64 (bitconvert (op
             (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),
             FR64:$src))))]>, Requires<[HasSSE2]>;
}
defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
// FIXME: Add fsub, fmul, fdiv, ...
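// For example, on an SSE2 target a source-level
//   x.store(x.load(memory_order_acquire) + 2.0, memory_order_release);
// matches RELEASE_FADD64mr and is expanded by the custom inserter, instead
// of being selected as separate load, add, and store instructions.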
}

multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
    def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
        "#UNOP "#NAME#"8m PSEUDO!",
        [(atomic_store_8 addr:$dst, dag8)]>;
    def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
        "#UNOP "#NAME#"16m PSEUDO!",
        [(atomic_store_16 addr:$dst, dag16)]>;
    def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
        "#UNOP "#NAME#"32m PSEUDO!",
        [(atomic_store_32 addr:$dst, dag32)]>;
    def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
        "#UNOP "#NAME#"64m PSEUDO!",
        [(atomic_store_64 addr:$dst, dag64)]>;
}

let Defs = [EFLAGS], Predicates = [UseIncDec] in {
  defm RELEASE_INC : RELEASE_UNOP<
      (add (atomic_load_8 addr:$dst), (i8 1)),
      (add (atomic_load_16 addr:$dst), (i16 1)),
      (add (atomic_load_32 addr:$dst), (i32 1)),
      (add (atomic_load_64 addr:$dst), (i64 1))>;
  defm RELEASE_DEC : RELEASE_UNOP<
      (add (atomic_load_8 addr:$dst), (i8 -1)),
      (add (atomic_load_16 addr:$dst), (i16 -1)),
      (add (atomic_load_32 addr:$dst), (i32 -1)),
      (add (atomic_load_64 addr:$dst), (i64 -1))>;
}

/*
TODO: These don't work because the type inference of TableGen fails.
TODO: find a way to fix it.
let Defs = [EFLAGS] in {
  defm RELEASE_NEG : RELEASE_UNOP<
      (ineg (atomic_load_8 addr:$dst)),
      (ineg (atomic_load_16 addr:$dst)),
      (ineg (atomic_load_32 addr:$dst)),
      (ineg (atomic_load_64 addr:$dst))>;
}
// NOT doesn't set flags.
defm RELEASE_NOT : RELEASE_UNOP<
    (not (atomic_load_8 addr:$dst)),
    (not (atomic_load_16 addr:$dst)),
    (not (atomic_load_32 addr:$dst)),
    (not (atomic_load_64 addr:$dst))>;
*/

def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
                       "#RELEASE_MOV8mi PSEUDO!",
                       [(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
                        "#RELEASE_MOV16mi PSEUDO!",
                        [(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
                        "#RELEASE_MOV32mi PSEUDO!",
                        [(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
                          "#RELEASE_MOV64mi32 PSEUDO!",
                          [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;

def RELEASE_MOV8mr  : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
                        "#RELEASE_MOV8mr PSEUDO!",
                        [(atomic_store_8  addr:$dst, GR8 :$src)]>;
def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
                        "#RELEASE_MOV16mr PSEUDO!",
                        [(atomic_store_16 addr:$dst, GR16:$src)]>;
def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
                        "#RELEASE_MOV32mr PSEUDO!",
                        [(atomic_store_32 addr:$dst, GR32:$src)]>;
def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
                        "#RELEASE_MOV64mr PSEUDO!",
                        [(atomic_store_64 addr:$dst, GR64:$src)]>;

def ACQUIRE_MOV8rm  : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
                        "#ACQUIRE_MOV8rm PSEUDO!",
                        [(set GR8:$dst,  (atomic_load_8  addr:$src))]>;
def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
                        "#ACQUIRE_MOV16rm PSEUDO!",
                        [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
                        "#ACQUIRE_MOV32rm PSEUDO!",
                        [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
                        "#ACQUIRE_MOV64rm PSEUDO!",
                        [(set GR64:$dst, (atomic_load_64 addr:$src))]>;

//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
//===----------------------------------------------------------------------===//

// Use AND/OR to store 0/-1 in memory when optimizing for minsize.
This saves // binary size compared to a regular MOV, but it introduces an unnecessary // load, so is not suitable for regular or optsize functions. let Predicates = [OptForMinSize] in { def : Pat<(store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>; def : Pat<(store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>; def : Pat<(store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>; def : Pat<(store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>; def : Pat<(store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>; def : Pat<(store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>; } // In kernel code model, we can get the address of a label // into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of // the MOV64ri32 should accept these. def : Pat<(i64 (X86Wrapper tconstpool :$dst)), (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper mcsym:$dst)), (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>; // If we have small model and -static mode, it is safe to store global addresses // directly as immediates. FIXME: This is really a hack, the 'imm' predicate // for MOV64mi32 should handle this sort of thing. def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), (MOV64mi32 addr:$dst, tconstpool:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst), (MOV64mi32 addr:$dst, tjumptable:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), (MOV64mi32 addr:$dst, tglobaladdr:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, texternalsym:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, mcsym:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst), (MOV64mi32 addr:$dst, tblockaddress:$src)>, Requires<[NearData, IsNotPIC]>; def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>; def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>; // Calls // tls has some funny stuff here... // This corresponds to movabs $foo@tpoff, %rax def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), (MOV64ri32 tglobaltlsaddr :$dst)>; // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. def : Pat<(X86call (i64 tglobaladdr:$dst)), (CALL64pcrel32 tglobaladdr:$dst)>; def : Pat<(X86call (i64 texternalsym:$dst)), (CALL64pcrel32 texternalsym:$dst)>; // Tailcall stuff. The TCRETURN instructions execute after the epilog, so they // can never use callee-saved registers. That is the purpose of the GR64_TC // register classes. // // The only volatile register that is never used by the calling convention is // %r11. This happens when calling a vararg function with 6 arguments. // // Match an X86tcret that uses less than 7 volatile registers. 
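// (An X86tcret's operand list is: chain, target, stack offset immediate,
// then one operand per physical register carrying an argument, then glue;
// the PatFrag below simply counts those register operands.)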
def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
                             (X86tcret node:$ptr, node:$off), [{
  // X86tcret args: (*chain, ptr, imm, regs..., glue)
  unsigned NumRegs = 0;
  for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
    if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6)
      return false;
  return true;
}]>;

def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
          (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
          Requires<[Not64BitMode]>;

// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
          (TCRETURNmi addr:$dst, imm:$off)>,
          Requires<[Not64BitMode, IsNotPIC]>;

def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
          (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
          Requires<[NotLP64]>;

def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
          (TCRETURNdi texternalsym:$dst, imm:$off)>,
          Requires<[NotLP64]>;

def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
          (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
          Requires<[In64BitMode]>;

// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
          (TCRETURNmi64 addr:$dst, imm:$off)>,
          Requires<[In64BitMode]>;

def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
          Requires<[IsLP64]>;

def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
          Requires<[IsLP64]>;

// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)),
          (CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i32 texternalsym:$dst)),
          (CALLpcrel32 texternalsym:$dst)>;
def : Pat<(X86call (i32 imm:$dst)),
          (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;

// Comparisons.

// TEST R,R is smaller than CMP R,0
def : Pat<(X86cmp GR8:$src1, 0),
          (TEST8rr GR8:$src1, GR8:$src1)>;
def : Pat<(X86cmp GR16:$src1, 0),
          (TEST16rr GR16:$src1, GR16:$src1)>;
def : Pat<(X86cmp GR32:$src1, 0),
          (TEST32rr GR32:$src1, GR32:$src1)>;
def : Pat<(X86cmp GR64:$src1, 0),
          (TEST64rr GR64:$src1, GR64:$src1)>;

// Conditional moves with folded loads with operands swapped and conditions
// inverted.
multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
                  Instruction Inst64> {
  let Predicates = [HasCMov] in {
    def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
              (Inst16 GR16:$src2, addr:$src1)>;
    def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
              (Inst32 GR32:$src2, addr:$src1)>;
    def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
              (Inst64 GR64:$src2, addr:$src1)>;
  }
}

defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;

// zextload bool -> zextload byte
// i1 stored in one byte in zero-extended form.
// Upper bits cleanup should be executed before Store.
def : Pat<(zextloadi8i1  addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi64i1 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
// registers, use zero-extending loads so that the entire 64-bit register is
// defined, avoiding partial-register updates.
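// For example, (extloadi64i8 addr) is selected below as a 32-bit movzx plus
// a SUBREG_TO_REG, e.g. "movzbl (%rdi), %eax" (registers illustrative): the
// 32-bit movzx already zeroes bits 63:32 of the full 64-bit register.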
def : Pat<(extloadi8i1 addr:$src),   (MOV8rm      addr:$src)>;
def : Pat<(extloadi16i1 addr:$src),  (MOVZX16rm8  addr:$src)>;
def : Pat<(extloadi32i1 addr:$src),  (MOVZX32rm8  addr:$src)>;
def : Pat<(extloadi16i8 addr:$src),  (MOVZX16rm8  addr:$src)>;
def : Pat<(extloadi32i8 addr:$src),  (MOVZX32rm8  addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;

// For other extloads, use subregs, since the high contents of the register are
// defined after an extload.
def : Pat<(extloadi64i1 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i8 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i16 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i32 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;

// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG (MOVZX32rr8 GR8 :$src),
                                                    sub_16bit)>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;

// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
def : Pat<(i32 (anyext GR16:$src)),
          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;

def : Pat<(i64 (anyext GR8 :$src)),
          (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
def : Pat<(i64 (anyext GR16:$src)),
          (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
def : Pat<(i64 (anyext GR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>;

// Any instruction that defines a 32-bit result zeroes the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. Any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
def def32 : PatLeaf<(i32 GR32:$src), [{
  return N->getOpcode() != ISD::TRUNCATE &&
         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
         N->getOpcode() != ISD::CopyFromReg &&
         N->getOpcode() != ISD::AssertSext &&
         N->getOpcode() != ISD::AssertZext;
}]>;

// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)),
          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

//===----------------------------------------------------------------------===//
// Pattern match OR as ADD
//===----------------------------------------------------------------------===//

// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
// 3-addressified into an LEA instruction to avoid copies. However, we also
// want to finally emit these instructions as an or at the end of the code
// generator to make the generated code easier to read. To do this, we select
// into "disjoint bits" pseudo ops.

// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());

  KnownBits Known0;
  CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
  KnownBits Known1;
  CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
  return (~Known0.Zero & ~Known1.Zero) == 0;
}]>;

// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
// Try this before selecting to OR.
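// For example, when the low bit of %x is known to be zero, (or %x, 1)
// selects ADD32ri8_DB, which can later be 3-addressified to something like
// "leal 1(%eax), %ecx" (registers illustrative), or printed back as a plain
// OR when no LEA is needed.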
let AddedComplexity = 5, SchedRW = [WriteALU] in { let isConvertibleToThreeAddress = 1, Constraints = "$src1 = $dst", Defs = [EFLAGS] in { let isCommutable = 1 in { def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "", // orw/addw REG, REG [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>; def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "", // orl/addl REG, REG [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>; def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "", // orq/addq REG, REG [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>; } // isCommutable // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. def ADD16ri8_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "", // orw/addw REG, imm8 [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>; def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "", // orw/addw REG, imm [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>; def ADD32ri8_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "", // orl/addl REG, imm8 [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>; def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "", // orl/addl REG, imm [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>; def ADD64ri8_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "", // orq/addq REG, imm8 [(set GR64:$dst, (or_is_add GR64:$src1, i64immSExt8:$src2))]>; def ADD64ri32_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "", // orq/addq REG, imm [(set GR64:$dst, (or_is_add GR64:$src1, i64immSExt32:$src2))]>; } } // AddedComplexity, SchedRW //===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// // Odd encoding trick: -128 fits into an 8-bit immediate field while // +128 doesn't, so in this special case use a sub instead of an add. def : Pat<(add GR16:$src1, 128), (SUB16ri8 GR16:$src1, -128)>; def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst), (SUB16mi8 addr:$dst, -128)>; def : Pat<(add GR32:$src1, 128), (SUB32ri8 GR32:$src1, -128)>; def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), (SUB32mi8 addr:$dst, -128)>; def : Pat<(add GR64:$src1, 128), (SUB64ri8 GR64:$src1, -128)>; def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst), (SUB64mi8 addr:$dst, -128)>; // The same trick applies for 32-bit immediate fields in 64-bit // instructions. def : Pat<(add GR64:$src1, 0x0000000080000000), (SUB64ri32 GR64:$src1, 0xffffffff80000000)>; def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst), (SUB64mi32 addr:$dst, 0xffffffff80000000)>; // To avoid needing to materialize an immediate in a register, use a 32-bit and // with implicit zero-extension instead of a 64-bit and if the immediate has at // least 32 bits of leading zeros. If in addition the last 32 bits can be // represented with a sign extension of a 8 bit constant, use that. // This can also reduce instruction size by eliminating the need for the REX // prefix. // AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. 
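// For example, (and GR64:$x, 0x00000000fffffff0) can be selected as
//   andl $-16, %eax        // 8-bit immediate, no REX.W prefix, and the
//                          // upper 32 bits are cleared implicitly
// instead of materializing the 64-bit mask in a scratch register.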
let AddedComplexity = 1 in { def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), (SUBREG_TO_REG (i64 0), (AND32ri8 (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo8XForm imm:$imm))), sub_32bit)>; def : Pat<(and GR64:$src, i64immZExt32:$imm), (SUBREG_TO_REG (i64 0), (AND32ri (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; } // AddedComplexity = 1 // AddedComplexity is needed due to the increased complexity on the // i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all // the MOVZX patterns keeps thems together in DAGIsel tables. let AddedComplexity = 1 in { // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)), sub_16bit)>; // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (SUBREG_TO_REG (i64 0), (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), sub_32bit)>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), sub_32bit)>; // r & (2^8-1) ==> movz def : Pat<(and GR64:$src, 0xff), (SUBREG_TO_REG (i64 0), (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))), sub_32bit)>; } // AddedComplexity = 1 // sext_inreg patterns def : Pat<(sext_inreg GR32:$src, i16), (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>; def : Pat<(sext_inreg GR16:$src, i8), (EXTRACT_SUBREG (MOVSX32rr8 (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>; def : Pat<(sext_inreg GR64:$src, i32), (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; def : Pat<(sext_inreg GR64:$src, i16), (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; def : Pat<(sext_inreg GR64:$src, i8), (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; // sext, sext_load, zext, zext_load def: Pat<(i16 (sext GR8:$src)), (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>; def: Pat<(sextloadi16i8 addr:$src), (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>; def: Pat<(i16 (zext GR8:$src)), (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>; def: Pat<(zextloadi16i8 addr:$src), (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), (EXTRACT_SUBREG GR32:$src, sub_16bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit)>, Requires<[Not64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit)>, Requires<[Not64BitMode]>; def : Pat<(i32 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_32bit)>; def : Pat<(i16 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_16bit)>; def : Pat<(i8 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_8bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG GR32:$src, sub_8bit)>, Requires<[In64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG GR16:$src, sub_8bit)>, Requires<[In64BitMode]>; // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>, Requires<[Not64BitMode]>; def : Pat<(i8 (trunc (srl_su (i32 (anyext GR16:$src)), (i8 8)))), (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>, Requires<[Not64BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, 
                                    (i8 8)))),
          (EXTRACT_SUBREG GR32:$src, sub_8bit_hi)>,
      Requires<[Not64BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
          (EXTRACT_SUBREG
            (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
            sub_16bit)>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
          (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;

// h-register tricks.
// For now, be conservative on x86-64 and use an h-register extract only if the
// value is immediately zero-extended or stored, which are somewhat common
// cases. This uses a bunch of code to prevent a register requiring a REX prefix
// from being allocated in the same instruction as the h register, as there's
// currently no way to describe this requirement to the register allocator.

// h-register extract and zero-extend.
def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR64:$src, sub_8bit_hi)),
            sub_32bit)>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
            sub_32bit)>;
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
            sub_32bit)>;

// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG GR64:$src, sub_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>,
      Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>,
      Requires<[In64BitMode]>;

// (shl x, 1) ==> (add x, x)
// Note that if x is undef (immediate or otherwise), we could theoretically
// end up with the two uses of x getting different values, producing a result
// where the least significant bit is not 0. However, the probability of this
// happening is considered low enough that this is officially not a
// "real problem".
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;

// Helper imms to check if a mask doesn't change significant shift/rotate bits.
def immShift8 : ImmLeaf<i8, [{
  return countTrailingOnes<uint64_t>(Imm) >= 3;
}]>;
def immShift16 : ImmLeaf<i8, [{
  return countTrailingOnes<uint64_t>(Imm) >= 4;
}]>;
def immShift32 : ImmLeaf<i8, [{
  return countTrailingOnes<uint64_t>(Imm) >= 5;
}]>;
def immShift64 : ImmLeaf<i8, [{
  return countTrailingOnes<uint64_t>(Imm) >= 6;
}]>;

// Shift amount is implicitly masked.
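// For example, (srl GR32:$x, (and CL, 31)) can drop the mask and select a
// plain SHR32rCL, because the hardware already reduces the count modulo 32.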
multiclass MaskedShiftAmountPats<SDNode frag, string name> {
  // (shift x (and y, 31)) ==> (shift x, y)
  def : Pat<(frag GR8:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
  def : Pat<(frag GR16:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
  def : Pat<(frag GR32:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
  def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "8mCL") addr:$dst)>;
  def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "16mCL") addr:$dst)>;
  def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "32mCL") addr:$dst)>;

  // (shift x (and y, 63)) ==> (shift x, y)
  def : Pat<(frag GR64:$src1, (and CL, immShift64)),
            (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
  def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
            (!cast<Instruction>(name # "64mCL") addr:$dst)>;
}

defm : MaskedShiftAmountPats<shl, "SHL">;
defm : MaskedShiftAmountPats<srl, "SHR">;
defm : MaskedShiftAmountPats<sra, "SAR">;

// ROL/ROR instructions allow a stronger mask optimization than shift for 8- and
// 16-bit. We can remove a mask of any (bitwidth - 1) on the rotation amount
// because over-rotating produces the same result. This is noted in the Intel
// docs with: "tempCOUNT <- (COUNT & COUNTMASK) MOD SIZE". Masking the rotation
// amount could affect EFLAGS results, but that does not matter because we are
// not tracking flags for these nodes.
multiclass MaskedRotateAmountPats<SDNode frag, string name> {
  // (rot x (and y, BitWidth - 1)) ==> (rot x, y)
  def : Pat<(frag GR8:$src1, (and CL, immShift8)),
            (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
  def : Pat<(frag GR16:$src1, (and CL, immShift16)),
            (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
  def : Pat<(frag GR32:$src1, (and CL, immShift32)),
            (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
  def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift8)), addr:$dst),
            (!cast<Instruction>(name # "8mCL") addr:$dst)>;
  def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift16)), addr:$dst),
            (!cast<Instruction>(name # "16mCL") addr:$dst)>;
  def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
            (!cast<Instruction>(name # "32mCL") addr:$dst)>;

  // (rot x (and y, 63)) ==> (rot x, y)
  def : Pat<(frag GR64:$src1, (and CL, immShift64)),
            (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
  def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
            (!cast<Instruction>(name # "64mCL") addr:$dst)>;
}

defm : MaskedRotateAmountPats<rotl, "ROL">;
defm : MaskedRotateAmountPats<rotr, "ROR">;

// Double shift amount is implicitly masked.
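// Likewise, (X86shld GR32:$a, GR32:$b, (and CL, 31)) can select SHLD32rrCL
// directly, since SHLD/SHRD mask their count the same way.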
multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
  // (shift x (and y, 31)) ==> (shift x, y)
  def : Pat<(frag GR16:$src1, GR16:$src2, (and CL, immShift32)),
            (!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
  def : Pat<(frag GR32:$src1, GR32:$src2, (and CL, immShift32)),
            (!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;

  // (shift x (and y, 63)) ==> (shift x, y)
  def : Pat<(frag GR64:$src1, GR64:$src2, (and CL, immShift64)),
            (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
}

defm : MaskedDoubleShiftAmountPats<X86shld, "SHLD">;
defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;

let Predicates = [HasBMI2] in {
  let AddedComplexity = 1 in {
    def : Pat<(sra GR32:$src1, (and GR8:$src2, immShift32)),
              (SARX32rr GR32:$src1,
                        (INSERT_SUBREG
                          (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
    def : Pat<(sra GR64:$src1, (and GR8:$src2, immShift64)),
              (SARX64rr GR64:$src1,
                        (INSERT_SUBREG
                          (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;

    def : Pat<(srl GR32:$src1, (and GR8:$src2, immShift32)),
              (SHRX32rr GR32:$src1,
                        (INSERT_SUBREG
                          (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
    def : Pat<(srl GR64:$src1, (and GR8:$src2, immShift64)),
              (SHRX64rr GR64:$src1,
                        (INSERT_SUBREG
                          (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;

    def : Pat<(shl GR32:$src1, (and GR8:$src2, immShift32)),
              (SHLX32rr GR32:$src1,
                        (INSERT_SUBREG
                          (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
    def : Pat<(shl GR64:$src1, (and GR8:$src2, immShift64)),
              (SHLX64rr GR64:$src1,
                        (INSERT_SUBREG
                          (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
  }

  let AddedComplexity = -20 in {
    def : Pat<(sra (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
              (SARX32rm addr:$src1,
                        (INSERT_SUBREG
                          (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
    def : Pat<(sra (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
              (SARX64rm addr:$src1,
                        (INSERT_SUBREG
                          (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;

    def : Pat<(srl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
              (SHRX32rm addr:$src1,
                        (INSERT_SUBREG
                          (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
    def : Pat<(srl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
              (SHRX64rm addr:$src1,
                        (INSERT_SUBREG
                          (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;

    def : Pat<(shl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
              (SHLX32rm addr:$src1,
                        (INSERT_SUBREG
                          (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
    def : Pat<(shl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
              (SHLX64rm addr:$src1,
                        (INSERT_SUBREG
                          (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
  }
}

// (anyext (setcc_carry)) -> (setcc_carry)
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C32r)>;
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
          (SETB_C32r)>;

//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//

// add reg, reg
def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr  GR8 :$src1, GR8 :$src2)>;
def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;

// add reg, mem
def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
          (ADD8rm GR8:$src1, addr:$src2)>;
def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
          (ADD16rm GR16:$src1, addr:$src2)>;
def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
          (ADD32rm GR32:$src1, addr:$src2)>;

// add reg, imm
def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri  GR8:$src1 , imm:$src2)>;
def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
def : Pat<(add GR32:$src1, imm:$src2),
(ADD32ri GR32:$src1, imm:$src2)>; def : Pat<(add GR16:$src1, i16immSExt8:$src2), (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(add GR32:$src1, i32immSExt8:$src2), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; // sub reg, reg def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; // sub reg, mem def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), (SUB8rm GR8:$src1, addr:$src2)>; def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), (SUB16rm GR16:$src1, addr:$src2)>; def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), (SUB32rm GR32:$src1, addr:$src2)>; // sub reg, imm def : Pat<(sub GR8:$src1, imm:$src2), (SUB8ri GR8:$src1, imm:$src2)>; def : Pat<(sub GR16:$src1, imm:$src2), (SUB16ri GR16:$src1, imm:$src2)>; def : Pat<(sub GR32:$src1, imm:$src2), (SUB32ri GR32:$src1, imm:$src2)>; def : Pat<(sub GR16:$src1, i16immSExt8:$src2), (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(sub GR32:$src1, i32immSExt8:$src2), (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; // sub 0, reg def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>; def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>; def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>; def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>; // sub reg, relocImm def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2), (SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>; def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt32_su:$src2), (SUB64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>; // mul reg, reg def : Pat<(mul GR16:$src1, GR16:$src2), (IMUL16rr GR16:$src1, GR16:$src2)>; def : Pat<(mul GR32:$src1, GR32:$src2), (IMUL32rr GR32:$src1, GR32:$src2)>; // mul reg, mem def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), (IMUL16rm GR16:$src1, addr:$src2)>; def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), (IMUL32rm GR32:$src1, addr:$src2)>; // mul reg, imm def : Pat<(mul GR16:$src1, imm:$src2), (IMUL16rri GR16:$src1, imm:$src2)>; def : Pat<(mul GR32:$src1, imm:$src2), (IMUL32rri GR32:$src1, imm:$src2)>; def : Pat<(mul GR16:$src1, i16immSExt8:$src2), (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(mul GR32:$src1, i32immSExt8:$src2), (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; // reg = mul mem, imm def : Pat<(mul (loadi16 addr:$src1), imm:$src2), (IMUL16rmi addr:$src1, imm:$src2)>; def : Pat<(mul (loadi32 addr:$src1), imm:$src2), (IMUL32rmi addr:$src1, imm:$src2)>; def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2), (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2), (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; // Patterns for nodes that do not produce flags, for instructions that do. 
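// For example, a plain (add GR64:$a, GR64:$b) whose flag result is unused
// still selects ADD64rr below; the EFLAGS def is simply left dead.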
// addition def : Pat<(add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; def : Pat<(add GR64:$src1, i64immSExt8:$src2), (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), (ADD64rm GR64:$src1, addr:$src2)>; // subtraction def : Pat<(sub GR64:$src1, GR64:$src2), (SUB64rr GR64:$src1, GR64:$src2)>; def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), (SUB64rm GR64:$src1, addr:$src2)>; def : Pat<(sub GR64:$src1, i64immSExt8:$src2), (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(sub GR64:$src1, i64immSExt32:$src2), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; // Multiply def : Pat<(mul GR64:$src1, GR64:$src2), (IMUL64rr GR64:$src1, GR64:$src2)>; def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), (IMUL64rm GR64:$src1, addr:$src2)>; def : Pat<(mul GR64:$src1, i64immSExt8:$src2), (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(mul GR64:$src1, i64immSExt32:$src2), (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2), (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>; def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; // Increment/Decrement reg. // Do not make INC/DEC if it is slow let Predicates = [UseIncDec] in { def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>; def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; } // or reg/reg. 
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>; // or reg/mem def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), (OR8rm GR8:$src1, addr:$src2)>; def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), (OR16rm GR16:$src1, addr:$src2)>; def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), (OR32rm GR32:$src1, addr:$src2)>; def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), (OR64rm GR64:$src1, addr:$src2)>; // or reg/imm def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; def : Pat<(or GR16:$src1, i16immSExt8:$src2), (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(or GR32:$src1, i32immSExt8:$src2), (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; def : Pat<(or GR64:$src1, i64immSExt8:$src2), (OR64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(or GR64:$src1, i64immSExt32:$src2), (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; // xor reg/reg def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>; // xor reg/mem def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), (XOR8rm GR8:$src1, addr:$src2)>; def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), (XOR16rm GR16:$src1, addr:$src2)>; def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), (XOR32rm GR32:$src1, addr:$src2)>; def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), (XOR64rm GR64:$src1, addr:$src2)>; // xor reg/imm def : Pat<(xor GR8:$src1, imm:$src2), (XOR8ri GR8:$src1, imm:$src2)>; def : Pat<(xor GR16:$src1, imm:$src2), (XOR16ri GR16:$src1, imm:$src2)>; def : Pat<(xor GR32:$src1, imm:$src2), (XOR32ri GR32:$src1, imm:$src2)>; def : Pat<(xor GR16:$src1, i16immSExt8:$src2), (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(xor GR32:$src1, i32immSExt8:$src2), (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; def : Pat<(xor GR64:$src1, i64immSExt8:$src2), (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(xor GR64:$src1, i64immSExt32:$src2), (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; // and reg/reg def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>; // and reg/mem def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), (AND8rm GR8:$src1, addr:$src2)>; def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), (AND16rm GR16:$src1, addr:$src2)>; def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), (AND32rm GR32:$src1, addr:$src2)>; def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), (AND64rm GR64:$src1, addr:$src2)>; // and reg/imm def : Pat<(and GR8:$src1, imm:$src2), (AND8ri GR8:$src1, imm:$src2)>; def : Pat<(and GR16:$src1, imm:$src2), (AND16ri GR16:$src1, imm:$src2)>; def : Pat<(and GR32:$src1, imm:$src2), (AND32ri GR32:$src1, imm:$src2)>; def : Pat<(and GR16:$src1, i16immSExt8:$src2), (AND16ri8 GR16:$src1, i16immSExt8:$src2)>; def : Pat<(and GR32:$src1, i32immSExt8:$src2), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; def : 
Pat<(and GR64:$src1, i64immSExt8:$src2), (AND64ri8 GR64:$src1, i64immSExt8:$src2)>; def : Pat<(and GR64:$src1, i64immSExt32:$src2), (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; // Bit scan instruction patterns to match explicit zero-undef behavior. def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>; def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>; def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>; def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>; def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>; def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>; // When HasMOVBE is enabled it is possible to get a non-legalized // register-register 16 bit bswap. This maps it to a ROL instruction. let Predicates = [HasMOVBE] in { def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>; } diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 4b8c24a1c047..5581fd462a1d 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -1,358 +1,358 @@ //===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the X86 jump, return, call, and related instructions. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Control Flow Instructions. // // Return instructions. // // The X86retflag return instructions are variadic because we may add ST0 and // ST1 arguments when returning values on the x87 stack. let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in { def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret{l}", [], IIC_RET>, OpSize32, Requires<[Not64BitMode]>; def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret{q}", [], IIC_RET>, OpSize32, Requires<[In64BitMode]>; def RETW : I <0xC3, RawFrm, (outs), (ins), "ret{w}", [], IIC_RET>, OpSize16; def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret{l}\t$amt", [], IIC_RET_IMM>, OpSize32, Requires<[Not64BitMode]>; def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret{q}\t$amt", [], IIC_RET_IMM>, OpSize32, Requires<[In64BitMode]>; def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), "ret{w}\t$amt", [], IIC_RET_IMM>, OpSize16; def LRETL : I <0xCB, RawFrm, (outs), (ins), "{l}ret{l|f}", [], IIC_RET>, OpSize32; def LRETQ : RI <0xCB, RawFrm, (outs), (ins), "{l}ret{|f}q", [], IIC_RET>, Requires<[In64BitMode]>; def LRETW : I <0xCB, RawFrm, (outs), (ins), "{l}ret{w|f}", [], IIC_RET>, OpSize16; def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), "{l}ret{l|f}\t$amt", [], IIC_RET>, OpSize32; def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt), "{l}ret{|f}q\t$amt", [], IIC_RET>, Requires<[In64BitMode]>; def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize16; // The machine return from interrupt instruction, but sometimes we need to // perform a post-epilogue stack adjustment. Codegen emits the pseudo form // which expands to include an SP adjustment if necessary. 
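// For example, an x86-interrupt handler that had to pop a pushed error code
// can return through the IRET pseudo with a nonzero $adj, expanding to an
// SP adjustment followed by the plain iret (the exact value is illustrative).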
def IRET16 : I   <0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>,
             OpSize16;
def IRET32 : I   <0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>,
             OpSize32;
def IRET64 : RI  <0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
             Requires<[In64BitMode]>;
let isCodeGenOnly = 1 in
def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
def RET  : PseudoI<(outs), (ins i32imm:$adj, variable_ops),
                   [(X86retflag timm:$adj)]>;
}

// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
  def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
                       "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
    def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget16:$dst),
                          "jmp\t$dst", [], IIC_JMP_REL>, OpSize16;
    def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget32:$dst),
                          "jmp\t$dst", [], IIC_JMP_REL>, OpSize32;
  }
}

// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
  multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
    def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
                       [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>;
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
      def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
                         [], IIC_Jcc>, OpSize16, TB;
      def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
                         [], IIC_Jcc>, TB, OpSize32;
    }
  }
}

defm JO  : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
defm JB  : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
defm JE  : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
defm JA  : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
defm JS  : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
defm JP  : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
defm JL  : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG  : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;

// jcx/jecx/jrcx instructions.
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
  // These are the 32-bit versions of this instruction for the asmparser. In
  // 32-bit mode, the address size prefix is jcxz and the unprefixed version is
  // jecxz.
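  // For example, in 32-bit mode the bare 0xE3 encoding is "jecxz", and adding
  // the 0x67 address-size prefix turns it into "jcxz"; the Uses lists below
  // pick the matching count register for each form.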
let Uses = [CX] in def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), "jcxz\t$dst", [], IIC_JCXZ>, AdSize16, Requires<[Not64BitMode]>; let Uses = [ECX] in def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), "jecxz\t$dst", [], IIC_JCXZ>, AdSize32; let Uses = [RCX] in def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64, Requires<[In64BitMode]>; } // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP16r : I<0xFF, MRM4r, (outs), (ins GR16:$dst), "jmp{w}\t{*}$dst", [(brind GR16:$dst)], IIC_JMP_REG>, Requires<[Not64BitMode]>, OpSize16, Sched<[WriteJump]>; def JMP16m : I<0xFF, MRM4m, (outs), (ins i16mem:$dst), "jmp{w}\t{*}$dst", [(brind (loadi16 addr:$dst))], IIC_JMP_MEM>, Requires<[Not64BitMode]>, OpSize16, Sched<[WriteJumpLd]>; def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[Not64BitMode]>, OpSize32, Sched<[WriteJump]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[Not64BitMode]>, OpSize32, Sched<[WriteJumpLd]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>, Sched<[WriteJump]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>, Sched<[WriteJumpLd]>; let Predicates = [Not64BitMode] in { def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), "ljmp{w}\t$seg, $off", [], IIC_JMP_FAR_PTR>, OpSize16, Sched<[WriteJump]>; def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), "ljmp{l}\t$seg, $off", [], IIC_JMP_FAR_PTR>, OpSize32, Sched<[WriteJump]>; } def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>, Sched<[WriteJump]>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize16, Sched<[WriteJumpLd]>; def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst), "{l}jmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize32, Sched<[WriteJumpLd]>; } // Loop instructions let SchedRW = [WriteJump] in { def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>; def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>; def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>; } //===----------------------------------------------------------------------===// // Call Instructions... // let isCall = 1 in // All calls clobber the non-callee saved registers. ESP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. Uses for argument // registers are added manually. 
- let Uses = [ESP] in { + let Uses = [ESP, SSP] in { def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst), "call{l}\t$dst", [], IIC_CALL_RI>, OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>; let hasSideEffects = 0 in def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, (outs), (ins i16imm_pcrel:$dst), "call{w}\t$dst", [], IIC_CALL_RI>, OpSize16, Sched<[WriteJump]>; def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst), "call{w}\t{*}$dst", [(X86call GR16:$dst)], IIC_CALL_RI>, OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>; def CALL16m : I<0xFF, MRM2m, (outs), (ins i16mem:$dst), "call{w}\t{*}$dst", [(X86call (loadi16 addr:$dst))], IIC_CALL_MEM>, OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>, Sched<[WriteJumpLd]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall]>, Sched<[WriteJumpLd]>; let Predicates = [Not64BitMode] in { def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), "lcall{w}\t$seg, $off", [], IIC_CALL_FAR_PTR>, OpSize16, Sched<[WriteJump]>; def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), "lcall{l}\t$seg, $off", [], IIC_CALL_FAR_PTR>, OpSize32, Sched<[WriteJump]>; } def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize16, Sched<[WriteJumpLd]>; def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), "{l}call{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize32, Sched<[WriteJumpLd]>; } // Tail call stuff. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [ESP] in { + let Uses = [ESP, SSP] in { def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable; def TCRETURNri : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; let mayLoad = 1 in def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset), []>; // FIXME: The should be pseudo instructions that are lowered when going to // mcinst. def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), (ins i32imm_pcrel:$dst), "jmp\t$dst", [], IIC_JMP_REL>; def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead. let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst), "jmp{l}\t{*}$dst", [], IIC_JMP_MEM>; } // Conditional tail calls are similar to the above, but they are branches // rather than barriers, and they use EFLAGS. let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [ESP, EFLAGS] in { + let Uses = [ESP, EFLAGS, SSP] in { def TCRETURNdicc : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; // This gets substituted to a conditional jump instruction in MC lowering. def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs), (ins i32imm_pcrel:$dst, i32imm:$cond), "", [], IIC_JMP_REL>; } //===----------------------------------------------------------------------===// // Call Instructions... // // RSP is marked as a use to prevent stack-pointer assignments that appear // immediately before calls from potentially appearing dead. 
Uses for argument // registers are added manually. -let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in { +let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { // NOTE: this pattern doesn't match "X86call imm", because we do not know // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), "call{q}\t$dst", [], IIC_CALL_RI>, OpSize32, Requires<[In64BitMode]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)], IIC_CALL_RI>, Requires<[In64BitMode]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, Requires<[In64BitMode,FavorMemIndirectCall]>; def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1, + isCodeGenOnly = 1, Uses = [RSP, SSP], usesCustomInserter = 1, SchedRW = [WriteJump] in { def TCRETURNdi64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst, i32imm:$offset), []>; def TCRETURNri64 : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; let mayLoad = 1 in def TCRETURNmi64 : PseudoI<(outs), (ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable; def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), "jmp\t$dst", [], IIC_JMP_REL>; def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), "jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), "jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; // Win64 wants indirect jumps leaving the function to have a REX_W prefix. let hasREX_WPrefix = 1 in { def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; let mayLoad = 1 in def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), "rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>; } } // Conditional tail calls are similar to the above, but they are branches // rather than barriers, and they use EFLAGS. let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [RSP, EFLAGS] in { + let Uses = [RSP, EFLAGS, SSP] in { def TCRETURNdi64cc : PseudoI<(outs), (ins i64i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; // This gets substituted to a conditional jump instruction in MC lowering. def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, i32imm:$cond), "", [], IIC_JMP_REL>; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index a790d1a4141d..0a6f93bbc23c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -1,3313 +1,3315 @@ //===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the X86 instruction set, defining the instructions, and // properties of the instructions which are needed for code generation, machine // code emission, and analysis. 
// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // X86 specific DAG Nodes. // def SDTIntShiftDOp: SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>]>; def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>; def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; //def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; def SDTX86Cmov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>, SDTCisVT<4, i32>]>; // Unary and binary operator instructions that set EFLAGS as a side-effect. def SDTUnaryArithWithFlags : SDTypeProfile<2, 1, [SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisVT<1, i32>]>; def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>]>; // SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; // RES1, RES2, FLAGS = op LHS, RHS def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>]>; def SDTX86BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; def SDTX86SetCC : SDTypeProfile<1, 2, [SDTCisVT<0, i8>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; def SDTX86SetCC_C : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>; def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>; def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDTX86caspairSaveEbx8 : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; def SDTX86caspairSaveRbx16 : SDTypeProfile<1, 3, [SDTCisVT<0, i64>, SDTCisPtrTy<1>, SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDTLockUnaryArithWithFlags : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>; def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, SDTCisVT<1, iPTR>, SDTCisVT<2, iPTR>]>; def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, i8>, SDTCisVT<4, i32>]>; def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; def SDTX86Void : SDTypeProfile<0, 0, []>; def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, 
[SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, [SDNPHasChain,SDNPSideEffect]>; def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, [SDNPHasChain]>; def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>; def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>; def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>; def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>; def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>; def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>; def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>; def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond, [SDNPHasChain]>; def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>; def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand, [SDNPHasChain, SDNPSideEffect]>; def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand, [SDNPHasChain, SDNPSideEffect]>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86caspair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86cas8save_ebx : SDNode<"X86ISD::LCMPXCHG8_SAVE_EBX_DAG", SDTX86caspairSaveEbx8, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86cas16save_rbx : SDNode<"X86ISD::LCMPXCHG16_SAVE_RBX_DAG", SDTX86caspairSaveRbx16, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue]>; def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, [SDNPHasChain, SDNPVariadic]>; def X86vaarg64 : SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; def X86callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def X86callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>; def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>; def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, 
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPHasChain, SDNPSideEffect]>; def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; def X86eh_sjlj_setup_dispatch : SDNode<"X86ISD::EH_SJLJ_SETUP_DISPATCH", SDTypeProfile<0, 0, []>, [SDNPHasChain, SDNPSideEffect]>; def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags, [SDNPCommutative]>; def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>; def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>; def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86lock_add : SDNode<"X86ISD::LADD", SDTLockBinaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86lock_sub : SDNode<"X86ISD::LSUB", SDTLockBinaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86lock_or : SDNode<"X86ISD::LOR", SDTLockBinaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86lock_xor : SDNode<"X86ISD::LXOR", SDTLockBinaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA, [SDNPHasChain, SDNPOutGlue]>; def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, [SDNPHasChain]>; def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def X86lwpins : SDNode<"X86ISD::LWPINS", SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>; //===----------------------------------------------------------------------===// // X86 Operand Definitions. // // A version of ptr_rc which excludes SP, ESP, and RSP. This is used for // the index operand of an address, to conform to x86 encoding restrictions. def ptr_rc_nosp : PointerLikeRegClass<1>; // *mem - Operand definitions for the funky X86 addressing mode operands. 
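// Each X86 memory operand below is a 5-tuple of base register, scale
// immediate, index register, displacement, and segment register, i.e. the
// (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG) MIOperandInfo; for
// example, "movl 8(%eax,%ebx,4), %ecx" decomposes into base=%eax, scale=4,
// index=%ebx, disp=8, and no segment override.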
//
def X86MemAsmOperand : AsmOperandClass {
  let Name = "Mem";
}
let RenderMethod = "addMemOperands", SuperClasses = [X86MemAsmOperand] in {
  def X86Mem8AsmOperand   : AsmOperandClass { let Name = "Mem8"; }
  def X86Mem16AsmOperand  : AsmOperandClass { let Name = "Mem16"; }
  def X86Mem32AsmOperand  : AsmOperandClass { let Name = "Mem32"; }
  def X86Mem64AsmOperand  : AsmOperandClass { let Name = "Mem64"; }
  def X86Mem80AsmOperand  : AsmOperandClass { let Name = "Mem80"; }
  def X86Mem128AsmOperand : AsmOperandClass { let Name = "Mem128"; }
  def X86Mem256AsmOperand : AsmOperandClass { let Name = "Mem256"; }
  def X86Mem512AsmOperand : AsmOperandClass { let Name = "Mem512"; }
  // Gather mem operands
  def X86Mem64_RC128Operand  : AsmOperandClass { let Name = "Mem64_RC128"; }
  def X86Mem128_RC128Operand : AsmOperandClass { let Name = "Mem128_RC128"; }
  def X86Mem256_RC128Operand : AsmOperandClass { let Name = "Mem256_RC128"; }
  def X86Mem128_RC256Operand : AsmOperandClass { let Name = "Mem128_RC256"; }
  def X86Mem256_RC256Operand : AsmOperandClass { let Name = "Mem256_RC256"; }

  def X86Mem64_RC128XOperand  : AsmOperandClass { let Name = "Mem64_RC128X"; }
  def X86Mem128_RC128XOperand : AsmOperandClass { let Name = "Mem128_RC128X"; }
  def X86Mem256_RC128XOperand : AsmOperandClass { let Name = "Mem256_RC128X"; }
  def X86Mem128_RC256XOperand : AsmOperandClass { let Name = "Mem128_RC256X"; }
  def X86Mem256_RC256XOperand : AsmOperandClass { let Name = "Mem256_RC256X"; }
  def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; }
  def X86Mem256_RC512Operand  : AsmOperandClass { let Name = "Mem256_RC512"; }
  def X86Mem512_RC512Operand  : AsmOperandClass { let Name = "Mem512_RC512"; }
}

def X86AbsMemAsmOperand : AsmOperandClass {
  let Name = "AbsMem";
  let SuperClasses = [X86MemAsmOperand];
}

class X86MemOperand<string printMethod,
                    AsmOperandClass parserMatchClass = X86MemAsmOperand>
    : Operand<iPTR> {
  let PrintMethod = printMethod;
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
  let ParserMatchClass = parserMatchClass;
  let OperandType = "OPERAND_MEMORY";
}

// Gather mem operands
class X86VMemOperand<RegisterClass RC, string printMethod,
                     AsmOperandClass parserMatchClass>
    : X86MemOperand<printMethod, parserMatchClass> {
  let MIOperandInfo = (ops ptr_rc, i8imm, RC, i32imm, SEGMENT_REG);
}

def anymem : X86MemOperand<"printanymem">;

def opaque32mem : X86MemOperand<"printopaquemem">;
def opaque48mem : X86MemOperand<"printopaquemem">;
def opaque80mem : X86MemOperand<"printopaquemem">;
def opaque512mem : X86MemOperand<"printopaquemem">;

def i8mem   : X86MemOperand<"printi8mem",   X86Mem8AsmOperand>;
def i16mem  : X86MemOperand<"printi16mem",  X86Mem16AsmOperand>;
def i32mem  : X86MemOperand<"printi32mem",  X86Mem32AsmOperand>;
def i64mem  : X86MemOperand<"printi64mem",  X86Mem64AsmOperand>;
def i128mem : X86MemOperand<"printi128mem", X86Mem128AsmOperand>;
def i256mem : X86MemOperand<"printi256mem", X86Mem256AsmOperand>;
def i512mem : X86MemOperand<"printi512mem", X86Mem512AsmOperand>;
def f32mem  : X86MemOperand<"printf32mem",  X86Mem32AsmOperand>;
def f64mem  : X86MemOperand<"printf64mem",  X86Mem64AsmOperand>;
def f80mem  : X86MemOperand<"printf80mem",  X86Mem80AsmOperand>;
def f128mem : X86MemOperand<"printf128mem", X86Mem128AsmOperand>;
def f256mem : X86MemOperand<"printf256mem", X86Mem256AsmOperand>;
def f512mem : X86MemOperand<"printf512mem", X86Mem512AsmOperand>;

def v512mem : X86VMemOperand<VR512, "printf512mem", X86Mem512AsmOperand>;

// Gather mem operands
def vx64mem  : X86VMemOperand<VR128,  "printi64mem",  X86Mem64_RC128Operand>;
def vx128mem : X86VMemOperand<VR128,  "printi128mem", X86Mem128_RC128Operand>;
def vx256mem : X86VMemOperand<VR128,  "printi256mem", X86Mem256_RC128Operand>;
def vy128mem : X86VMemOperand<VR256,  "printi128mem", X86Mem128_RC256Operand>;
def vy256mem : X86VMemOperand<VR256,  "printi256mem", X86Mem256_RC256Operand>;

def vx64xmem  : X86VMemOperand<VR128X, "printi64mem",  X86Mem64_RC128XOperand>;
def vx128xmem : X86VMemOperand<VR128X, "printi128mem", X86Mem128_RC128XOperand>;
def vx256xmem : X86VMemOperand<VR128X, "printi256mem", X86Mem256_RC128XOperand>;
def vy128xmem : X86VMemOperand<VR256X, "printi128mem", X86Mem128_RC256XOperand>;
def vy256xmem : X86VMemOperand<VR256X, "printi256mem", X86Mem256_RC256XOperand>;
def vy512mem  : X86VMemOperand<VR256X, "printi512mem", X86Mem512_RC256XOperand>;
def vz256xmem : X86VMemOperand<VR512,  "printi256mem", X86Mem256_RC512Operand>;
def vz512mem  : X86VMemOperand<VR512,  "printi512mem", X86Mem512_RC512Operand>;

// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
// of a plain GPR, so that it doesn't potentially require a REX prefix.
def ptr_rc_norex : PointerLikeRegClass<2>;
def ptr_rc_norex_nosp : PointerLikeRegClass<3>;

def i8mem_NOREX : Operand<iPTR> {
  let PrintMethod = "printi8mem";
  let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm,
                       SEGMENT_REG);
  let ParserMatchClass = X86Mem8AsmOperand;
  let OperandType = "OPERAND_MEMORY";
}

// GPRs available for tailcall.
// It represents GR32_TC, GR64_TC or GR64_TCW64.
def ptr_rc_tailcall : PointerLikeRegClass<4>;

// Special i32mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved registers are popped.
def i32mem_TC : Operand<i32> {
  let PrintMethod = "printi32mem";
  let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall, i32imm,
                       SEGMENT_REG);
  let ParserMatchClass = X86Mem32AsmOperand;
  let OperandType = "OPERAND_MEMORY";
}

// Special i64mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved registers are popped.
def i64mem_TC : Operand<i64> {
  let PrintMethod = "printi64mem";
  let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall, i32imm,
                       SEGMENT_REG);
  let ParserMatchClass = X86Mem64AsmOperand;
  let OperandType = "OPERAND_MEMORY";
}

let OperandType = "OPERAND_PCREL",
    ParserMatchClass = X86AbsMemAsmOperand,
    PrintMethod = "printPCRelImm" in {
  def i32imm_pcrel : Operand<i32>;
  def i16imm_pcrel : Operand<i16>;

  // Branch targets have OtherVT type and print as pc-relative values.
  def brtarget : Operand<OtherVT>;
  def brtarget8 : Operand<OtherVT>;
}

// Special parser to detect 16-bit mode to select 16-bit displacement.
def X86AbsMem16AsmOperand : AsmOperandClass {
  let Name = "AbsMem16";
  let RenderMethod = "addAbsMemOperands";
  let SuperClasses = [X86AbsMemAsmOperand];
}

// Branch targets have OtherVT type and print as pc-relative values.
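// For example, in 16-bit mode "jmp target" must encode a 16-bit displacement
// (the brtarget16 form below, matched via AbsMem16), while the same mnemonic
// in 32-bit mode selects the 32-bit brtarget32 form.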
let OperandType = "OPERAND_PCREL", PrintMethod = "printPCRelImm" in { let ParserMatchClass = X86AbsMem16AsmOperand in def brtarget16 : Operand; let ParserMatchClass = X86AbsMemAsmOperand in def brtarget32 : Operand; } let RenderMethod = "addSrcIdxOperands" in { def X86SrcIdx8Operand : AsmOperandClass { let Name = "SrcIdx8"; let SuperClasses = [X86Mem8AsmOperand]; } def X86SrcIdx16Operand : AsmOperandClass { let Name = "SrcIdx16"; let SuperClasses = [X86Mem16AsmOperand]; } def X86SrcIdx32Operand : AsmOperandClass { let Name = "SrcIdx32"; let SuperClasses = [X86Mem32AsmOperand]; } def X86SrcIdx64Operand : AsmOperandClass { let Name = "SrcIdx64"; let SuperClasses = [X86Mem64AsmOperand]; } } // RenderMethod = "addSrcIdxOperands" let RenderMethod = "addDstIdxOperands" in { def X86DstIdx8Operand : AsmOperandClass { let Name = "DstIdx8"; let SuperClasses = [X86Mem8AsmOperand]; } def X86DstIdx16Operand : AsmOperandClass { let Name = "DstIdx16"; let SuperClasses = [X86Mem16AsmOperand]; } def X86DstIdx32Operand : AsmOperandClass { let Name = "DstIdx32"; let SuperClasses = [X86Mem32AsmOperand]; } def X86DstIdx64Operand : AsmOperandClass { let Name = "DstIdx64"; let SuperClasses = [X86Mem64AsmOperand]; } } // RenderMethod = "addDstIdxOperands" let RenderMethod = "addMemOffsOperands" in { def X86MemOffs16_8AsmOperand : AsmOperandClass { let Name = "MemOffs16_8"; let SuperClasses = [X86Mem8AsmOperand]; } def X86MemOffs16_16AsmOperand : AsmOperandClass { let Name = "MemOffs16_16"; let SuperClasses = [X86Mem16AsmOperand]; } def X86MemOffs16_32AsmOperand : AsmOperandClass { let Name = "MemOffs16_32"; let SuperClasses = [X86Mem32AsmOperand]; } def X86MemOffs32_8AsmOperand : AsmOperandClass { let Name = "MemOffs32_8"; let SuperClasses = [X86Mem8AsmOperand]; } def X86MemOffs32_16AsmOperand : AsmOperandClass { let Name = "MemOffs32_16"; let SuperClasses = [X86Mem16AsmOperand]; } def X86MemOffs32_32AsmOperand : AsmOperandClass { let Name = "MemOffs32_32"; let SuperClasses = [X86Mem32AsmOperand]; } def X86MemOffs32_64AsmOperand : AsmOperandClass { let Name = "MemOffs32_64"; let SuperClasses = [X86Mem64AsmOperand]; } def X86MemOffs64_8AsmOperand : AsmOperandClass { let Name = "MemOffs64_8"; let SuperClasses = [X86Mem8AsmOperand]; } def X86MemOffs64_16AsmOperand : AsmOperandClass { let Name = "MemOffs64_16"; let SuperClasses = [X86Mem16AsmOperand]; } def X86MemOffs64_32AsmOperand : AsmOperandClass { let Name = "MemOffs64_32"; let SuperClasses = [X86Mem32AsmOperand]; } def X86MemOffs64_64AsmOperand : AsmOperandClass { let Name = "MemOffs64_64"; let SuperClasses = [X86Mem64AsmOperand]; } } // RenderMethod = "addMemOffsOperands" class X86SrcIdxOperand : X86MemOperand { let MIOperandInfo = (ops ptr_rc, SEGMENT_REG); } class X86DstIdxOperand : X86MemOperand { let MIOperandInfo = (ops ptr_rc); } def srcidx8 : X86SrcIdxOperand<"printSrcIdx8", X86SrcIdx8Operand>; def srcidx16 : X86SrcIdxOperand<"printSrcIdx16", X86SrcIdx16Operand>; def srcidx32 : X86SrcIdxOperand<"printSrcIdx32", X86SrcIdx32Operand>; def srcidx64 : X86SrcIdxOperand<"printSrcIdx64", X86SrcIdx64Operand>; def dstidx8 : X86DstIdxOperand<"printDstIdx8", X86DstIdx8Operand>; def dstidx16 : X86DstIdxOperand<"printDstIdx16", X86DstIdx16Operand>; def dstidx32 : X86DstIdxOperand<"printDstIdx32", X86DstIdx32Operand>; def dstidx64 : X86DstIdxOperand<"printDstIdx64", X86DstIdx64Operand>; class X86MemOffsOperand : X86MemOperand { let MIOperandInfo = (ops immOperand, SEGMENT_REG); } def offset16_8 : X86MemOffsOperand; def offset16_16 : X86MemOffsOperand; def offset16_32 
: X86MemOffsOperand; def offset32_8 : X86MemOffsOperand; def offset32_16 : X86MemOffsOperand; def offset32_32 : X86MemOffsOperand; def offset32_64 : X86MemOffsOperand; def offset64_8 : X86MemOffsOperand; def offset64_16 : X86MemOffsOperand; def offset64_32 : X86MemOffsOperand; def offset64_64 : X86MemOffsOperand; def SSECC : Operand { let PrintMethod = "printSSEAVXCC"; let OperandType = "OPERAND_IMMEDIATE"; } def AVXCC : Operand { let PrintMethod = "printSSEAVXCC"; let OperandType = "OPERAND_IMMEDIATE"; } def AVX512ICC : Operand { let PrintMethod = "printSSEAVXCC"; let OperandType = "OPERAND_IMMEDIATE"; } def XOPCC : Operand { let PrintMethod = "printXOPCC"; let OperandType = "OPERAND_IMMEDIATE"; } class ImmSExtAsmOperandClass : AsmOperandClass { let SuperClasses = [ImmAsmOperand]; let RenderMethod = "addImmOperands"; } def X86GR32orGR64AsmOperand : AsmOperandClass { let Name = "GR32orGR64"; } def GR32orGR64 : RegisterOperand { let ParserMatchClass = X86GR32orGR64AsmOperand; } def AVX512RCOperand : AsmOperandClass { let Name = "AVX512RC"; } def AVX512RC : Operand { let PrintMethod = "printRoundingControl"; let OperandType = "OPERAND_IMMEDIATE"; let ParserMatchClass = AVX512RCOperand; } // Sign-extended immediate classes. We don't need to define the full lattice // here because there is no instruction with an ambiguity between ImmSExti64i32 // and ImmSExti32i8. // // The strange ranges come from the fact that the assembler always works with // 64-bit immediates, but for a 16-bit target value we want to accept both "-1" // (which will be a -1ULL), and "0xFF" (-1 in 16-bits). // [0, 0x7FFFFFFF] | // [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti64i32"; } // [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti16i8"; let SuperClasses = [ImmSExti64i32AsmOperand]; } // [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti32i8"; } // [0, 0x0000007F] | // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti64i8"; let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand]; } // Unsigned immediate used by SSE/AVX instructions // [0, 0xFF] // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmUnsignedi8AsmOperand : AsmOperandClass { let Name = "ImmUnsignedi8"; let RenderMethod = "addImmOperands"; } // A couple of more descriptive operand definitions. // 16-bits but only 8 bits are significant. def i16i8imm : Operand { let ParserMatchClass = ImmSExti16i8AsmOperand; let OperandType = "OPERAND_IMMEDIATE"; } // 32-bits but only 8 bits are significant. def i32i8imm : Operand { let ParserMatchClass = ImmSExti32i8AsmOperand; let OperandType = "OPERAND_IMMEDIATE"; } // 64-bits but only 32 bits are significant. def i64i32imm : Operand { let ParserMatchClass = ImmSExti64i32AsmOperand; let OperandType = "OPERAND_IMMEDIATE"; } // 64-bits but only 8 bits are significant. def i64i8imm : Operand { let ParserMatchClass = ImmSExti64i8AsmOperand; let OperandType = "OPERAND_IMMEDIATE"; } // Unsigned 8-bit immediate used by SSE/AVX instructions. 
def u8imm : Operand<i8> {
  let PrintMethod = "printU8Imm";
  let ParserMatchClass = ImmUnsignedi8AsmOperand;
  let OperandType = "OPERAND_IMMEDIATE";
}

// 32-bit immediate but only 8-bits are significant and they are unsigned.
// Used by some SSE/AVX instructions that use intrinsics.
def i32u8imm : Operand<i32> {
  let PrintMethod = "printU8Imm";
  let ParserMatchClass = ImmUnsignedi8AsmOperand;
  let OperandType = "OPERAND_IMMEDIATE";
}

// 64-bits but only 32 bits are significant, and those bits are treated as
// being pc relative.
def i64i32imm_pcrel : Operand<i64> {
  let PrintMethod = "printPCRelImm";
  let ParserMatchClass = X86AbsMemAsmOperand;
  let OperandType = "OPERAND_PCREL";
}

def lea64_32mem : Operand<i32> {
  let PrintMethod = "printanymem";
  let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
  let ParserMatchClass = X86MemAsmOperand;
}

// Memory operands that use 64-bit pointers in both ILP32 and LP64.
def lea64mem : Operand<i64> {
  let PrintMethod = "printanymem";
  let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, SEGMENT_REG);
  let ParserMatchClass = X86MemAsmOperand;
}

//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
//

// Define X86-specific addressing mode.
def addr      : ComplexPattern<iPTR, 5, "selectAddr", [], [SDNPWantParent]>;
def lea32addr : ComplexPattern<i32, 5, "selectLEAAddr",
                               [add, sub, mul, X86mul_imm, shl, or, frameindex],
                               []>;
// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
def lea64_32addr : ComplexPattern<i32, 5, "selectLEA64_32Addr",
                                  [add, sub, mul, X86mul_imm, shl, or,
                                   frameindex, X86WrapperRIP],
                                  []>;

def tls32addr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
                               [tglobaltlsaddr], []>;

def tls32baseaddr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
                                   [tglobaltlsaddr], []>;

def lea64addr : ComplexPattern<i64, 5, "selectLEAAddr",
                               [add, sub, mul, X86mul_imm, shl, or, frameindex,
                                X86WrapperRIP], []>;

def tls64addr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
                               [tglobaltlsaddr], []>;

def tls64baseaddr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
                                   [tglobaltlsaddr], []>;

def vectoraddr : ComplexPattern<iPTR, 5, "selectVectorAddr", [],
                                [SDNPWantParent]>;

// A relocatable immediate is either an immediate operand or an operand that
// can be relocated by the linker to an immediate, such as a regular symbol in
// non-PIC code.
def relocImm : ComplexPattern<iAny, 1, "selectRelocImm", [imm, X86Wrapper],
                              [], 0>;

//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
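// The CET predicates added below (HasSHSTK, HasIBT) follow the same pattern
// as the other feature predicates and gate selection of the new shadow-stack
// and indirect-branch-tracking instructions. A minimal sketch of how such a
// predicate would typically be consumed (hypothetical def, not part of this
// hunk; the real instruction definitions live in the X86 instruction files):
//
//   let Predicates = [HasSHSTK] in
//   def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src",
//                   [(int_x86_incsspd GR32:$src)]>, XS;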
def TruePredicate : Predicate<"true">; def HasCMov : Predicate<"Subtarget->hasCMov()">; def NoCMov : Predicate<"!Subtarget->hasCMov()">; def HasMMX : Predicate<"Subtarget->hasMMX()">; def Has3DNow : Predicate<"Subtarget->has3DNow()">; def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; def NoSSE41 : Predicate<"!Subtarget->hasSSE41()">; def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def NoAVX : Predicate<"!Subtarget->hasAVX()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; def HasAVX512 : Predicate<"Subtarget->hasAVX512()">, AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">; def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">, AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">; def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">, AssemblerPredicate<"FeatureVPOPCNTDQ", "AVX-512 VPOPCNTDQ ISA">; def HasPFI : Predicate<"Subtarget->hasPFI()">, AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">; def HasERI : Predicate<"Subtarget->hasERI()">, AssemblerPredicate<"FeatureERI", "AVX-512 ER ISA">; def HasDQI : Predicate<"Subtarget->hasDQI()">, AssemblerPredicate<"FeatureDQI", "AVX-512 DQ ISA">; def NoDQI : Predicate<"!Subtarget->hasDQI()">; def HasBWI : Predicate<"Subtarget->hasBWI()">, AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">; def NoBWI : Predicate<"!Subtarget->hasBWI()">; def HasVLX : Predicate<"Subtarget->hasVLX()">, AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">; def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def PKU : Predicate<"Subtarget->hasPKU()">; def HasVNNI : Predicate<"Subtarget->hasVNNI()">; def HasBITALG : Predicate<"Subtarget->hasBITALG()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; def HasVAES : Predicate<"Subtarget->hasVAES()">; def NoVLX_Or_NoVAES : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVAES()">; def HasFXSR : Predicate<"Subtarget->hasFXSR()">; def HasXSAVE : Predicate<"Subtarget->hasXSAVE()">; def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">; def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">; def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">; def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">; def NoVLX_Or_NoVPCLMULQDQ : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVPCLMULQDQ()">; def HasVPCLMULQDQ : Predicate<"Subtarget->hasVPCLMULQDQ()">; def HasGFNI : 
Predicate<"Subtarget->hasGFNI()">; def HasFMA : Predicate<"Subtarget->hasFMA()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def NoFMA4 : Predicate<"!Subtarget->hasFMA4()">; def HasXOP : Predicate<"Subtarget->hasXOP()">; def HasTBM : Predicate<"Subtarget->hasTBM()">; def NoTBM : Predicate<"!Subtarget->hasTBM()">; def HasLWP : Predicate<"Subtarget->hasLWP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">; def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">; def HasVBMI : Predicate<"Subtarget->hasVBMI()">, AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">; def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">; def HasIFMA : Predicate<"Subtarget->hasIFMA()">, AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">; def HasRTM : Predicate<"Subtarget->hasRTM()">; def HasADX : Predicate<"Subtarget->hasADX()">; def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">; def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">; def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasMPX : Predicate<"Subtarget->hasMPX()">; +def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">; +def HasIBT : Predicate<"Subtarget->hasIBT()">; def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">; def HasCLWB : Predicate<"Subtarget->hasCLWB()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate<"Mode64Bit", "64-bit mode">; def IsLP64 : Predicate<"Subtarget->isTarget64BitLP64()">; def NotLP64 : Predicate<"!Subtarget->isTarget64BitLP64()">; def In16BitMode : Predicate<"Subtarget->is16Bit()">, AssemblerPredicate<"Mode16Bit", "16-bit mode">; def Not16BitMode : Predicate<"!Subtarget->is16Bit()">, AssemblerPredicate<"!Mode16Bit", "Not 16-bit mode">; def In32BitMode : Predicate<"Subtarget->is32Bit()">, AssemblerPredicate<"Mode32Bit", "32-bit mode">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||" "Subtarget->getFrameLowering()->hasFP(*MF)"> { let RecomputePerFunction = 1; } def IsPS4 : Predicate<"Subtarget->isTargetPS4()">; def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" "TM.getCodeModel() == CodeModel::Kernel">; def IsNotPIC : Predicate<"!TM.isPositionIndependent()">; // We could compute these on a per-module basis but doing so requires accessing // the Function object through the Subtarget and objections were raised // to that (see post-commit review comments for r301750). 
let RecomputePerFunction = 1 in {
  def OptForSize : Predicate<"MF->getFunction()->optForSize()">;
  def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">;
  def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">;
  def UseIncDec : Predicate<"!Subtarget->slowIncDec() || "
                            "MF->getFunction()->optForSize()">;
}

def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;

//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
//

include "X86InstrFormats.td"

//===----------------------------------------------------------------------===//
// Pattern fragments.
//

// X86 specific condition code. These correspond to CondCode in
// X86InstrInfo.h. They must be kept in synch.
def X86_COND_A  : PatLeaf<(i8 0)>;  // alt. COND_NBE
def X86_COND_AE : PatLeaf<(i8 1)>;  // alt. COND_NC
def X86_COND_B  : PatLeaf<(i8 2)>;  // alt. COND_C
def X86_COND_BE : PatLeaf<(i8 3)>;  // alt. COND_NA
def X86_COND_E  : PatLeaf<(i8 4)>;  // alt. COND_Z
def X86_COND_G  : PatLeaf<(i8 5)>;  // alt. COND_NLE
def X86_COND_GE : PatLeaf<(i8 6)>;  // alt. COND_NL
def X86_COND_L  : PatLeaf<(i8 7)>;  // alt. COND_NGE
def X86_COND_LE : PatLeaf<(i8 8)>;  // alt. COND_NG
def X86_COND_NE : PatLeaf<(i8 9)>;  // alt. COND_NZ
def X86_COND_NO : PatLeaf<(i8 10)>;
def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
def X86_COND_NS : PatLeaf<(i8 12)>;
def X86_COND_O  : PatLeaf<(i8 13)>;
def X86_COND_P  : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S  : PatLeaf<(i8 15)>;

def i16immSExt8  : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
def i32immSExt8  : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
def i64immSExt8  : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;

// FIXME: Ideally we would just replace the above i*immSExt* matchers with
// relocImm-based matchers, but then FastISel would be unable to use them.
def i64relocImmSExt8 : PatLeaf<(i64 relocImm), [{
  return isSExtRelocImm<8>(N);
}]>;
def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
  return isSExtRelocImm<32>(N);
}]>;

// If we have multiple users of an immediate, it's much smaller to reuse
// the register, rather than encode the immediate in every instruction.
// This has the risk of increasing register pressure from stretched live
// ranges, however, the immediates should be trivial to rematerialize by
// the RA in the event of high register pressure.
// TODO : This is currently enabled for stores and binary ops. There are more
// cases for which this can be enabled, though this catches the bulk of the
// issues.
// TODO2 : This should really also be enabled under O2, but there's currently
// an issue with RA where we don't pull the constants into their users
// when we rematerialize them. I'll follow-up on enabling O2 after we fix that
// issue.
// TODO3 : This is currently limited to single basic blocks (DAG creation
// pulls block immediates to the top and merges them if necessary).
// Eventually, it would be nice to allow ConstantHoisting to merge constants
// globally for potentially added savings.
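// In practice, the *_su variants below only match when folding the immediate
// into the instruction carries no size penalty: roughly speaking, when
// optimizing for size, shouldAvoidImmediateInstFormsForSize() rejects an
// immediate with several users so it is materialized into a register once
// rather than re-encoded in every instruction.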
//
def imm8_su : PatLeaf<(i8 relocImm), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
def imm16_su : PatLeaf<(i16 relocImm), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
def imm32_su : PatLeaf<(i32 relocImm), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;

def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;

def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
}]>;

// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
// unsigned field.
def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;

def i64immZExt32SExt8 : ImmLeaf<i64, [{
  return isUInt<32>(Imm) && isInt<8>(static_cast<int32_t>(Imm));
}]>;

// Helper fragments for loads.
// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
// known to be 32-bit aligned or better. Ditto for i8 to i16.
def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::LoadExtType ExtType = LD->getExtensionType();
  if (ExtType == ISD::NON_EXTLOAD)
    return true;
  if (ExtType == ISD::EXTLOAD)
    return LD->getAlignment() >= 2 && !LD->isVolatile();
  return false;
}]>;

def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::LoadExtType ExtType = LD->getExtensionType();
  if (ExtType == ISD::EXTLOAD)
    return LD->getAlignment() >= 2 && !LD->isVolatile();
  return false;
}]>;

def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::LoadExtType ExtType = LD->getExtensionType();
  if (ExtType == ISD::NON_EXTLOAD)
    return true;
  if (ExtType == ISD::EXTLOAD)
    return LD->getAlignment() >= 4 && !LD->isVolatile();
  return false;
}]>;

def loadi8   : PatFrag<(ops node:$ptr), (i8  (load node:$ptr))>;
def loadi64  : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
def loadf32  : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
def loadf64  : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
def loadf80  : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;

def sextloadi16i8  : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
def sextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;

def zextloadi8i1   : PatFrag<(ops node:$ptr), (i8  (zextloadi1 node:$ptr))>;
def zextloadi16i1  : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
def zextloadi32i1  : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
def zextloadi16i8  : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def zextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
def zextloadi64i1  : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
def zextloadi64i8  :
PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>; def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>; def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>; def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>; def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>; def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>; def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>; def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>; def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>; def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>; def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>; // An 'and' node with a single use. def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ return N->hasOneUse(); }]>; // An 'srl' node with a single use. def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{ return N->hasOneUse(); }]>; // An 'trunc' node with a single use. def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ return N->hasOneUse(); }]>; //===----------------------------------------------------------------------===// // Instruction list. // // Nop let hasSideEffects = 0, SchedRW = [WriteZero] in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>; def NOOPW : I<0x1f, MRMXm, (outs), (ins i16mem:$zero), "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize16; def NOOPL : I<0x1f, MRMXm, (outs), (ins i32mem:$zero), "nop{l}\t$zero", [], IIC_NOP>, TB, OpSize32; def NOOPQ : RI<0x1f, MRMXm, (outs), (ins i64mem:$zero), "nop{q}\t$zero", [], IIC_NOP>, TB; // Also allow register so we can assemble/disassemble def NOOPWr : I<0x1f, MRMXr, (outs), (ins GR16:$zero), "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize16; def NOOPLr : I<0x1f, MRMXr, (outs), (ins GR32:$zero), "nop{l}\t$zero", [], IIC_NOP>, TB, OpSize32; def NOOPQr : RI<0x1f, MRMXr, (outs), (ins GR64:$zero), "nop{q}\t$zero", [], IIC_NOP>, TB; } // Constructing a stack frame. def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl), "enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>; let SchedRW = [WriteALU] in { let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, hasSideEffects=0 in def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", [], IIC_LEAVE>, Requires<[Not64BitMode]>; let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, hasSideEffects = 0 in def LEAVE64 : I<0xC9, RawFrm, (outs), (ins), "leave", [], IIC_LEAVE>, Requires<[In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// // Miscellaneous Instructions. 
// let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in def Int_eh_sjlj_setup_dispatch : PseudoI<(outs), (ins), [(X86eh_sjlj_setup_dispatch)]>; let Defs = [ESP], Uses = [ESP], hasSideEffects=0 in { let mayLoad = 1, SchedRW = [WriteLoad] in { def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG16>, OpSize16; def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>; def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG>, OpSize16; def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>; } // mayLoad, SchedRW let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in { def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", [], IIC_POP_MEM>, OpSize16; def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", [], IIC_POP_MEM>, OpSize32, Requires<[Not64BitMode]>; } // mayStore, mayLoad, WriteRMW let mayStore = 1, SchedRW = [WriteStore] in { def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize16; def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[], IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>; def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize16; def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[], IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>; def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm), "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16; def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16; def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[Not64BitMode]>; def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[Not64BitMode]>; } // mayStore, SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[], IIC_PUSH_MEM>, OpSize16; def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>; } // mayLoad, mayStore, SchedRW } let mayLoad = 1, mayStore = 1, usesCustomInserter = 1, SchedRW = [WriteRMW], Defs = [ESP] in { let Uses = [ESP] in def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins), [(set GR32:$dst, (int_x86_flags_read_u32))]>, Requires<[Not64BitMode]>; let Uses = [RSP] in def RDFLAGS64 : PseudoI<(outs GR64:$dst), (ins), [(set GR64:$dst, (int_x86_flags_read_u64))]>, Requires<[In64BitMode]>; } let mayLoad = 1, mayStore = 1, usesCustomInserter = 1, SchedRW = [WriteRMW] in { let Defs = [ESP, EFLAGS], Uses = [ESP] in def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src), [(int_x86_flags_write_u32 GR32:$src)]>, Requires<[Not64BitMode]>; let Defs = [RSP, EFLAGS], Uses = [RSP] in def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src), [(int_x86_flags_write_u64 GR64:$src)]>, Requires<[In64BitMode]>; } let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0, SchedRW = [WriteLoad] in { def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize16; def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, OpSize32, Requires<[Not64BitMode]>; } let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, hasSideEffects=0, SchedRW = [WriteStore] in { def PUSHF16 : 
I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, OpSize16; def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>, OpSize32, Requires<[Not64BitMode]>; } let Defs = [RSP], Uses = [RSP], hasSideEffects=0 in { let mayLoad = 1, SchedRW = [WriteLoad] in { def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>; def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>; } // mayLoad, SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", [], IIC_POP_MEM>, OpSize32, Requires<[In64BitMode]>; let mayStore = 1, SchedRW = [WriteStore] in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; } // mayStore, SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>; } // mayLoad, mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[In64BitMode]>; def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[In64BitMode]>; } let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>, OpSize32, Requires<[In64BitMode]>, Sched<[WriteLoad]>; let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, hasSideEffects=0 in def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, OpSize32, Requires<[In64BitMode]>, Sched<[WriteStore]>; let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteLoad] in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popal", [], IIC_POP_A>, OpSize32, Requires<[Not64BitMode]>; def POPA16 : I<0x61, RawFrm, (outs), (ins), "popaw", [], IIC_POP_A>, OpSize16, Requires<[Not64BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in { def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pushal", [], IIC_PUSH_A>, OpSize32, Requires<[Not64BitMode]>; def PUSHA16 : I<0x60, RawFrm, (outs), (ins), "pushaw", [], IIC_PUSH_A>, OpSize16, Requires<[Not64BitMode]>; } let Constraints = "$src = $dst", SchedRW = [WriteALU] in { // GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", [(set GR32:$dst, (bswap GR32:$src))], IIC_BSWAP>, OpSize32, TB; def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB; } // Constraints = "$src = $dst", SchedRW // Bit scan instructions. 
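// BSF/BSR below produce both a GPR result and EFLAGS (ZF is set when the
// source is zero), which is why each pattern sets two results via the
// flag-producing X86bsf/X86bsr nodes. At the IR level, a zero-undef count
// such as (illustrative example):
//
//   %r = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//
// can be selected to BSF32rr, since leaving the zero-input case undefined
// matches the hardware behavior.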
let Defs = [EFLAGS] in {
  def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                  "bsf{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
                  IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteShift]>;
  def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                  "bsf{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
                  IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteShiftLd]>;
  def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                  "bsf{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))],
                  IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteShift]>;
  def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsf{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
                  IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteShiftLd]>;
  def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                   "bsf{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
                   IIC_BIT_SCAN_REG>, PS, Sched<[WriteShift]>;
  def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                   "bsf{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
                   IIC_BIT_SCAN_MEM>, PS, Sched<[WriteShiftLd]>;

  def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                  "bsr{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))],
                  IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteShift]>;
  def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                  "bsr{w}\t{$src, $dst|$dst, $src}",
                  [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
                  IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteShiftLd]>;
  def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                  "bsr{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))],
                  IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteShift]>;
  def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                  "bsr{l}\t{$src, $dst|$dst, $src}",
                  [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
                  IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteShiftLd]>;
  def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                   "bsr{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))],
                   IIC_BIT_SCAN_REG>, PS, Sched<[WriteShift]>;
  def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                   "bsr{q}\t{$src, $dst|$dst, $src}",
                   [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
                   IIC_BIT_SCAN_MEM>, PS, Sched<[WriteShiftLd]>;
} // Defs = [EFLAGS]

let SchedRW = [WriteMicrocoded] in {
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
def MOVSB : I<0xA4, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
              "movsb\t{$src, $dst|$dst, $src}", [], IIC_MOVS>;
def MOVSW : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
              "movsw\t{$src, $dst|$dst, $src}", [], IIC_MOVS>, OpSize16;
def MOVSL : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx32:$dst, srcidx32:$src),
              "movs{l|d}\t{$src, $dst|$dst, $src}", [], IIC_MOVS>, OpSize32;
def MOVSQ : RI<0xA5, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
               "movsq\t{$src, $dst|$dst, $src}", [], IIC_MOVS>;
}

// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
def STOSB : I<0xAA, RawFrmDst, (outs), (ins dstidx8:$dst),
              "stosb\t{%al, $dst|$dst, al}", [], IIC_STOS>;
let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
def STOSW : I<0xAB, RawFrmDst, (outs), (ins dstidx16:$dst),
              "stosw\t{%ax, $dst|$dst, ax}", [], IIC_STOS>, OpSize16;
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
def STOSL : I<0xAB, RawFrmDst, (outs), (ins dstidx32:$dst),
              "stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32;
let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in
def STOSQ : RI<0xAB, RawFrmDst, (outs), (ins dstidx64:$dst),
               "stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>;

// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,EFLAGS], Uses = [AL,EDI,EFLAGS] in
def SCASB : I<0xAE, RawFrmDst, (outs), (ins dstidx8:$dst),
              "scasb\t{$dst, %al|al, $dst}", [], IIC_SCAS>;
let Defs = [EDI,EFLAGS], Uses = [AX,EDI,EFLAGS] in
def SCASW : I<0xAF, RawFrmDst, (outs), (ins dstidx16:$dst),
              "scasw\t{$dst, %ax|ax, $dst}", [], IIC_SCAS>, OpSize16;
let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,EFLAGS] in
def SCASL : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst),
              "scas{l|d}\t{$dst, %eax|eax, $dst}", [], IIC_SCAS>, OpSize32;
let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,EFLAGS] in
def SCASQ : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst),
               "scasq\t{$dst, %rax|rax, $dst}", [], IIC_SCAS>;

// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,EFLAGS] in {
def CMPSB : I<0xA6, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
              "cmpsb\t{$dst, $src|$src, $dst}", [], IIC_CMPS>;
def CMPSW : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
              "cmpsw\t{$dst, $src|$src, $dst}", [], IIC_CMPS>, OpSize16;
def CMPSL : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx32:$dst, srcidx32:$src),
              "cmps{l|d}\t{$dst, $src|$src, $dst}", [], IIC_CMPS>, OpSize32;
def CMPSQ : RI<0xA7, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
               "cmpsq\t{$dst, $src|$src, $dst}", [], IIC_CMPS>;
}
} // SchedRW

//===----------------------------------------------------------------------===//
//  Move Instructions.
//
let SchedRW = [WriteMove] in {
let hasSideEffects = 0 in {
def MOV8rr  : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
                "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
                "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16;
def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32;
def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri  : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
                   "mov{b}\t{$src, $dst|$dst, $src}",
                   [(set GR8:$dst, imm:$src)], IIC_MOV>;
def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
                   "mov{w}\t{$src, $dst|$dst, $src}",
                   [(set GR16:$dst, imm:$src)], IIC_MOV>, OpSize16;
def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
                   "mov{l}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, relocImm:$src)], IIC_MOV>, OpSize32;
def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
}
let isReMaterializable = 1 in {
def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
                    "movabs{q}\t{$src, $dst|$dst, $src}",
                    [(set GR64:$dst, relocImm:$src)], IIC_MOV>;
}

// Longer forms that use a ModR/M byte.
Needed for disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOV8ri_alt : Ii8 <0xC6, MRM0r, (outs GR8 :$dst), (ins i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, FoldGenData<"MOV8ri">; def MOV16ri_alt : Ii16<0xC7, MRM0r, (outs GR16:$dst), (ins i16imm:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, FoldGenData<"MOV16ri">; def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, FoldGenData<"MOV32ri">; } } // SchedRW let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm8_su:$src), addr:$dst)], IIC_MOV_MEM>; def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), "mov{w}\t{$src, $dst|$dst, $src}", [(store (i16 imm16_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16; def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32; def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32_su:$src, addr:$dst)], IIC_MOV_MEM>; } // SchedRW let hasSideEffects = 0 in { /// Memory offset versions of moves. The immediate is an address mode sized /// offset from the segment base. let SchedRW = [WriteALU] in { let mayLoad = 1 in { let Defs = [AL] in def MOV8ao32 : Ii32<0xA0, RawFrmMemOffs, (outs), (ins offset32_8:$src), "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>, AdSize32; let Defs = [AX] in def MOV16ao32 : Ii32<0xA1, RawFrmMemOffs, (outs), (ins offset32_16:$src), "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>, OpSize16, AdSize32; let Defs = [EAX] in def MOV32ao32 : Ii32<0xA1, RawFrmMemOffs, (outs), (ins offset32_32:$src), "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>, OpSize32, AdSize32; let Defs = [RAX] in def MOV64ao32 : RIi32<0xA1, RawFrmMemOffs, (outs), (ins offset32_64:$src), "mov{q}\t{$src, %rax|rax, $src}", [], IIC_MOV_MEM>, AdSize32; let Defs = [AL] in def MOV8ao16 : Ii16<0xA0, RawFrmMemOffs, (outs), (ins offset16_8:$src), "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>, AdSize16; let Defs = [AX] in def MOV16ao16 : Ii16<0xA1, RawFrmMemOffs, (outs), (ins offset16_16:$src), "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>, OpSize16, AdSize16; let Defs = [EAX] in def MOV32ao16 : Ii16<0xA1, RawFrmMemOffs, (outs), (ins offset16_32:$src), "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>, AdSize16, OpSize32; } let mayStore = 1 in { let Uses = [AL] in def MOV8o32a : Ii32<0xA2, RawFrmMemOffs, (outs), (ins offset32_8:$dst), "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>, AdSize32; let Uses = [AX] in def MOV16o32a : Ii32<0xA3, RawFrmMemOffs, (outs), (ins offset32_16:$dst), "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>, OpSize16, AdSize32; let Uses = [EAX] in def MOV32o32a : Ii32<0xA3, RawFrmMemOffs, (outs), (ins offset32_32:$dst), "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>, OpSize32, AdSize32; let Uses = [RAX] in def MOV64o32a : RIi32<0xA3, RawFrmMemOffs, (outs), (ins offset32_64:$dst), "mov{q}\t{%rax, $dst|$dst, rax}", [], IIC_MOV_MEM>, AdSize32; let Uses = [AL] in def MOV8o16a : Ii16<0xA2, RawFrmMemOffs, (outs), (ins offset16_8:$dst), "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>, AdSize16; let Uses = [AX] in def MOV16o16a : Ii16<0xA3, RawFrmMemOffs, (outs), (ins offset16_16:$dst), "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>, 
OpSize16, AdSize16; let Uses = [EAX] in def MOV32o16a : Ii16<0xA3, RawFrmMemOffs, (outs), (ins offset16_32:$dst), "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>, OpSize32, AdSize16; } } // These forms all have full 64-bit absolute addresses in their instructions // and use the movabs mnemonic to indicate this specific form. let mayLoad = 1 in { let Defs = [AL] in def MOV8ao64 : RIi64_NOREX<0xA0, RawFrmMemOffs, (outs), (ins offset64_8:$src), "movabs{b}\t{$src, %al|al, $src}", []>, AdSize64; let Defs = [AX] in def MOV16ao64 : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset64_16:$src), "movabs{w}\t{$src, %ax|ax, $src}", []>, OpSize16, AdSize64; let Defs = [EAX] in def MOV32ao64 : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset64_32:$src), "movabs{l}\t{$src, %eax|eax, $src}", []>, OpSize32, AdSize64; let Defs = [RAX] in def MOV64ao64 : RIi64<0xA1, RawFrmMemOffs, (outs), (ins offset64_64:$src), "movabs{q}\t{$src, %rax|rax, $src}", []>, AdSize64; } let mayStore = 1 in { let Uses = [AL] in def MOV8o64a : RIi64_NOREX<0xA2, RawFrmMemOffs, (outs), (ins offset64_8:$dst), "movabs{b}\t{%al, $dst|$dst, al}", []>, AdSize64; let Uses = [AX] in def MOV16o64a : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs), (ins offset64_16:$dst), "movabs{w}\t{%ax, $dst|$dst, ax}", []>, OpSize16, AdSize64; let Uses = [EAX] in def MOV32o64a : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs), (ins offset64_32:$dst), "movabs{l}\t{%eax, $dst|$dst, eax}", []>, OpSize32, AdSize64; let Uses = [RAX] in def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst), "movabs{q}\t{%rax, $dst|$dst, rax}", []>, AdSize64; } } // hasSideEffects = 0 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, FoldGenData<"MOV8rr">; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, FoldGenData<"MOV16rr">; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, FoldGenData<"MOV32rr">; def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, FoldGenData<"MOV64rr">; } let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>; def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "mov{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (loadi16 addr:$src))], IIC_MOV_MEM>, OpSize16; def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (loadi32 addr:$src))], IIC_MOV_MEM>, OpSize32; def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>; } let SchedRW = [WriteStore] in { def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>; def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", [(store GR16:$src, addr:$dst)], IIC_MOV_MEM>, OpSize16; def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store GR32:$src, addr:$dst)], IIC_MOV_MEM>, 
                 OpSize32;
def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}",
                 [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
} // SchedRW

// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
// encoded when a REX prefix is present.
let isCodeGenOnly = 1 in {
let hasSideEffects = 0 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
                     (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
                     "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
                   Sched<[WriteMove]>;
let mayStore = 1, hasSideEffects = 0 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
                     (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
                     "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
                     IIC_MOV_MEM>, Sched<[WriteStore]>;
let mayLoad = 1, hasSideEffects = 0,
    canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
                     (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
                     "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
                     IIC_MOV_MEM>, Sched<[WriteLoad]>;
}

// Condition code ops, incl. set if equal/not equal/...
let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
             [(set EFLAGS, (X86sahf AH))], IIC_AHF>,
           Requires<[HasLAHFSAHF]>;
let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], IIC_AHF>, // AH = flags
           Requires<[HasLAHFSAHF]>;
} // SchedRW

//===----------------------------------------------------------------------===//
// Bit test instructions: BT, BTS, BTR, BTC.

let Defs = [EFLAGS] in {
let SchedRW = [WriteALU] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
               "bt{w}\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
             OpSize16, TB, NotMemoryFoldable;
def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
               "bt{l}\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))], IIC_BT_RR>,
             OpSize32, TB, NotMemoryFoldable;
def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                "bt{q}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>,
              TB, NotMemoryFoldable;
} // SchedRW

// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
// perspective, this is pretty bizarre. Make these instructions disassembly
// only for now. These instructions are also slow on modern CPUs so that's
// another reason to avoid generating them.
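// For example, "bt %eax, (%rdi)" may access memory at (%rdi + %eax/8), with
// %eax interpreted as a signed bit offset, whereas in "bt %eax, %ecx" only
// the low 5 bits of %eax are used.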
let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BT_MR >, OpSize16, TB, NotMemoryFoldable; def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BT_MR >, OpSize32, TB, NotMemoryFoldable; def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BT_MR >, TB, NotMemoryFoldable; } let SchedRW = [WriteALU] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))], IIC_BT_RI>, OpSize16, TB; def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))], IIC_BT_RI>, OpSize32, TB; def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))], IIC_BT_RI>, TB; } // SchedRW // Note that these instructions aren't slow because that only applies when the // other operand is in a register. When it's an immediate, bt is still fast. let SchedRW = [WriteALU] in { def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) ], IIC_BT_MI>, OpSize16, TB; def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2)) ], IIC_BT_MI>, OpSize32, TB; def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))], IIC_BT_MI>, TB; } // SchedRW let hasSideEffects = 0 in { let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize16, TB, NotMemoryFoldable; def BTC32rr : I<0xBB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize32, TB, NotMemoryFoldable; def BTC64rr : RI<0xBB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB, NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize16, TB, NotMemoryFoldable; def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize32, TB, NotMemoryFoldable; def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB, NotMemoryFoldable; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize16, TB; def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize32, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, 
i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize16, TB; def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize32, TB; def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize16, TB, NotMemoryFoldable; def BTR32rr : I<0xB3, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize32, TB, NotMemoryFoldable; def BTR64rr : RI<0xB3, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB, NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize16, TB, NotMemoryFoldable; def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize32, TB, NotMemoryFoldable; def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB, NotMemoryFoldable; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize16, TB; def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize32, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize16, TB; def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize32, TB; def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize16, TB, NotMemoryFoldable; def BTS32rr : I<0xAB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize32, TB, NotMemoryFoldable; def BTS64rr : RI<0xAB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB, NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize16, TB, NotMemoryFoldable; def BTS32mr : 
              I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
              OpSize32, TB, NotMemoryFoldable;
def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                 "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
               TB, NotMemoryFoldable;
}

let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst),
                   (ins GR16:$src1, i16i8imm:$src2),
                   "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
               OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst),
                   (ins GR32:$src1, i32i8imm:$src2),
                   "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
               OpSize32, TB;
def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst),
                    (ins GR64:$src1, i64i8imm:$src2),
                    "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
} // SchedRW

let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                   "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
               OpSize16, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                   "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
               OpSize32, TB;
def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
                    "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
}
} // hasSideEffects = 0
} // Defs = [EFLAGS]

//===----------------------------------------------------------------------===//
// Atomic support
//

// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
                       InstrItinClass itin> {
  let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in {
    def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
                     (ins GR8:$val, i8mem:$ptr),
                     !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
                     [(set GR8:$dst,
                           (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
                     itin>;
    def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
                      (ins GR16:$val, i16mem:$ptr),
                      !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
                      [(set GR16:$dst,
                            (!cast<PatFrag>(frag # "_16") addr:$ptr,
                                            GR16:$val))],
                      itin>, OpSize16;
    def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
                      (ins GR32:$val, i32mem:$ptr),
                      !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
                      [(set GR32:$dst,
                            (!cast<PatFrag>(frag # "_32") addr:$ptr,
                                            GR32:$val))],
                      itin>, OpSize32;
    def NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
                       (ins GR64:$val, i64mem:$ptr),
                       !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
                       [(set GR64:$dst,
                             (!cast<PatFrag>(frag # "_64") addr:$ptr,
                                             GR64:$val))],
                       itin>;
  }
}

defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>;

// Swap between registers.
let SchedRW = [WriteALU] in {
let Constraints = "$val = $dst" in {
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
                "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
                 "xchg{w}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>,
               OpSize16;
def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
                 "xchg{l}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>,
               OpSize32;
def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst),
                  (ins GR64:$val, GR64:$src),
                  "xchg{q}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
}

// Swap between EAX and other registers.
let Uses = [AX], Defs = [AX] in
def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
                 "xchg{w}\t{$src, %ax|ax, $src}", [], IIC_XCHG_REG>, OpSize16;
let Uses = [EAX], Defs = [EAX] in
def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
                 "xchg{l}\t{$src, %eax|eax, $src}", [], IIC_XCHG_REG>,
               OpSize32, Requires<[Not64BitMode]>;
let Uses = [EAX], Defs = [EAX] in
// Uses GR32_NOAX in 64-bit mode so the encoding can never degenerate into the
// bare 0x90, which is the NOP encoding.
// xchg %eax, %eax needs to clear the upper 32 bits of RAX, so it is not a NOP.
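// (In 64-bit mode the bare 0x90 byte is architecturally defined as NOP; with
// a REX.B prefix it becomes xchg %r8d, %eax. Excluding EAX from the register
// class is what keeps this form unambiguous.)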
def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src), "xchg{l}\t{$src, %eax|eax, $src}", [], IIC_XCHG_REG>, OpSize32, Requires<[In64BitMode]>; let Uses = [RAX], Defs = [RAX] in def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), "xchg{q}\t{$src, %rax|rax, $src}", [], IIC_XCHG_REG>; } // SchedRW let SchedRW = [WriteALU] in { def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB, OpSize16; def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB, OpSize32; def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB; def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB, OpSize16; def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB, OpSize32; def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB; } let SchedRW = [WriteALU] in { def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_REG8>, TB; def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "cmpxchg{w}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_REG>, TB, OpSize16; def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_REG>, TB, OpSize32; def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_REG>, TB; } // SchedRW let SchedRW = [WriteALULd, WriteRMW] in { let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_MEM8>, TB; def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "cmpxchg{w}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_MEM>, TB, OpSize16; def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_MEM>, TB, OpSize32; def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_MEM>, TB; } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), "cmpxchg8b\t$dst", [], IIC_CMPXCHG_8B>, TB; let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>, TB, Requires<[HasCmpxchg16b]>; } // SchedRW // Lock instruction prefix def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>; // Rex64 instruction prefix def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>, Requires<[In64BitMode]>; // Data16 instruction prefix def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>, Requires<[Not16BitMode]>; // Data instruction prefix def DATA32_PREFIX : I<0x66, RawFrm, 
(outs), (ins), "data32", []>, Requires<[In16BitMode]>; // Repeat string operation instruction prefixes // These uses the DF flag in the EFLAGS register to inc or dec ECX let Defs = [ECX], Uses = [ECX,EFLAGS] in { // Repeat (used with INS, OUTS, MOVS, LODS and STOS) def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; // Repeat while not equal (used with CMPS and SCAS) def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; } // String manipulation instructions let SchedRW = [WriteMicrocoded] in { // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [AL,ESI], Uses = [ESI,EFLAGS] in def LODSB : I<0xAC, RawFrmSrc, (outs), (ins srcidx8:$src), "lodsb\t{$src, %al|al, $src}", [], IIC_LODS>; let Defs = [AX,ESI], Uses = [ESI,EFLAGS] in def LODSW : I<0xAD, RawFrmSrc, (outs), (ins srcidx16:$src), "lodsw\t{$src, %ax|ax, $src}", [], IIC_LODS>, OpSize16; let Defs = [EAX,ESI], Uses = [ESI,EFLAGS] in def LODSL : I<0xAD, RawFrmSrc, (outs), (ins srcidx32:$src), "lods{l|d}\t{$src, %eax|eax, $src}", [], IIC_LODS>, OpSize32; let Defs = [RAX,ESI], Uses = [ESI,EFLAGS] in def LODSQ : RI<0xAD, RawFrmSrc, (outs), (ins srcidx64:$src), "lodsq\t{$src, %rax|rax, $src}", [], IIC_LODS>; } let SchedRW = [WriteSystem] in { // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [ESI], Uses = [DX,ESI,EFLAGS] in { def OUTSB : I<0x6E, RawFrmSrc, (outs), (ins srcidx8:$src), "outsb\t{$src, %dx|dx, $src}", [], IIC_OUTS>; def OUTSW : I<0x6F, RawFrmSrc, (outs), (ins srcidx16:$src), "outsw\t{$src, %dx|dx, $src}", [], IIC_OUTS>, OpSize16; def OUTSL : I<0x6F, RawFrmSrc, (outs), (ins srcidx32:$src), "outs{l|d}\t{$src, %dx|dx, $src}", [], IIC_OUTS>, OpSize32; } // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI], Uses = [DX,EDI,EFLAGS] in { def INSB : I<0x6C, RawFrmDst, (outs), (ins dstidx8:$dst), "insb\t{%dx, $dst|$dst, dx}", [], IIC_INS>; def INSW : I<0x6D, RawFrmDst, (outs), (ins dstidx16:$dst), "insw\t{%dx, $dst|$dst, dx}", [], IIC_INS>, OpSize16; def INSL : I<0x6D, RawFrmDst, (outs), (ins dstidx32:$dst), "ins{l|d}\t{%dx, $dst|$dst, dx}", [], IIC_INS>, OpSize32; } } // Flag instructions let SchedRW = [WriteALU] in { def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>; def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>; def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>; def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>; def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>; def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>; def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>; def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; } // Table lookup instructions let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>, Sched<[WriteLoad]>; let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, Requires<[Not64BitMode]>; // ASCII Adjust AX Before Division let Uses = [AX], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src), "aad\t$src", [], IIC_AAD>, Requires<[Not64BitMode]>; // ASCII Adjust AX After Multiply let Uses = [AL], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src), "aam\t$src", [], IIC_AAM>, Requires<[Not64BitMode]>; // 
ASCII Adjust AL After Subtraction - sets let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], IIC_AAS>, Requires<[Not64BitMode]>; // Decimal Adjust AL after Addition let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, Requires<[Not64BitMode]>; // Decimal Adjust AL after Subtraction let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, Requires<[Not64BitMode]>; } // SchedRW let SchedRW = [WriteSystem] in { // Check Array Index Against Bounds def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize16, Requires<[Not64BitMode]>; def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize32, Requires<[Not64BitMode]>; // Adjust RPL Field of Segment Selector def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>, Requires<[Not64BitMode]>; let mayStore = 1 in def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[Not64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// // MOVBE Instructions // let Predicates = [HasMOVBE] in { let SchedRW = [WriteALULd] in { def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>, OpSize16, T8PS; def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movbe{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bswap (loadi32 addr:$src)))], IIC_MOVBE>, OpSize32, T8PS; def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movbe{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>, T8PS; } let SchedRW = [WriteStore] in { def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>, OpSize16, T8PS; def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movbe{l}\t{$src, $dst|$dst, $src}", [(store (bswap GR32:$src), addr:$dst)], IIC_MOVBE>, OpSize32, T8PS; def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "movbe{q}\t{$src, $dst|$dst, $src}", [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>, T8PS; } } //===----------------------------------------------------------------------===// // RDRAND Instruction // let Predicates = [HasRDRAND], Defs = [EFLAGS] in { def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins), "rdrand{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86rdrand))]>, OpSize16, PS; def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins), "rdrand{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86rdrand))]>, OpSize32, PS; def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins), "rdrand{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86rdrand))]>, PS; } //===----------------------------------------------------------------------===// // RDSEED Instruction // let Predicates = [HasRDSEED], Defs = [EFLAGS] in { def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), "rdseed{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, PS; def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), "rdseed{l}\t$dst", [(set GR32:$dst, 
                     EFLAGS, (X86rdseed))]>, OpSize32, PS;
  def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdseed{q}\t$dst",
                     [(set GR64:$dst, EFLAGS, (X86rdseed))]>, PS;
}

//===----------------------------------------------------------------------===//
// LZCNT Instruction
//
let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
  def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                    "lzcnt{w}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>,
                  XS, OpSize16;
  def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                    "lzcnt{w}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, (ctlz (loadi16 addr:$src))),
                     (implicit EFLAGS)]>, XS, OpSize16;
  def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                    "lzcnt{l}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>,
                  XS, OpSize32;
  def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                    "lzcnt{l}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (ctlz (loadi32 addr:$src))),
                     (implicit EFLAGS)]>, XS, OpSize32;
  def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                     "lzcnt{q}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>,
                   XS;
  def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                     "lzcnt{q}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (ctlz (loadi64 addr:$src))),
                      (implicit EFLAGS)]>, XS;
}

//===----------------------------------------------------------------------===//
// BMI Instructions
//
let Predicates = [HasBMI], Defs = [EFLAGS] in {
  def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                    "tzcnt{w}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>,
                  XS, OpSize16;
  def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                    "tzcnt{w}\t{$src, $dst|$dst, $src}",
                    [(set GR16:$dst, (cttz (loadi16 addr:$src))),
                     (implicit EFLAGS)]>, XS, OpSize16;
  def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                    "tzcnt{l}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>,
                  XS, OpSize32;
  def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                    "tzcnt{l}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (cttz (loadi32 addr:$src))),
                     (implicit EFLAGS)]>, XS, OpSize32;
  def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                     "tzcnt{q}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
                   XS;
  def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                     "tzcnt{q}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (cttz (loadi64 addr:$src))),
                      (implicit EFLAGS)]>, XS;
}

multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
                   RegisterClass RC, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
  def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
             !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
           T8PS, VEX_4V;
  let mayLoad = 1 in
  def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
             !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
           T8PS, VEX_4V;
}
}

let Predicates = [HasBMI], Defs = [EFLAGS] in {
  defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem>;
  defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem>, VEX_W;
  defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem>;
  defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem>, VEX_W;
  defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem>;
  defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem>, VEX_W;
}

//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate BMI instructions.
//===----------------------------------------------------------------------===//

let Predicates = [HasBMI] in {
  // FIXME: patterns for the load versions are not implemented
  def : Pat<(and GR32:$src, (add GR32:$src, -1)),
            (BLSR32rr GR32:$src)>;
  def : Pat<(and GR64:$src, (add GR64:$src, -1)),
            (BLSR64rr GR64:$src)>;

  def : Pat<(xor GR32:$src, (add GR32:$src, -1)),
            (BLSMSK32rr GR32:$src)>;
  def : Pat<(xor GR64:$src, (add GR64:$src, -1)),
            (BLSMSK64rr GR64:$src)>;

  def : Pat<(and GR32:$src, (ineg GR32:$src)),
            (BLSI32rr GR32:$src)>;
  def : Pat<(and GR64:$src, (ineg GR64:$src)),
            (BLSI64rr GR64:$src)>;
}

multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
                          X86MemOperand x86memop, Intrinsic Int,
                          PatFrag ld_frag> {
  def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
           T8PS, VEX;
  def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst),
             (ins x86memop:$src1, RC:$src2),
             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
              (implicit EFLAGS)]>, T8PS, VEX;
}

let Predicates = [HasBMI], Defs = [EFLAGS] in {
  defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem,
                                int_x86_bmi_bextr_32, loadi32>;
  defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem,
                                int_x86_bmi_bextr_64, loadi64>, VEX_W;
}

let Predicates = [HasBMI2], Defs = [EFLAGS] in {
  defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
                               int_x86_bmi_bzhi_32, loadi32>;
  defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
                               int_x86_bmi_bzhi_64, loadi64>, VEX_W;
}

def CountTrailingOnes : SDNodeXForm<imm, [{
  // Count the trailing ones in the immediate.
  return getI8Imm(countTrailingOnes(N->getZExtValue()), SDLoc(N));
}]>;

def BEXTRMaskXForm : SDNodeXForm<imm, [{
  unsigned Length = countTrailingOnes(N->getZExtValue());
  return getI32Imm(Length << 8, SDLoc(N));
}]>;

def AndMask64 : ImmLeaf<i64, [{
  return isMask_64(Imm) && Imm > UINT32_MAX;
}]>;

// Use BEXTR for 64-bit 'and' with large immediate 'mask'.
let Predicates = [HasBMI, NoBMI2, NoTBM] in {
  def : Pat<(and GR64:$src, AndMask64:$mask),
            (BEXTR64rr GR64:$src,
              (SUBREG_TO_REG (i64 0),
                             (MOV32ri (BEXTRMaskXForm imm:$mask)),
                             sub_32bit))>;
  def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
            (BEXTR64rm addr:$src,
              (SUBREG_TO_REG (i64 0),
                             (MOV32ri (BEXTRMaskXForm imm:$mask)),
                             sub_32bit))>;
}

// Use BZHI for 64-bit 'and' with large immediate 'mask'.
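// BZHI computes dst = src & ((1 << index) - 1), taking the index from bits
// 7:0 of its second operand. A 64-bit mask of N trailing ones with N > 32 is
// not encodable as a sign-extended imm32 AND, but it is just BZHI with
// index N, and N fits in a single 8-bit immediate move.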
let Predicates = [HasBMI2, NoTBM] in {
  def : Pat<(and GR64:$src, AndMask64:$mask),
            (BZHI64rr GR64:$src,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                             (MOV8ri (CountTrailingOnes imm:$mask)),
                             sub_8bit))>;
  def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
            (BZHI64rm addr:$src,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
                             (MOV8ri (CountTrailingOnes imm:$mask)),
                             sub_8bit))>;
}

let Predicates = [HasBMI2] in {
  def : Pat<(and GR32:$src, (add (shl 1, GR8:$lz), -1)),
            (BZHI32rr GR32:$src,
              (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
  def : Pat<(and (loadi32 addr:$src), (add (shl 1, GR8:$lz), -1)),
            (BZHI32rm addr:$src,
              (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
  def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),
            (BZHI64rr GR64:$src,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
  def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
            (BZHI64rm addr:$src,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;

  // x & (-1 >> (32 - y))
  def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
            (BZHI32rr GR32:$src, GR32:$lz)>;
  def : Pat<(and (loadi32 addr:$src),
                 (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
            (BZHI32rm addr:$src, GR32:$lz)>;

  // x & (-1 >> (64 - y))
  def : Pat<(and GR64:$src, (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
            (BZHI64rr GR64:$src,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
  def : Pat<(and (loadi64 addr:$src),
                 (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
            (BZHI64rm addr:$src,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;

  // x << (32 - y) >> (32 - y)
  def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))),
                 (i8 (trunc (sub 32, GR32:$lz)))),
            (BZHI32rr GR32:$src, GR32:$lz)>;
  def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))),
                 (i8 (trunc (sub 32, GR32:$lz)))),
            (BZHI32rm addr:$src, GR32:$lz)>;

  // x << (64 - y) >> (64 - y)
  def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))),
                 (i8 (trunc (sub 64, GR32:$lz)))),
            (BZHI64rr GR64:$src,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
  def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))),
                 (i8 (trunc (sub 64, GR32:$lz)))),
            (BZHI64rm addr:$src,
              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
} // HasBMI2

multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
                         X86MemOperand x86memop, Intrinsic Int,
                         PatFrag ld_frag> {
  def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (Int RC:$src1, RC:$src2))]>, VEX_4V;
  def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>, VEX_4V;
}

let Predicates = [HasBMI2] in {
  defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
                              int_x86_bmi_pdep_32, loadi32>, T8XD;
  defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
                              int_x86_bmi_pdep_64, loadi64>, T8XD, VEX_W;
  defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
                              int_x86_bmi_pext_32, loadi32>, T8XS;
  defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
                              int_x86_bmi_pext_64, loadi64>, T8XS, VEX_W;
}

//===----------------------------------------------------------------------===//
// TBM Instructions
//
let Predicates = [HasTBM], Defs = [EFLAGS] in {

multiclass tbm_ternary_imm_intr<bits<8> opc, RegisterClass RC,
                                string OpcodeStr, X86MemOperand x86memop,
                                PatFrag ld_frag, Intrinsic Int,
                                Operand immtype,
                                SDPatternOperator immoperator> {
  def ri : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
                !strconcat(OpcodeStr,
                           "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
                [(set RC:$dst, (Int RC:$src1, immoperator:$cntl))]>,
           XOP, XOPA;
  def mi : Ii32<opc, MRMSrcMem, (outs RC:$dst),
                (ins x86memop:$src1, immtype:$cntl),
                !strconcat(OpcodeStr,
                           "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
                [(set RC:$dst, (Int (ld_frag addr:$src1),
                                    immoperator:$cntl))]>, XOP, XOPA;
}

defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr", i32mem, loadi32,
                                     int_x86_tbm_bextri_u32, i32imm, imm>;
let ImmT = Imm32S in
defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr", i64mem, loadi64,
                                     int_x86_tbm_bextri_u64, i64i32imm,
                                     i64immSExt32>, VEX_W;

multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
                         RegisterClass RC, string OpcodeStr,
                         X86MemOperand x86memop, PatFrag ld_frag> {
let hasSideEffects = 0 in {
  def rr : I<opc, FormReg, (outs RC:$dst), (ins RC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
           XOP_4V, XOP9;
  let mayLoad = 1 in
  def rm : I<opc, FormMem, (outs RC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
           XOP_4V, XOP9;
}
}

multiclass tbm_binary_intr<bits<8> opc, string OpcodeStr,
                           Format FormReg, Format FormMem> {
  defm NAME#32 : tbm_binary_rm<opc, FormReg, FormMem, GR32, OpcodeStr,
                               i32mem, loadi32>;
  defm NAME#64 : tbm_binary_rm<opc, FormReg, FormMem, GR64, OpcodeStr,
                               i64mem, loadi64>, VEX_W;
}

defm BLCFILL : tbm_binary_intr<0x01, "blcfill", MRM1r, MRM1m>;
defm BLCI    : tbm_binary_intr<0x02, "blci",    MRM6r, MRM6m>;
defm BLCIC   : tbm_binary_intr<0x01, "blcic",   MRM5r, MRM5m>;
defm BLCMSK  : tbm_binary_intr<0x02, "blcmsk",  MRM1r, MRM1m>;
defm BLCS    : tbm_binary_intr<0x01, "blcs",    MRM3r, MRM3m>;
defm BLSFILL : tbm_binary_intr<0x01, "blsfill", MRM2r, MRM2m>;
defm BLSIC   : tbm_binary_intr<0x01, "blsic",   MRM6r, MRM6m>;
defm T1MSKC  : tbm_binary_intr<0x01, "t1mskc",  MRM7r, MRM7m>;
defm TZMSK   : tbm_binary_intr<0x01, "tzmsk",   MRM4r, MRM4m>;
} // HasTBM, EFLAGS

// Use BEXTRI for 64-bit 'and' with large immediate 'mask'.
let Predicates = [HasTBM] in {
  def : Pat<(and GR64:$src, AndMask64:$mask),
            (BEXTRI64ri GR64:$src, (BEXTRMaskXForm imm:$mask))>;
  def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
            (BEXTRI64mi addr:$src, (BEXTRMaskXForm imm:$mask))>;
}

//===----------------------------------------------------------------------===//
// Lightweight Profiling Instructions

let Predicates = [HasLWP] in {

def LLWPCB : I<0x12, MRM0r, (outs), (ins GR32:$src), "llwpcb\t$src",
               [(int_x86_llwpcb GR32:$src)], IIC_LWP>,
             XOP, XOP9, Requires<[Not64BitMode]>;
def SLWPCB : I<0x12, MRM1r, (outs GR32:$dst), (ins), "slwpcb\t$dst",
               [(set GR32:$dst, (int_x86_slwpcb))], IIC_LWP>,
             XOP, XOP9, Requires<[Not64BitMode]>;

def LLWPCB64 : I<0x12, MRM0r, (outs), (ins GR64:$src), "llwpcb\t$src",
                 [(int_x86_llwpcb GR64:$src)], IIC_LWP>,
               XOP, XOP9, VEX_W, Requires<[In64BitMode]>;
def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst",
                 [(set GR64:$dst, (int_x86_slwpcb))], IIC_LWP>,
               XOP, XOP9, VEX_W, Requires<[In64BitMode]>;

multiclass lwpins_intr<RegisterClass RC> {
  def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
                 "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
                 [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, imm:$cntl))],
                 IIC_LWP>, XOP_4V, XOPA;
  let mayLoad = 1 in
  def rmi : Ii32<0x12, MRM0m, (outs),
                 (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
                 "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
                 [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1),
                                          imm:$cntl))],
                 IIC_LWP>, XOP_4V, XOPA;
}

let Defs = [EFLAGS] in {
  defm LWPINS32 : lwpins_intr<GR32>;
  defm LWPINS64 : lwpins_intr<GR64>, VEX_W;
} // EFLAGS

multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> {
  def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
                 "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
                 [(Int RC:$src0, GR32:$src1, imm:$cntl)], IIC_LWP>,
            XOP_4V, XOPA;
  let mayLoad = 1 in
  def rmi : Ii32<0x12, MRM1m, (outs),
                 (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
                 "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
                 [(Int RC:$src0, (loadi32 addr:$src1), imm:$cntl)], IIC_LWP>,
            XOP_4V, XOPA;
}

defm LWPVAL32 : lwpval_intr<GR32, int_x86_lwpval32>;
defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W;

} // HasLWP

//===----------------------------------------------------------------------===//
// MONITORX/MWAITX Instructions
//
let SchedRW = [ WriteSystem ] in {
  let usesCustomInserter = 1 in {
    def MONITORX : PseudoI<(outs), (ins
i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_monitorx addr:$src1, GR32:$src2, GR32:$src3)]>, Requires<[ HasMWAITX ]>; } let Uses = [ EAX, ECX, EDX ] in { def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", [], IIC_SSE_MONITORX>, TB, Requires<[ HasMWAITX ]>; } let Uses = [ ECX, EAX, EBX ] in { def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", [(int_x86_mwaitx ECX, EAX, EBX)], IIC_SSE_MWAITX>, TB, Requires<[ HasMWAITX ]>; } } // SchedRW def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrrr)>, Requires<[ Not64BitMode ]>; def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrrr)>, Requires<[ In64BitMode ]>; def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>, Requires<[ Not64BitMode ]>; def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>, Requires<[ In64BitMode ]>; //===----------------------------------------------------------------------===// // CLZERO Instruction // let SchedRW = [WriteSystem] in { let Uses = [EAX] in def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", [], IIC_SSE_CLZERO>, TB, Requires<[HasCLZERO]>; let usesCustomInserter = 1 in { def CLZERO : PseudoI<(outs), (ins i32mem:$src1), [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>; } } // SchedRW def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>; def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Pattern fragments to auto generate TBM instructions. //===----------------------------------------------------------------------===// let Predicates = [HasTBM] in { // FIXME: patterns for the load versions are not implemented def : Pat<(and GR32:$src, (add GR32:$src, 1)), (BLCFILL32rr GR32:$src)>; def : Pat<(and GR64:$src, (add GR64:$src, 1)), (BLCFILL64rr GR64:$src)>; def : Pat<(or GR32:$src, (not (add GR32:$src, 1))), (BLCI32rr GR32:$src)>; def : Pat<(or GR64:$src, (not (add GR64:$src, 1))), (BLCI64rr GR64:$src)>; // Extra patterns because opt can optimize the above patterns to this. 
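// e.g. InstCombine rewrites (not (add x, 1)) to (sub -2, x) using the
// identity ~(x + 1) == -x - 2, so BLCI also has to match this form.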
def : Pat<(or GR32:$src, (sub -2, GR32:$src)), (BLCI32rr GR32:$src)>; def : Pat<(or GR64:$src, (sub -2, GR64:$src)), (BLCI64rr GR64:$src)>; def : Pat<(and (not GR32:$src), (add GR32:$src, 1)), (BLCIC32rr GR32:$src)>; def : Pat<(and (not GR64:$src), (add GR64:$src, 1)), (BLCIC64rr GR64:$src)>; def : Pat<(xor GR32:$src, (add GR32:$src, 1)), (BLCMSK32rr GR32:$src)>; def : Pat<(xor GR64:$src, (add GR64:$src, 1)), (BLCMSK64rr GR64:$src)>; def : Pat<(or GR32:$src, (add GR32:$src, 1)), (BLCS32rr GR32:$src)>; def : Pat<(or GR64:$src, (add GR64:$src, 1)), (BLCS64rr GR64:$src)>; def : Pat<(or GR32:$src, (add GR32:$src, -1)), (BLSFILL32rr GR32:$src)>; def : Pat<(or GR64:$src, (add GR64:$src, -1)), (BLSFILL64rr GR64:$src)>; def : Pat<(or (not GR32:$src), (add GR32:$src, -1)), (BLSIC32rr GR32:$src)>; def : Pat<(or (not GR64:$src), (add GR64:$src, -1)), (BLSIC64rr GR64:$src)>; def : Pat<(or (not GR32:$src), (add GR32:$src, 1)), (T1MSKC32rr GR32:$src)>; def : Pat<(or (not GR64:$src), (add GR64:$src, 1)), (T1MSKC64rr GR64:$src)>; def : Pat<(and (not GR32:$src), (add GR32:$src, -1)), (TZMSK32rr GR32:$src)>; def : Pat<(and (not GR64:$src), (add GR64:$src, -1)), (TZMSK64rr GR64:$src)>; } // HasTBM //===----------------------------------------------------------------------===// // Memory Instructions // let Predicates = [HasCLFLUSHOPT] in def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD; let Predicates = [HasCLWB] in def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", [(int_x86_clwb addr:$src)]>, PD; //===----------------------------------------------------------------------===// // Subsystems. //===----------------------------------------------------------------------===// include "X86InstrArithmetic.td" include "X86InstrCMovSetCC.td" include "X86InstrExtension.td" include "X86InstrControl.td" include "X86InstrShiftRotate.td" // X87 Floating Point Stack. include "X86InstrFPStack.td" // SIMD support (SSE, MMX and AVX) include "X86InstrFragmentsSIMD.td" // FMA - Fused Multiply-Add support (requires FMA) include "X86InstrFMA.td" // XOP include "X86InstrXOP.td" // SSE, MMX and 3DNow! vector support. include "X86InstrSSE.td" include "X86InstrAVX512.td" include "X86InstrMMX.td" include "X86Instr3DNow.td" // MPX instructions include "X86InstrMPX.td" include "X86InstrVMX.td" include "X86InstrSVM.td" include "X86InstrTSX.td" include "X86InstrSGX.td" // System instructions. include "X86InstrSystem.td" // Compiler Pseudo Instructions and Pat Patterns include "X86InstrCompiler.td" include "X86InstrVecCompiler.td" //===----------------------------------------------------------------------===// // Assembler Mnemonic Aliases //===----------------------------------------------------------------------===// def : MnemonicAlias<"call", "callw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"cbw", "cbtw", "att">; def : MnemonicAlias<"cwde", "cwtl", "att">; def : MnemonicAlias<"cwd", "cwtd", "att">; def : MnemonicAlias<"cdq", "cltd", "att">; def : MnemonicAlias<"cdqe", "cltq", "att">; def : MnemonicAlias<"cqo", "cqto", "att">; // In 64-bit mode lret maps to lretl; it is not ambiguous with lretq. 
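// (lretq, the form that pops a 64-bit offset, must still be written
// explicitly.)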
def : MnemonicAlias<"lret", "lretw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>; def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"loopz", "loope">; def : MnemonicAlias<"loopnz", "loopne">; def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"pop", "popq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"popf", "popfw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"popf", "popfq", "intel">, Requires<[In64BitMode]>; def : MnemonicAlias<"popfd", "popfl", "att">; // FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in // all modes. However: "push (addr)" and "push $42" should default to // pushl/pushq depending on the current mode. Similar for "pop %bx" def : MnemonicAlias<"push", "pushw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"push", "pushl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"push", "pushq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"pushf", "pushfw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"pushf", "pushfq", "intel">, Requires<[In64BitMode]>; def : MnemonicAlias<"pushfd", "pushfl", "att">; def : MnemonicAlias<"popad", "popal", "intel">, Requires<[Not64BitMode]>; def : MnemonicAlias<"pushad", "pushal", "intel">, Requires<[Not64BitMode]>; def : MnemonicAlias<"popa", "popaw", "intel">, Requires<[In16BitMode]>; def : MnemonicAlias<"pusha", "pushaw", "intel">, Requires<[In16BitMode]>; def : MnemonicAlias<"popa", "popal", "intel">, Requires<[In32BitMode]>; def : MnemonicAlias<"pusha", "pushal", "intel">, Requires<[In32BitMode]>; def : MnemonicAlias<"popa", "popaw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"pusha", "pushaw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"popa", "popal", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"pusha", "pushal", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"repe", "rep">; def : MnemonicAlias<"repz", "rep">; def : MnemonicAlias<"repnz", "repne">; def : MnemonicAlias<"ret", "retw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"ret", "retl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"ret", "retq", "att">, Requires<[In64BitMode]>; // Apply 'ret' behavior to 'retn' def : MnemonicAlias<"retn", "retw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"retn", "retl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"retn", "retq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"retn", "ret", "intel">; def : MnemonicAlias<"sal", "shl", "intel">; def : MnemonicAlias<"salb", "shlb", "att">; def : MnemonicAlias<"salw", "shlw", "att">; def : MnemonicAlias<"sall", "shll", "att">; def : MnemonicAlias<"salq", "shlq", "att">; def : MnemonicAlias<"smovb", "movsb", "att">; def : MnemonicAlias<"smovw", "movsw", "att">; def : MnemonicAlias<"smovl", "movsl", "att">; def : MnemonicAlias<"smovq", "movsq", "att">; def : MnemonicAlias<"ud2a", "ud2", "att">; def : MnemonicAlias<"verrw", "verr", "att">; // MS recognizes 'xacquire'/'xrelease' as 'acquire'/'release' def : 
MnemonicAlias<"acquire", "xacquire", "intel">; def : MnemonicAlias<"release", "xrelease", "intel">; // System instruction aliases. def : MnemonicAlias<"iret", "iretw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"iret", "iretl", "att">, Requires<[Not16BitMode]>; def : MnemonicAlias<"sysret", "sysretl", "att">; def : MnemonicAlias<"sysexit", "sysexitl", "att">; def : MnemonicAlias<"lgdt", "lgdtw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"lgdt", "lgdtl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"lgdt", "lgdtq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"lidt", "lidtw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"lidt", "lidtl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"lidt", "lidtq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"sgdt", "sgdtw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"sgdt", "sgdtl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"sgdt", "sgdtq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"sidt", "sidtw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"sidt", "sidtl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"sidt", "sidtq", "att">, Requires<[In64BitMode]>; // Floating point stack aliases. def : MnemonicAlias<"fcmovz", "fcmove", "att">; def : MnemonicAlias<"fcmova", "fcmovnbe", "att">; def : MnemonicAlias<"fcmovnae", "fcmovb", "att">; def : MnemonicAlias<"fcmovna", "fcmovbe", "att">; def : MnemonicAlias<"fcmovae", "fcmovnb", "att">; def : MnemonicAlias<"fcomip", "fcompi">; def : MnemonicAlias<"fildq", "fildll", "att">; def : MnemonicAlias<"fistpq", "fistpll", "att">; def : MnemonicAlias<"fisttpq", "fisttpll", "att">; def : MnemonicAlias<"fldcww", "fldcw", "att">; def : MnemonicAlias<"fnstcww", "fnstcw", "att">; def : MnemonicAlias<"fnstsww", "fnstsw", "att">; def : MnemonicAlias<"fucomip", "fucompi">; def : MnemonicAlias<"fwait", "wait">; def : MnemonicAlias<"fxsaveq", "fxsave64", "att">; def : MnemonicAlias<"fxrstorq", "fxrstor64", "att">; def : MnemonicAlias<"xsaveq", "xsave64", "att">; def : MnemonicAlias<"xrstorq", "xrstor64", "att">; def : MnemonicAlias<"xsaveoptq", "xsaveopt64", "att">; def : MnemonicAlias<"xrstorsq", "xrstors64", "att">; def : MnemonicAlias<"xsavecq", "xsavec64", "att">; def : MnemonicAlias<"xsavesq", "xsaves64", "att">; class CondCodeAlias : MnemonicAlias; /// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of /// MnemonicAlias's that canonicalize the condition code in a mnemonic, for /// example "setz" -> "sete". multiclass IntegerCondCodeMnemonicAlias { def C : CondCodeAlias; // setc -> setb def Z : CondCodeAlias; // setz -> sete def NA : CondCodeAlias; // setna -> setbe def NB : CondCodeAlias; // setnb -> setae def NC : CondCodeAlias; // setnc -> setae def NG : CondCodeAlias; // setng -> setle def NL : CondCodeAlias; // setnl -> setge def NZ : CondCodeAlias; // setnz -> setne def PE : CondCodeAlias; // setpe -> setp def PO : CondCodeAlias; // setpo -> setnp def NAE : CondCodeAlias; // setnae -> setb def NBE : CondCodeAlias; // setnbe -> seta def NGE : CondCodeAlias; // setnge -> setl def NLE : CondCodeAlias; // setnle -> setg } // Aliases for set defm : IntegerCondCodeMnemonicAlias<"set", "">; // Aliases for j defm : IntegerCondCodeMnemonicAlias<"j", "">; // Aliases for cmov{w,l,q} defm : IntegerCondCodeMnemonicAlias<"cmov", "w", "att">; defm : IntegerCondCodeMnemonicAlias<"cmov", "l", "att">; defm : IntegerCondCodeMnemonicAlias<"cmov", "q", "att">; // No size suffix for intel-style asm. 
defm : IntegerCondCodeMnemonicAlias<"cmov", "", "intel">; //===----------------------------------------------------------------------===// // Assembler Instruction Aliases //===----------------------------------------------------------------------===// // aad/aam default to base 10 if no operand is specified. def : InstAlias<"aad", (AAD8i8 10)>, Requires<[Not64BitMode]>; def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>; // Disambiguate the mem/imm form of bt-without-a-suffix as btl. // Likewise for btc/btr/bts. def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}", (BT32mi8 i32mem:$mem, i32i8imm:$imm), 0>; def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}", (BTC32mi8 i32mem:$mem, i32i8imm:$imm), 0>; def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}", (BTR32mi8 i32mem:$mem, i32i8imm:$imm), 0>; def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}", (BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0>; // clr aliases. def : InstAlias<"clrb\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; def : InstAlias<"clrw\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>; def : InstAlias<"clrl\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>; def : InstAlias<"clrq\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>; // lods aliases. Accept the destination being omitted because it's implicit // in the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the destination. def : InstAlias<"lodsb\t$src", (LODSB srcidx8:$src), 0>; def : InstAlias<"lodsw\t$src", (LODSW srcidx16:$src), 0>; def : InstAlias<"lods{l|d}\t$src", (LODSL srcidx32:$src), 0>; def : InstAlias<"lodsq\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>; def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>; def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>; def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; def : InstAlias<"lods\t$src", (LODSB srcidx8:$src), 0>; def : InstAlias<"lods\t$src", (LODSW srcidx16:$src), 0>; def : InstAlias<"lods\t$src", (LODSL srcidx32:$src), 0>; def : InstAlias<"lods\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; // stos aliases. Accept the source being omitted because it's implicit in // the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the source. def : InstAlias<"stosb\t$dst", (STOSB dstidx8:$dst), 0>; def : InstAlias<"stosw\t$dst", (STOSW dstidx16:$dst), 0>; def : InstAlias<"stos{l|d}\t$dst", (STOSL dstidx32:$dst), 0>; def : InstAlias<"stosq\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>; def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>; def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>; def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; def : InstAlias<"stos\t$dst", (STOSB dstidx8:$dst), 0>; def : InstAlias<"stos\t$dst", (STOSW dstidx16:$dst), 0>; def : InstAlias<"stos\t$dst", (STOSL dstidx32:$dst), 0>; def : InstAlias<"stos\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; // scas aliases. Accept the destination being omitted because it's implicit // in the mnemonic, or the mnemonic suffix being omitted because it's implicit // in the destination. 
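// For example, "scasb %es:(%edi)" and "scas %es:(%edi), %al" (illustrative
// operands) should both match the SCASB alias defined below.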
def : InstAlias<"scasb\t$dst", (SCASB dstidx8:$dst), 0>; def : InstAlias<"scasw\t$dst", (SCASW dstidx16:$dst), 0>; def : InstAlias<"scas{l|d}\t$dst", (SCASL dstidx32:$dst), 0>; def : InstAlias<"scasq\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>; def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>; def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>; def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; def : InstAlias<"scas\t$dst", (SCASB dstidx8:$dst), 0>; def : InstAlias<"scas\t$dst", (SCASW dstidx16:$dst), 0>; def : InstAlias<"scas\t$dst", (SCASL dstidx32:$dst), 0>; def : InstAlias<"scas\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; // cmps aliases. Mnemonic suffix being omitted because it's implicit // in the destination. def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSB dstidx8:$dst, srcidx8:$src), 0>; def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSW dstidx16:$dst, srcidx16:$src), 0>; def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSL dstidx32:$dst, srcidx32:$src), 0>; def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>; // movs aliases. Mnemonic suffix being omitted because it's implicit // in the destination. def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSB dstidx8:$dst, srcidx8:$src), 0>; def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSW dstidx16:$dst, srcidx16:$src), 0>; def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSL dstidx32:$dst, srcidx32:$src), 0>; def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>; // div and idiv aliases for explicit A register. def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>; def : InstAlias<"div{w}\t{$src, %ax|ax, $src}", (DIV16r GR16:$src)>; def : InstAlias<"div{l}\t{$src, %eax|eax, $src}", (DIV32r GR32:$src)>; def : InstAlias<"div{q}\t{$src, %rax|rax, $src}", (DIV64r GR64:$src)>; def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8m i8mem :$src)>; def : InstAlias<"div{w}\t{$src, %ax|ax, $src}", (DIV16m i16mem:$src)>; def : InstAlias<"div{l}\t{$src, %eax|eax, $src}", (DIV32m i32mem:$src)>; def : InstAlias<"div{q}\t{$src, %rax|rax, $src}", (DIV64m i64mem:$src)>; def : InstAlias<"idiv{b}\t{$src, %al|al, $src}", (IDIV8r GR8 :$src)>; def : InstAlias<"idiv{w}\t{$src, %ax|ax, $src}", (IDIV16r GR16:$src)>; def : InstAlias<"idiv{l}\t{$src, %eax|eax, $src}", (IDIV32r GR32:$src)>; def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64r GR64:$src)>; def : InstAlias<"idiv{b}\t{$src, %al|al, $src}", (IDIV8m i8mem :$src)>; def : InstAlias<"idiv{w}\t{$src, %ax|ax, $src}", (IDIV16m i16mem:$src)>; def : InstAlias<"idiv{l}\t{$src, %eax|eax, $src}", (IDIV32m i32mem:$src)>; def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64m i64mem:$src)>; // Various unary fpstack operations default to operating on on ST1. 
// For example, "fxch" -> "fxch %st(1)" def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>; def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>; def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>; def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>; def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>; def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>; def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>; def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>; def : InstAlias<"fxch", (XCH_F ST1), 0>; def : InstAlias<"fcom", (COM_FST0r ST1), 0>; def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>; def : InstAlias<"fcomi", (COM_FIr ST1), 0>; def : InstAlias<"fcompi", (COM_FIPr ST1), 0>; def : InstAlias<"fucom", (UCOM_Fr ST1), 0>; def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>; def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>; def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>; // Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op. // For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate // instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with // gas. multiclass FpUnaryAlias { def : InstAlias; def : InstAlias; } defm : FpUnaryAlias<"fadd", ADD_FST0r>; defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>; defm : FpUnaryAlias<"fsub", SUB_FST0r>; defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>; defm : FpUnaryAlias<"fsubr", SUBR_FST0r>; defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>; defm : FpUnaryAlias<"fmul", MUL_FST0r>; defm : FpUnaryAlias<"fmulp", MUL_FPrST0>; defm : FpUnaryAlias<"fdiv", DIV_FST0r>; defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>; defm : FpUnaryAlias<"fdivr", DIVR_FST0r>; defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>; defm : FpUnaryAlias<"fcomi", COM_FIr, 0>; defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>; defm : FpUnaryAlias<"fcompi", COM_FIPr>; defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; // Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they // commute. We also allow fdiv[r]p/fsubrp even though they don't commute, // solely because gas supports it. def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>; def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>; def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>; def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>; def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>; def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>; // We accept "fnstsw %eax" even though it only writes %ax. def : InstAlias<"fnstsw\t{%eax|eax}", (FNSTSW16r)>; def : InstAlias<"fnstsw\t{%al|al}" , (FNSTSW16r)>; def : InstAlias<"fnstsw" , (FNSTSW16r)>; // lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but // this is compatible with what GAS does. 
def : InstAlias<"lcall\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[In32BitMode]>; def : InstAlias<"ljmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[In32BitMode]>; def : InstAlias<"lcall\t{*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; def : InstAlias<"ljmp\t{*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; def : InstAlias<"lcall\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; def : InstAlias<"ljmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; def : InstAlias<"lcall\t{*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; def : InstAlias<"ljmp\t{*}$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; def : InstAlias<"call\t{*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>; def : InstAlias<"jmp\t{*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>; def : InstAlias<"call\t{*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>; def : InstAlias<"jmp\t{*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>; def : InstAlias<"call\t{*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>; def : InstAlias<"jmp\t{*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>; // "imul , B" is an alias for "imul , B, B". def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; // ins aliases. Accept the mnemonic suffix being omitted because it's implicit // in the destination. def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSB dstidx8:$dst), 0>; def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSW dstidx16:$dst), 0>; def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSL dstidx32:$dst), 0>; // outs aliases. Accept the mnemonic suffix being omitted because it's implicit // in the source. def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSB srcidx8:$src), 0>; def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSW srcidx16:$src), 0>; def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSL srcidx32:$src), 0>; // inb %dx -> inb %al, %dx def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>; def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>; def : InstAlias<"inl\t{%dx|dx}", (IN32rr), 0>; def : InstAlias<"inb\t$port", (IN8ri u8imm:$port), 0>; def : InstAlias<"inw\t$port", (IN16ri u8imm:$port), 0>; def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>; // jmp and call aliases for lcall and ljmp. 
// jmp $42,$5 -> ljmp def : InstAlias<"call\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; def : InstAlias<"jmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; def : InstAlias<"call\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[In32BitMode]>; def : InstAlias<"jmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[In32BitMode]>; def : InstAlias<"callw\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>; def : InstAlias<"jmpw\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>; def : InstAlias<"calll\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>; def : InstAlias<"jmpl\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>; // Force mov without a suffix with a segment and mem to prefer the 'l' form of // the move. All segment/mem forms are equivalent, this has the shortest // encoding. def : InstAlias<"mov\t{$mem, $seg|$seg, $mem}", (MOV16sm SEGMENT_REG:$seg, i16mem:$mem), 0>; def : InstAlias<"mov\t{$seg, $mem|$mem, $seg}", (MOV16ms i16mem:$mem, SEGMENT_REG:$seg), 0>; // Match 'movq <largeimm>, <reg>' as an alias for movabsq. def : InstAlias<"mov{q}\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>; // Match 'movq GR64, MMX' as an alias for movd. def : InstAlias<"movq\t{$src, $dst|$dst, $src}", (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>; def : InstAlias<"movq\t{$src, $dst|$dst, $src}", (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>; // movsx aliases def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>; def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>; def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>; def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>; def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>; def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>; def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>; // movzx aliases def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>; def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>; def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>; def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>; def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr8 GR64:$dst, GR8:$src), 0>; def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr16 GR64:$dst, GR16:$src), 0>; // Note: No GR32->GR64 movzx form. // outb %dx -> outb %al, %dx def : InstAlias<"outb\t{%dx|dx}", (OUT8rr), 0>; def : InstAlias<"outw\t{%dx|dx}", (OUT16rr), 0>; def : InstAlias<"outl\t{%dx|dx}", (OUT32rr), 0>; def : InstAlias<"outb\t$port", (OUT8ir u8imm:$port), 0>; def : InstAlias<"outw\t$port", (OUT16ir u8imm:$port), 0>; def : InstAlias<"outl\t$port", (OUT32ir u8imm:$port), 0>; // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity // errors, since its encoding is the most compact.
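// For example, "sldt 4(%esp)" (an illustrative operand) assembles as if it
// had been written "sldtw 4(%esp)".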
def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem), 0>; // shld/shrd op,op -> shld op, op, CL def : InstAlias<"shld{w}\t{$r2, $r1|$r1, $r2}", (SHLD16rrCL GR16:$r1, GR16:$r2), 0>; def : InstAlias<"shld{l}\t{$r2, $r1|$r1, $r2}", (SHLD32rrCL GR32:$r1, GR32:$r2), 0>; def : InstAlias<"shld{q}\t{$r2, $r1|$r1, $r2}", (SHLD64rrCL GR64:$r1, GR64:$r2), 0>; def : InstAlias<"shrd{w}\t{$r2, $r1|$r1, $r2}", (SHRD16rrCL GR16:$r1, GR16:$r2), 0>; def : InstAlias<"shrd{l}\t{$r2, $r1|$r1, $r2}", (SHRD32rrCL GR32:$r1, GR32:$r2), 0>; def : InstAlias<"shrd{q}\t{$r2, $r1|$r1, $r2}", (SHRD64rrCL GR64:$r1, GR64:$r2), 0>; def : InstAlias<"shld{w}\t{$reg, $mem|$mem, $reg}", (SHLD16mrCL i16mem:$mem, GR16:$reg), 0>; def : InstAlias<"shld{l}\t{$reg, $mem|$mem, $reg}", (SHLD32mrCL i32mem:$mem, GR32:$reg), 0>; def : InstAlias<"shld{q}\t{$reg, $mem|$mem, $reg}", (SHLD64mrCL i64mem:$mem, GR64:$reg), 0>; def : InstAlias<"shrd{w}\t{$reg, $mem|$mem, $reg}", (SHRD16mrCL i16mem:$mem, GR16:$reg), 0>; def : InstAlias<"shrd{l}\t{$reg, $mem|$mem, $reg}", (SHRD32mrCL i32mem:$mem, GR32:$reg), 0>; def : InstAlias<"shrd{q}\t{$reg, $mem|$mem, $reg}", (SHRD64mrCL i64mem:$mem, GR64:$reg), 0>; /* FIXME: This is disabled because the asm matcher is currently incapable of * matching a fixed immediate like $1. // "shl X, $1" is an alias for "shl X". multiclass ShiftRotateByOneAlias { def : InstAlias(!strconcat(Opc, "8r1")) GR8:$op)>; def : InstAlias(!strconcat(Opc, "16r1")) GR16:$op)>; def : InstAlias(!strconcat(Opc, "32r1")) GR32:$op)>; def : InstAlias(!strconcat(Opc, "64r1")) GR64:$op)>; def : InstAlias(!strconcat(Opc, "8m1")) i8mem:$op)>; def : InstAlias(!strconcat(Opc, "16m1")) i16mem:$op)>; def : InstAlias(!strconcat(Opc, "32m1")) i32mem:$op)>; def : InstAlias(!strconcat(Opc, "64m1")) i64mem:$op)>; } defm : ShiftRotateByOneAlias<"rcl", "RCL">; defm : ShiftRotateByOneAlias<"rcr", "RCR">; defm : ShiftRotateByOneAlias<"rol", "ROL">; defm : ShiftRotateByOneAlias<"ror", "ROR">; FIXME */ // test: We accept "testX , " and "testX , " as synonyms. def : InstAlias<"test{b}\t{$mem, $val|$val, $mem}", (TEST8mr i8mem :$mem, GR8 :$val), 0>; def : InstAlias<"test{w}\t{$mem, $val|$val, $mem}", (TEST16mr i16mem:$mem, GR16:$val), 0>; def : InstAlias<"test{l}\t{$mem, $val|$val, $mem}", (TEST32mr i32mem:$mem, GR32:$val), 0>; def : InstAlias<"test{q}\t{$mem, $val|$val, $mem}", (TEST64mr i64mem:$mem, GR64:$val), 0>; // xchg: We accept "xchgX , " and "xchgX , " as synonyms. def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}", (XCHG8rm GR8 :$val, i8mem :$mem), 0>; def : InstAlias<"xchg{w}\t{$mem, $val|$val, $mem}", (XCHG16rm GR16:$val, i16mem:$mem), 0>; def : InstAlias<"xchg{l}\t{$mem, $val|$val, $mem}", (XCHG32rm GR32:$val, i32mem:$mem), 0>; def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}", (XCHG64rm GR64:$val, i64mem:$mem), 0>; // xchg: We accept "xchgX , %eax" and "xchgX %eax, " as synonyms. def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src), 0>; def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar GR32:$src), 0>, Requires<[Not64BitMode]>; def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar64 GR32_NOAX:$src), 0>, Requires<[In64BitMode]>; def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>; // These aliases exist to get the parser to prioritize matching 8-bit // immediate encodings over matching the implicit ax/eax/rax encodings. By // explicitly mentioning the A register here, these entries will be ordered // first due to the more explicit immediate type. 
def : InstAlias<"adc{w}\t{$imm, %ax|ax, $imm}", (ADC16ri8 AX, i16i8imm:$imm), 0>; def : InstAlias<"add{w}\t{$imm, %ax|ax, $imm}", (ADD16ri8 AX, i16i8imm:$imm), 0>; def : InstAlias<"and{w}\t{$imm, %ax|ax, $imm}", (AND16ri8 AX, i16i8imm:$imm), 0>; def : InstAlias<"cmp{w}\t{$imm, %ax|ax, $imm}", (CMP16ri8 AX, i16i8imm:$imm), 0>; def : InstAlias<"or{w}\t{$imm, %ax|ax, $imm}", (OR16ri8 AX, i16i8imm:$imm), 0>; def : InstAlias<"sbb{w}\t{$imm, %ax|ax, $imm}", (SBB16ri8 AX, i16i8imm:$imm), 0>; def : InstAlias<"sub{w}\t{$imm, %ax|ax, $imm}", (SUB16ri8 AX, i16i8imm:$imm), 0>; def : InstAlias<"xor{w}\t{$imm, %ax|ax, $imm}", (XOR16ri8 AX, i16i8imm:$imm), 0>; def : InstAlias<"adc{l}\t{$imm, %eax|eax, $imm}", (ADC32ri8 EAX, i32i8imm:$imm), 0>; def : InstAlias<"add{l}\t{$imm, %eax|eax, $imm}", (ADD32ri8 EAX, i32i8imm:$imm), 0>; def : InstAlias<"and{l}\t{$imm, %eax|eax, $imm}", (AND32ri8 EAX, i32i8imm:$imm), 0>; def : InstAlias<"cmp{l}\t{$imm, %eax|eax, $imm}", (CMP32ri8 EAX, i32i8imm:$imm), 0>; def : InstAlias<"or{l}\t{$imm, %eax|eax, $imm}", (OR32ri8 EAX, i32i8imm:$imm), 0>; def : InstAlias<"sbb{l}\t{$imm, %eax|eax, $imm}", (SBB32ri8 EAX, i32i8imm:$imm), 0>; def : InstAlias<"sub{l}\t{$imm, %eax|eax, $imm}", (SUB32ri8 EAX, i32i8imm:$imm), 0>; def : InstAlias<"xor{l}\t{$imm, %eax|eax, $imm}", (XOR32ri8 EAX, i32i8imm:$imm), 0>; def : InstAlias<"adc{q}\t{$imm, %rax|rax, $imm}", (ADC64ri8 RAX, i64i8imm:$imm), 0>; def : InstAlias<"add{q}\t{$imm, %rax|rax, $imm}", (ADD64ri8 RAX, i64i8imm:$imm), 0>; def : InstAlias<"and{q}\t{$imm, %rax|rax, $imm}", (AND64ri8 RAX, i64i8imm:$imm), 0>; def : InstAlias<"cmp{q}\t{$imm, %rax|rax, $imm}", (CMP64ri8 RAX, i64i8imm:$imm), 0>; def : InstAlias<"or{q}\t{$imm, %rax|rax, $imm}", (OR64ri8 RAX, i64i8imm:$imm), 0>; def : InstAlias<"sbb{q}\t{$imm, %rax|rax, $imm}", (SBB64ri8 RAX, i64i8imm:$imm), 0>; def : InstAlias<"sub{q}\t{$imm, %rax|rax, $imm}", (SUB64ri8 RAX, i64i8imm:$imm), 0>; def : InstAlias<"xor{q}\t{$imm, %rax|rax, $imm}", (XOR64ri8 RAX, i64i8imm:$imm), 0>; diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index abe20a2dd3e5..a399c6c462d4 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -1,650 +1,708 @@ //===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the X86 instructions that are generally used in // privileged modes. These are not typically used by the compiler, but are // supported for the assembler and disassembler. // //===----------------------------------------------------------------------===// let SchedRW = [WriteSystem] in { let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>, TB; let Defs = [RAX, RCX, RDX] in def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB; // CPU flow control instructions let mayLoad = 1, mayStore = 0, hasSideEffects = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB; } def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", [], IIC_HLT>; def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", [], IIC_RSM>, TB; // Interrupt and SysCall Instructions. 
let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>, Requires<[Not64BitMode]>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))], IIC_INT3>; } // SchedRW // The long form of "int $3" turns into int3 as a size optimization. // FIXME: This doesn't work because InstAlias can't match immediate constants. //def : InstAlias<"int\t$3", (INT3)>; let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)], IIC_INT>; def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", [], IIC_SYSCALL>, TB; def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", [], IIC_SYSCALL>, TB; def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", [], IIC_SYSCALL>, TB, Requires<[In64BitMode]>; def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", [], IIC_SYS_ENTER_EXIT>, TB; def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", [], IIC_SYS_ENTER_EXIT>, TB; def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", [], IIC_SYS_ENTER_EXIT>, TB, Requires<[In64BitMode]>; } // SchedRW def : Pat<(debugtrap), (INT3)>, Requires<[NotPS4]>; def : Pat<(debugtrap), (INT (i8 0x41))>, Requires<[IsPS4]>; //===----------------------------------------------------------------------===// // Input/Output Instructions. // let SchedRW = [WriteSystem] in { let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|al, dx}", [], IIC_IN_RR>; let Defs = [AX], Uses = [DX] in def IN16rr : I<0xED, RawFrm, (outs), (ins), "in{w}\t{%dx, %ax|ax, dx}", [], IIC_IN_RR>, OpSize16; let Defs = [EAX], Uses = [DX] in def IN32rr : I<0xED, RawFrm, (outs), (ins), "in{l}\t{%dx, %eax|eax, dx}", [], IIC_IN_RR>, OpSize32; let Defs = [AL] in def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port), "in{b}\t{$port, %al|al, $port}", [], IIC_IN_RI>; let Defs = [AX] in def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port), "in{w}\t{$port, %ax|ax, $port}", [], IIC_IN_RI>, OpSize16; let Defs = [EAX] in def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port), "in{l}\t{$port, %eax|eax, $port}", [], IIC_IN_RI>, OpSize32; let Uses = [DX, AL] in def OUT8rr : I<0xEE, RawFrm, (outs), (ins), "out{b}\t{%al, %dx|dx, al}", [], IIC_OUT_RR>; let Uses = [DX, AX] in def OUT16rr : I<0xEF, RawFrm, (outs), (ins), "out{w}\t{%ax, %dx|dx, ax}", [], IIC_OUT_RR>, OpSize16; let Uses = [DX, EAX] in def OUT32rr : I<0xEF, RawFrm, (outs), (ins), "out{l}\t{%eax, %dx|dx, eax}", [], IIC_OUT_RR>, OpSize32; let Uses = [AL] in def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port), "out{b}\t{%al, $port|$port, al}", [], IIC_OUT_IR>; let Uses = [AX] in def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port), "out{w}\t{%ax, $port|$port, ax}", [], IIC_OUT_IR>, OpSize16; let Uses = [EAX] in def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port), "out{l}\t{%eax, $port|$port, eax}", [], IIC_OUT_IR>, OpSize32; } // SchedRW //===----------------------------------------------------------------------===// // Moves to and from debug registers let SchedRW = [WriteSystem] in { def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB, Requires<[Not64BitMode]>; def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB, Requires<[In64BitMode]>; def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB, Requires<[Not64BitMode]>; def 
MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB, Requires<[In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// // Moves to and from control registers let SchedRW = [WriteSystem] in { def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB, Requires<[Not64BitMode]>; def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB, Requires<[In64BitMode]>; def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB, Requires<[Not64BitMode]>; def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB, Requires<[In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// // Segment override instruction prefixes def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; //===----------------------------------------------------------------------===// // Moves to and from segment registers. // let SchedRW = [WriteMove] in { def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize16; def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize32; def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>; let mayStore = 1 in { def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSizeIgnore; } def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize16; def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize32; def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>; let mayLoad = 1 in { def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSizeIgnore; } } // SchedRW //===----------------------------------------------------------------------===// // Segmentation support instructions. let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; let mayLoad = 1 in def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize16; def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB, OpSize16; // i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. 
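// (LAR reads a 16-bit segment selector, so the memory form loads only i16
// even when the destination is a 32-bit or 64-bit register.)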
let mayLoad = 1 in def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize32; def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB, OpSize32; // i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo. let mayLoad = 1 in def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB; def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB; let mayLoad = 1 in def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB, OpSize16; def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB, OpSize16; let mayLoad = 1 in def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB, OpSize32; def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB, OpSize32; let mayLoad = 1 in def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB; def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB; def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", [], IIC_INVLPG>, TB; def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins), "str{w}\t$dst", [], IIC_STR>, TB, OpSize16; def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins), "str{l}\t$dst", [], IIC_STR>, TB, OpSize32; def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins), "str{q}\t$dst", [], IIC_STR>, TB; let mayStore = 1 in def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst), "str{w}\t$dst", [], IIC_STR>, TB; def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", [], IIC_LTR>, TB; let mayLoad = 1 in def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", [], IIC_LTR>, TB; def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|cs}", [], IIC_PUSH_SR>, OpSize16, Requires<[Not64BitMode]>; def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), "push{l}\t{%cs|cs}", [], IIC_PUSH_CS>, OpSize32, Requires<[Not64BitMode]>; def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), "push{w}\t{%ss|ss}", [], IIC_PUSH_SR>, OpSize16, Requires<[Not64BitMode]>; def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), "push{l}\t{%ss|ss}", [], IIC_PUSH_SR>, OpSize32, Requires<[Not64BitMode]>; def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), "push{w}\t{%ds|ds}", [], IIC_PUSH_SR>, OpSize16, Requires<[Not64BitMode]>; def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), "push{l}\t{%ds|ds}", [], IIC_PUSH_SR>, OpSize32, Requires<[Not64BitMode]>; def PUSHES16 : I<0x06, RawFrm, (outs), (ins), "push{w}\t{%es|es}", [], IIC_PUSH_SR>, OpSize16, Requires<[Not64BitMode]>; def PUSHES32 : I<0x06, RawFrm, (outs), (ins), "push{l}\t{%es|es}", [], IIC_PUSH_SR>, OpSize32, Requires<[Not64BitMode]>; def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), "push{w}\t{%fs|fs}", [], IIC_PUSH_SR>, OpSize16, TB; def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), "push{l}\t{%fs|fs}", [], IIC_PUSH_SR>, TB, OpSize32, Requires<[Not64BitMode]>; def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), "push{w}\t{%gs|gs}", [], IIC_PUSH_SR>, OpSize16, TB; def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), "push{l}\t{%gs|gs}", [], IIC_PUSH_SR>, TB, OpSize32,
Requires<[Not64BitMode]>; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t{%fs|fs}", [], IIC_PUSH_SR>, TB, OpSize32, Requires<[In64BitMode]>; def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), "push{q}\t{%gs|gs}", [], IIC_PUSH_SR>, TB, OpSize32, Requires<[In64BitMode]>; // No "pop cs" instruction. def POPSS16 : I<0x17, RawFrm, (outs), (ins), "pop{w}\t{%ss|ss}", [], IIC_POP_SR_SS>, OpSize16, Requires<[Not64BitMode]>; def POPSS32 : I<0x17, RawFrm, (outs), (ins), "pop{l}\t{%ss|ss}", [], IIC_POP_SR_SS>, OpSize32, Requires<[Not64BitMode]>; def POPDS16 : I<0x1F, RawFrm, (outs), (ins), "pop{w}\t{%ds|ds}", [], IIC_POP_SR>, OpSize16, Requires<[Not64BitMode]>; def POPDS32 : I<0x1F, RawFrm, (outs), (ins), "pop{l}\t{%ds|ds}", [], IIC_POP_SR>, OpSize32, Requires<[Not64BitMode]>; def POPES16 : I<0x07, RawFrm, (outs), (ins), "pop{w}\t{%es|es}", [], IIC_POP_SR>, OpSize16, Requires<[Not64BitMode]>; def POPES32 : I<0x07, RawFrm, (outs), (ins), "pop{l}\t{%es|es}", [], IIC_POP_SR>, OpSize32, Requires<[Not64BitMode]>; def POPFS16 : I<0xa1, RawFrm, (outs), (ins), "pop{w}\t{%fs|fs}", [], IIC_POP_SR>, OpSize16, TB; def POPFS32 : I<0xa1, RawFrm, (outs), (ins), "pop{l}\t{%fs|fs}", [], IIC_POP_SR>, TB, OpSize32, Requires<[Not64BitMode]>; def POPFS64 : I<0xa1, RawFrm, (outs), (ins), "pop{q}\t{%fs|fs}", [], IIC_POP_SR>, TB, OpSize32, Requires<[In64BitMode]>; def POPGS16 : I<0xa9, RawFrm, (outs), (ins), "pop{w}\t{%gs|gs}", [], IIC_POP_SR>, OpSize16, TB; def POPGS32 : I<0xa9, RawFrm, (outs), (ins), "pop{l}\t{%gs|gs}", [], IIC_POP_SR>, TB, OpSize32, Requires<[Not64BitMode]>; def POPGS64 : I<0xa9, RawFrm, (outs), (ins), "pop{q}\t{%gs|gs}", [], IIC_POP_SR>, TB, OpSize32, Requires<[In64BitMode]>; def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16, Requires<[Not64BitMode]>; def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32, Requires<[Not64BitMode]>; def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16; def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "lss{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32; def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), "lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16, Requires<[Not64BitMode]>; def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32, Requires<[Not64BitMode]>; def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16; def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "lfs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32; def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), "lfs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), "lgs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16; def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), "lgs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32; def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), "lgs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def 
VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", [], IIC_VERR>, TB; def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", [], IIC_VERW_MEM>, TB; let mayLoad = 1 in { def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", [], IIC_VERR>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", [], IIC_VERW_REG>, TB; } } // SchedRW //===----------------------------------------------------------------------===// // Descriptor-table support instructions let SchedRW = [WriteSystem] in { def SGDT16m : I<0x01, MRM0m, (outs), (ins opaque48mem:$dst), "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize16, Requires<[Not64BitMode]>; def SGDT32m : I<0x01, MRM0m, (outs), (ins opaque48mem:$dst), "sgdt{l}\t$dst", [], IIC_SGDT>, OpSize32, TB, Requires <[Not64BitMode]>; def SGDT64m : I<0x01, MRM0m, (outs), (ins opaque80mem:$dst), "sgdt{q}\t$dst", [], IIC_SGDT>, TB, Requires <[In64BitMode]>; def SIDT16m : I<0x01, MRM1m, (outs), (ins opaque48mem:$dst), "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize16, Requires<[Not64BitMode]>; def SIDT32m : I<0x01, MRM1m, (outs), (ins opaque48mem:$dst), "sidt{l}\t$dst", []>, OpSize32, TB, Requires <[Not64BitMode]>; def SIDT64m : I<0x01, MRM1m, (outs), (ins opaque80mem:$dst), "sidt{q}\t$dst", []>, TB, Requires <[In64BitMode]>; def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), "sldt{w}\t$dst", [], IIC_SLDT>, TB, OpSize16; let mayStore = 1 in def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst), "sldt{w}\t$dst", [], IIC_SLDT>, TB; def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins), "sldt{l}\t$dst", [], IIC_SLDT>, OpSize32, TB; // LLDT is not interpreted specially in 64-bit mode because there is no sign // extension. def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), "sldt{q}\t$dst", [], IIC_SLDT>, TB; let mayStore = 1 in def SLDT64m : RI<0x00, MRM0m, (outs), (ins i16mem:$dst), "sldt{q}\t$dst", [], IIC_SLDT>, TB; def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize16, Requires<[Not64BitMode]>; def LGDT32m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), "lgdt{l}\t$src", [], IIC_LGDT>, OpSize32, TB, Requires<[Not64BitMode]>; def LGDT64m : I<0x01, MRM2m, (outs), (ins opaque80mem:$src), "lgdt{q}\t$src", [], IIC_LGDT>, TB, Requires<[In64BitMode]>; def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize16, Requires<[Not64BitMode]>; def LIDT32m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), "lidt{l}\t$src", [], IIC_LIDT>, OpSize32, TB, Requires<[Not64BitMode]>; def LIDT64m : I<0x01, MRM3m, (outs), (ins opaque80mem:$src), "lidt{q}\t$src", [], IIC_LIDT>, TB, Requires<[In64BitMode]>; def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), "lldt{w}\t$src", [], IIC_LLDT_REG>, TB; let mayLoad = 1 in def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB; } // SchedRW //===----------------------------------------------------------------------===// // Specialized register support let SchedRW = [WriteSystem] in { let Uses = [EAX, ECX, EDX] in def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB; let Defs = [EAX, EDX], Uses = [ECX] in def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB; let Defs = [RAX, RDX], Uses = [ECX] in def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>, TB; def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), "smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB; def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), "smsw{l}\t$dst", 
[], IIC_SMSW>, OpSize32, TB; // no m form encodable; use SMSW16m def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), "smsw{q}\t$dst", [], IIC_SMSW>, TB; // For memory operands, there is only a 16-bit form def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst), "smsw{w}\t$dst", [], IIC_SMSW>, TB; def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src), "lmsw{w}\t$src", [], IIC_LMSW_MEM>, TB; let mayLoad = 1 in def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB; let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB; } // SchedRW //===----------------------------------------------------------------------===// // Cache instructions let SchedRW = [WriteSystem] in { def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; } // SchedRW +//===----------------------------------------------------------------------===// +// CET instructions +let SchedRW = [WriteSystem], Predicates = [HasSHSTK] in{ + let Uses = [SSP] in { + let Defs = [SSP] in { + def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src", + [(int_x86_incsspd GR32:$src)]>, XS; + def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src", + [(int_x86_incsspq GR64:$src)]>, XS, + Requires<[In64BitMode]>; + } // Defs SSP + + let Constraints = "$src = $dst" in { + def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src), + "rdsspd\t$dst", + [(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS; + def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src), + "rdsspq\t$dst", + [(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS, + Requires<[In64BitMode]>; + } + + let Defs = [SSP] in { + def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp", + [(int_x86_saveprevssp)]>, XS; + def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src), + "rstorssp\t$src", + [(int_x86_rstorssp addr:$src)]>, XS; + } // Defs SSP + } // Uses SSP + + def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "wrssd\t{$src, $dst|$dst, $src}", + [(int_x86_wrssd GR32:$src, addr:$dst)]>, T8; + def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "wrssq\t{$src, $dst|$dst, $src}", + [(int_x86_wrssq GR64:$src, addr:$dst)]>, T8, + Requires<[In64BitMode]>; + def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "wrussd\t{$src, $dst|$dst, $src}", + [(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD; + def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "wrussq\t{$src, $dst|$dst, $src}", + [(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD, + Requires<[In64BitMode]>; + + let Defs = [SSP] in { + let Uses = [SSP] in { + def SETSSBSY : I<0x01, MRM_E8, (outs), (ins), "setssbsy", + [(int_x86_setssbsy)]>, XS; + } // Uses SSP + + def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src), + "clrssbsy\t$src", + [(int_x86_clrssbsy addr:$src)]>, XS; + } // Defs SSP +} // SchedRW && HasSHSTK + //===----------------------------------------------------------------------===// // XSAVE instructions let SchedRW = [WriteSystem] in { let Predicates = [HasXSAVE] in { let Defs = [EDX, EAX], Uses = [ECX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; let Uses = [EDX, EAX, ECX] in def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", [(int_x86_xsetbv ECX, EDX, EAX)]>, TB; } // HasXSAVE let Uses = [EDX, EAX] in { let Predicates = [HasXSAVE] in { def XSAVE : I<0xAE, MRM4m, (outs), (ins 
opaque512mem:$dst), "xsave\t$dst", [(int_x86_xsave addr:$dst, EDX, EAX)]>, PS; def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), "xsave64\t$dst", [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), "xrstor\t$dst", [(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS; def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), "xrstor64\t$dst", [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; } let Predicates = [HasXSAVEOPT] in { def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), "xsaveopt\t$dst", [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS; def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), "xsaveopt64\t$dst", [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; } let Predicates = [HasXSAVEC] in { def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), "xsavec\t$dst", [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB; def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), "xsavec64\t$dst", [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; } let Predicates = [HasXSAVES] in { def XSAVES : I<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), "xsaves\t$dst", [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB; def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), "xsaves64\t$dst", [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), "xrstors\t$dst", [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB; def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), "xrstors64\t$dst", [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; } } // Uses } // SchedRW //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions let Defs = [RAX, RDI], Uses = [RDX, RDI] in def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB; def : InstAlias<"xstorerng", (XSTORE)>; let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in { def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB; def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB; def XCRYPTCTR : I<0xa7, MRM_D8, (outs), (ins), "xcryptctr", []>, TB; def XCRYPTCFB : I<0xa7, MRM_E0, (outs), (ins), "xcryptcfb", []>, TB; def XCRYPTOFB : I<0xa7, MRM_E8, (outs), (ins), "xcryptofb", []>, TB; } let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in { def XSHA1 : I<0xa6, MRM_C8, (outs), (ins), "xsha1", []>, TB; def XSHA256 : I<0xa6, MRM_D0, (outs), (ins), "xsha256", []>, TB; } let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB; //==-----------------------------------------------------------------------===// // PKU - enable protection key let usesCustomInserter = 1 in { def WRPKRU : PseudoI<(outs), (ins GR32:$src), [(int_x86_wrpkru GR32:$src)]>; def RDPKRU : PseudoI<(outs GR32:$dst), (ins), [(set GR32:$dst, (int_x86_rdpkru))]>; } let Defs = [EAX, EDX], Uses = [ECX] in def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB; let Uses = [EAX, ECX, EDX] in def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB; //===----------------------------------------------------------------------===// // FS/GS Base Instructions let Predicates = [HasFSGSBase, In64BitMode] in { def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins), "rdfsbase{l}\t$dst", [(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS; def RDFSBASE64 : RI<0xAE, MRM0r, (outs 
GR64:$dst), (ins), "rdfsbase{q}\t$dst", [(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS; def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins), "rdgsbase{l}\t$dst", [(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS; def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins), "rdgsbase{q}\t$dst", [(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS; def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src), "wrfsbase{l}\t$src", [(int_x86_wrfsbase_32 GR32:$src)]>, XS; def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src), "wrfsbase{q}\t$src", [(int_x86_wrfsbase_64 GR64:$src)]>, XS; def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src), "wrgsbase{l}\t$src", [(int_x86_wrgsbase_32 GR32:$src)]>, XS; def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src), "wrgsbase{q}\t$src", [(int_x86_wrgsbase_64 GR64:$src)]>, XS; } //===----------------------------------------------------------------------===// // INVPCID Instruction def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[Not64BitMode]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // SMAP Instruction let Defs = [EFLAGS] in { def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB; def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB; } //===----------------------------------------------------------------------===// // SMX Instruction let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in { def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB; } //===----------------------------------------------------------------------===// // RDPID Instruction def RDPID32 : I<0xC7, MRM7r, (outs GR32:$src), (ins), "rdpid\t$src", []>, XS, Requires<[Not64BitMode]>; def RDPID64 : I<0xC7, MRM7r, (outs GR64:$src), (ins), "rdpid\t$src", []>, XS, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // PTWRITE Instruction def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst), "ptwrite{l}\t$dst", []>, XS; def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst), "ptwrite{q}\t$dst", []>, XS, Requires<[In64BitMode]>; def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst), "ptwrite{l}\t$dst", []>, XS; def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst), "ptwrite{q}\t$dst", []>, XS, Requires<[In64BitMode]>; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index efa0cd2c6bc1..5a2230d394f9 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -1,759 +1,762 @@ //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the X86 implementation of the TargetRegisterInfo class. // This file is responsible for the frame pointer elimination optimization // on X86. 
// //===----------------------------------------------------------------------===// #include "X86RegisterInfo.h" #include "X86FrameLowering.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "X86GenRegisterInfo.inc" static cl::opt<bool> EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); X86RegisterInfo::X86RegisterInfo(const Triple &TT) : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP), X86_MC::getDwarfRegFlavour(TT, false), X86_MC::getDwarfRegFlavour(TT, true), (TT.isArch64Bit() ? X86::RIP : X86::EIP)) { X86_MC::initLLVMToSEHAndCVRegMapping(this); // Cache some information. Is64Bit = TT.isArch64Bit(); IsWin64 = Is64Bit && TT.isOSWindows(); // Use a callee-saved register as the base pointer. These registers must // not conflict with any ABI requirements. For example, in 32-bit mode PIC // requires GOT in the EBX register before function calls via PLT GOT pointer. if (Is64Bit) { SlotSize = 8; // This matches the simplified 32-bit pointer code in the data layout // computation. // FIXME: Should use the data layout? bool Use64BitReg = TT.getEnvironment() != Triple::GNUX32; StackPtr = Use64BitReg ? X86::RSP : X86::ESP; FramePtr = Use64BitReg ? X86::RBP : X86::EBP; BasePtr = Use64BitReg ? X86::RBX : X86::EBX; } else { SlotSize = 4; StackPtr = X86::ESP; FramePtr = X86::EBP; BasePtr = X86::ESI; } } bool X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { // ExecutionDepsFixer and PostRAScheduler require liveness. return true; } int X86RegisterInfo::getSEHRegNum(unsigned i) const { return getEncodingValue(i); } const TargetRegisterClass * X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const { // The sub_8bit sub-register index is more constrained in 32-bit mode. // It behaves just like the sub_8bit_hi index. if (!Is64Bit && Idx == X86::sub_8bit) Idx = X86::sub_8bit_hi; // Forward to TableGen's default version. return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx); } const TargetRegisterClass * X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned SubIdx) const { // The sub_8bit sub-register index is more constrained in 32-bit mode. if (!Is64Bit && SubIdx == X86::sub_8bit) { A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi); if (!A) return nullptr; } return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx); } const TargetRegisterClass * X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const { // Don't allow super-classes of GR8_NOREX.
// This class is only used after extracting sub_8bit_hi sub-registers. The H // sub-registers cannot be copied to the full GR8 register class in 64-bit // mode, so we cannot allow the register class inflation. // // The GR8_NOREX class is always used in a way that won't be constrained to a // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the // full GR8 class. if (RC == &X86::GR8_NOREXRegClass) return RC; const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); const TargetRegisterClass *Super = RC; TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { switch (Super->getID()) { case X86::FR32RegClassID: case X86::FR64RegClassID: // If AVX-512 isn't supported we should only inflate to these classes. if (!Subtarget.hasAVX512() && getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::VR128RegClassID: case X86::VR256RegClassID: // If VLX isn't supported we should only inflate to these classes. if (!Subtarget.hasVLX() && getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::VR128XRegClassID: case X86::VR256XRegClassID: // If VLX isn't supported we shouldn't inflate to these classes. if (Subtarget.hasVLX() && getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::FR32XRegClassID: case X86::FR64XRegClassID: // If AVX-512 isn't supported we shouldn't inflate to these classes. if (Subtarget.hasAVX512() && getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::GR8RegClassID: case X86::GR16RegClassID: case X86::GR32RegClassID: case X86::GR64RegClassID: case X86::RFP32RegClassID: case X86::RFP64RegClassID: case X86::RFP80RegClassID: case X86::VR512RegClassID: // Don't return a super-class that would shrink the spill size. // That can happen with the vector and float classes. if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; } Super = *I++; } while (Super); return RC; } const TargetRegisterClass * X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); switch (Kind) { default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); case 0: // Normal GPRs. if (Subtarget.isTarget64BitLP64()) return &X86::GR64RegClass; // If the target is 64bit but we have been told to use 32bit addresses, // we can still use 64-bit register as long as we know the high bits // are zeros. // Reflect that in the returned register class. if (Is64Bit) { // When the target also allows 64-bit frame pointer and we do have a // frame, this is fine to use it for the address accesses as well. const X86FrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) && TFI->Uses64BitFramePtr ? &X86::LOW32_ADDR_ACCESS_RBPRegClass : &X86::LOW32_ADDR_ACCESSRegClass; } return &X86::GR32RegClass; case 1: // Normal GPRs except the stack pointer (for encoding reasons). if (Subtarget.isTarget64BitLP64()) return &X86::GR64_NOSPRegClass; // NOSP does not contain RIP, so no special case here. return &X86::GR32_NOSPRegClass; case 2: // NOREX GPRs. if (Subtarget.isTarget64BitLP64()) return &X86::GR64_NOREXRegClass; return &X86::GR32_NOREXRegClass; case 3: // NOREX GPRs except the stack pointer (for encoding reasons). if (Subtarget.isTarget64BitLP64()) return &X86::GR64_NOREX_NOSPRegClass; // NOSP does not contain RIP, so no special case here. return &X86::GR32_NOREX_NOSPRegClass; case 4: // Available for tailcall (not callee-saved GPRs).
return getGPRsForTailCall(MF); } } const TargetRegisterClass * X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { const Function *F = MF.getFunction(); if (IsWin64 || (F && F->getCallingConv() == CallingConv::Win64)) return &X86::GR64_TCW64RegClass; else if (Is64Bit) return &X86::GR64_TCRegClass; bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false); if (hasHipeCC) return &X86::GR32RegClass; return &X86::GR32_TCRegClass; } const TargetRegisterClass * X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &X86::CCRRegClass) { if (Is64Bit) return &X86::GR64RegClass; else return &X86::GR32RegClass; } return RC; } unsigned X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { const X86FrameLowering *TFI = getFrameLowering(MF); unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; switch (RC->getID()) { default: return 0; case X86::GR32RegClassID: return 4 - FPDiff; case X86::GR64RegClassID: return 12 - FPDiff; case X86::VR128RegClassID: return Is64Bit ? 10 : 4; case X86::VR64RegClassID: return 4; } } const MCPhysReg * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "MachineFunction required"); const X86Subtarget &Subtarget = MF->getSubtarget(); const Function *F = MF->getFunction(); bool HasSSE = Subtarget.hasSSE1(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); bool CallsEHReturn = MF->callsEHReturn(); CallingConv::ID CC = F->getCallingConv(); // If attribute NoCallerSavedRegisters exists then we set X86_INTR calling // convention because it has the CSR list. if (MF->getFunction()->hasFnAttribute("no_caller_saved_registers")) CC = CallingConv::X86_INTR; switch (CC) { case CallingConv::GHC: case CallingConv::HiPE: return CSR_NoRegs_SaveList; case CallingConv::AnyReg: if (HasAVX) return CSR_64_AllRegs_AVX_SaveList; return CSR_64_AllRegs_SaveList; case CallingConv::PreserveMost: return CSR_64_RT_MostRegs_SaveList; case CallingConv::PreserveAll: if (HasAVX) return CSR_64_RT_AllRegs_AVX_SaveList; return CSR_64_RT_AllRegs_SaveList; case CallingConv::CXX_FAST_TLS: if (Is64Bit) return MF->getInfo()->isSplitCSR() ? CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList; break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; if (HasAVX512 && Is64Bit) return CSR_64_Intel_OCL_BI_AVX512_SaveList; if (HasAVX && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX_SaveList; if (HasAVX && Is64Bit) return CSR_64_Intel_OCL_BI_AVX_SaveList; if (!HasAVX && !IsWin64 && Is64Bit) return CSR_64_Intel_OCL_BI_SaveList; break; } case CallingConv::HHVM: return CSR_64_HHVM_SaveList; case CallingConv::X86_RegCall: if (Is64Bit) { if (IsWin64) { return (HasSSE ? CSR_Win64_RegCall_SaveList : CSR_Win64_RegCall_NoSSE_SaveList); } else { return (HasSSE ? CSR_SysV64_RegCall_SaveList : CSR_SysV64_RegCall_NoSSE_SaveList); } } else { return (HasSSE ? 
CSR_32_RegCall_SaveList : CSR_32_RegCall_NoSSE_SaveList); } case CallingConv::Cold: if (Is64Bit) return CSR_64_MostRegs_SaveList; break; case CallingConv::Win64: if (!HasSSE) return CSR_Win64_NoSSE_SaveList; return CSR_Win64_SaveList; case CallingConv::X86_64_SysV: if (CallsEHReturn) return CSR_64EHRet_SaveList; return CSR_64_SaveList; case CallingConv::X86_INTR: if (Is64Bit) { if (HasAVX512) return CSR_64_AllRegs_AVX512_SaveList; if (HasAVX) return CSR_64_AllRegs_AVX_SaveList; if (HasSSE) return CSR_64_AllRegs_SaveList; return CSR_64_AllRegs_NoSSE_SaveList; } else { if (HasAVX512) return CSR_32_AllRegs_AVX512_SaveList; if (HasAVX) return CSR_32_AllRegs_AVX_SaveList; if (HasSSE) return CSR_32_AllRegs_SSE_SaveList; return CSR_32_AllRegs_SaveList; } default: break; } if (Is64Bit) { bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && F->getAttributes().hasAttrSomewhere(Attribute::SwiftError); if (IsSwiftCC) return IsWin64 ? CSR_Win64_SwiftError_SaveList : CSR_64_SwiftError_SaveList; if (IsWin64) return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList; if (CallsEHReturn) return CSR_64EHRet_SaveList; return CSR_64_SaveList; } return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList; } const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getInfo()->isSplitCSR()) return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; return nullptr; } const uint32_t * X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const X86Subtarget &Subtarget = MF.getSubtarget(); bool HasSSE = Subtarget.hasSSE1(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); switch (CC) { case CallingConv::GHC: case CallingConv::HiPE: return CSR_NoRegs_RegMask; case CallingConv::AnyReg: if (HasAVX) return CSR_64_AllRegs_AVX_RegMask; return CSR_64_AllRegs_RegMask; case CallingConv::PreserveMost: return CSR_64_RT_MostRegs_RegMask; case CallingConv::PreserveAll: if (HasAVX) return CSR_64_RT_AllRegs_AVX_RegMask; return CSR_64_RT_AllRegs_RegMask; case CallingConv::CXX_FAST_TLS: if (Is64Bit) return CSR_64_TLS_Darwin_RegMask; break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; if (HasAVX512 && Is64Bit) return CSR_64_Intel_OCL_BI_AVX512_RegMask; if (HasAVX && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX_RegMask; if (HasAVX && Is64Bit) return CSR_64_Intel_OCL_BI_AVX_RegMask; if (!HasAVX && !IsWin64 && Is64Bit) return CSR_64_Intel_OCL_BI_RegMask; break; } case CallingConv::HHVM: return CSR_64_HHVM_RegMask; case CallingConv::X86_RegCall: if (Is64Bit) { if (IsWin64) { return (HasSSE ? CSR_Win64_RegCall_RegMask : CSR_Win64_RegCall_NoSSE_RegMask); } else { return (HasSSE ? CSR_SysV64_RegCall_RegMask : CSR_SysV64_RegCall_NoSSE_RegMask); } } else { return (HasSSE ? 
                      CSR_32_RegCall_RegMask : CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function *F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F->getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;
    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the stack-pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

+  // Set the Shadow Stack Pointer as reserved.
+  Reserved.set(X86::SSP);
+
  // Set the instruction pointer register and its aliases as reserved.
  for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid();
       ++I)
    Reserved.set(*I);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid();
         ++I)
      Reserved.set(*I);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction()->getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
        "Stack realignment in presence of dynamic allocas is not supported with"
        " this calling convention.");

    unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); I.isValid();
         ++I)
      Reserved.set(*I);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are old 32-bits.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n) {
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL}));
  return Reserved;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately the EFLAGS show up as live-out after branch folding. We add
  // an assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can use
  // neither the SP nor the FP, we need a separate base pointer register.
  bool CantUseFP = needsStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
                                           unsigned Reg, int &FrameIdx) const {
  // Since X86 defines assignCalleeSavedSpillSlots, which always returns true,
  // this function is neither used nor tested.
  llvm_unreachable("Unused function on X86. Otherwise need a test case.");
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
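// For illustration (not part of this patch), the rewrite performed below is
//
//   leal (%esp), %ebx      ; scale = 1, no index, disp = 0, no segment
//     -->
//   movl %esp, %ebx
//
// i.e. a degenerate LEA whose address expression is just the bare base
// register becomes a plain register copy.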
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) { MachineInstr &MI = *II; unsigned Opc = II->getOpcode(); // Check if this is a LEA of the form 'lea (%esp), %ebx' if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) || MI.getOperand(2).getImm() != 1 || MI.getOperand(3).getReg() != X86::NoRegister || MI.getOperand(4).getImm() != 0 || MI.getOperand(5).getReg() != X86::NoRegister) return false; unsigned BasePtr = MI.getOperand(1).getReg(); // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will // be replaced with a 32-bit operand MOV which will zero extend the upper // 32-bits of the super register. if (Opc == X86::LEA64_32r) BasePtr = getX86SubSuperRegister(BasePtr, 32); unsigned NewDestReg = MI.getOperand(0).getReg(); const X86InstrInfo *TII = MI.getParent()->getParent()->getSubtarget().getInstrInfo(); TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr, MI.getOperand(1).isKill()); MI.eraseFromParent(); return true; } void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Determine base register and offset. int FIOffset; unsigned BasePtr; if (MI.isReturn()) { assert((!needsStackRealignment(MF) || MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && "Return instruction can only reference SP relative frame objects"); FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); } else { FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); } // LOCAL_ESCAPE uses a single offset, with no register. It only works in the // simple FP case, and doesn't work with stack realignment. On 32-bit, the // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. unsigned Opc = MI.getOpcode(); if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); FI.ChangeToImmediate(FIOffset); return; } // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit // register as source operand, semantic is the same and destination is // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. // Don't change BasePtr since it is used later for stack adjustment. unsigned MachineBasePtr = BasePtr; if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr)) MachineBasePtr = getX86SubSuperRegister(BasePtr, 64); // This must be part of a four operand memory reference. Replace the // FrameIndex with base register. Add an offset to the offset. MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false); if (BasePtr == StackPtr) FIOffset += SPAdj; // The frame index format for stackmaps and patchpoints is different from the // X86 format. It only has a FI and an offset. if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { assert(BasePtr == FramePtr && "Expected the FP as base register"); int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset; MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); return; } if (MI.getOperand(FIOperandNum+3).isImm()) { // Offset is a 32-bit integer. 
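      // Note (added for clarity): an X86 memory reference spans five operands
      // starting at FIOperandNum -- base register (+0), scale (+1), index
      // register (+2), displacement (+3) and segment register (+4) -- which
      // is why the displacement patched here lives at FIOperandNum + 3.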
      int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
      int Offset = FIOffset + Imm;
      assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
             "Requesting 64-bit offset in 32-bit immediate!");
      if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
        MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
    } else {
      // Offset is symbolic. This is extremely rare.
      uint64_t Offset =
          FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
      MI.getOperand(FIOperandNum + 3).setOffset(Offset);
    }
}

unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  unsigned FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 3a61a7247c72..b6eb37d5f0e5 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -1,531 +1,534 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
// aliases between the registers, and the register classes built out of the
// registers.
//
//===----------------------------------------------------------------------===//

class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> {
  let Namespace = "X86";
  let HWEncoding = Enc;
  let SubRegs = subregs;
}

// Subregister indices.
let Namespace = "X86" in {
  def sub_8bit    : SubRegIndex<8>;
  def sub_8bit_hi : SubRegIndex<8, 8>;
  def sub_16bit   : SubRegIndex<16>;
  def sub_32bit   : SubRegIndex<32>;
  def sub_xmm     : SubRegIndex<128>;
  def sub_ymm     : SubRegIndex<256>;
}

//===----------------------------------------------------------------------===//
//  Register definitions...
//

// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).

// Dwarf numbering is different for 32-bit and 64-bit, and there are
// variations by target as well. Currently the first entry is for X86-64,
// the second for EH on X86-32/Darwin, and the third is the 'generic' one
// (X86-32/Linux and debug information on X86-32/Darwin).

// 8-bit registers
// Low registers
def AL : X86Reg<"al", 0>;
def DL : X86Reg<"dl", 2>;
def CL : X86Reg<"cl", 1>;
def BL : X86Reg<"bl", 3>;

// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
def AH : X86Reg<"ah", 4>;
def DH : X86Reg<"dh", 6>;
def CH : X86Reg<"ch", 5>;
def BH : X86Reg<"bh", 7>;

// X86-64 only, requires REX.
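// Note (added for clarity): CostPerUse = 1 below marks registers whose use
// costs an extra encoding byte (the REX prefix), so the register allocator
// prefers the legacy registers when the choices are otherwise equivalent.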
let CostPerUse = 1 in { def SIL : X86Reg<"sil", 6>; def DIL : X86Reg<"dil", 7>; def BPL : X86Reg<"bpl", 5>; def SPL : X86Reg<"spl", 4>; def R8B : X86Reg<"r8b", 8>; def R9B : X86Reg<"r9b", 9>; def R10B : X86Reg<"r10b", 10>; def R11B : X86Reg<"r11b", 11>; def R12B : X86Reg<"r12b", 12>; def R13B : X86Reg<"r13b", 13>; def R14B : X86Reg<"r14b", 14>; def R15B : X86Reg<"r15b", 15>; } // 16-bit registers let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in { def AX : X86Reg<"ax", 0, [AL,AH]>; def DX : X86Reg<"dx", 2, [DL,DH]>; def CX : X86Reg<"cx", 1, [CL,CH]>; def BX : X86Reg<"bx", 3, [BL,BH]>; } let SubRegIndices = [sub_8bit] in { def SI : X86Reg<"si", 6, [SIL]>; def DI : X86Reg<"di", 7, [DIL]>; def BP : X86Reg<"bp", 5, [BPL]>; def SP : X86Reg<"sp", 4, [SPL]>; } def IP : X86Reg<"ip", 0>; // X86-64 only, requires REX. let SubRegIndices = [sub_8bit], CostPerUse = 1 in { def R8W : X86Reg<"r8w", 8, [R8B]>; def R9W : X86Reg<"r9w", 9, [R9B]>; def R10W : X86Reg<"r10w", 10, [R10B]>; def R11W : X86Reg<"r11w", 11, [R11B]>; def R12W : X86Reg<"r12w", 12, [R12B]>; def R13W : X86Reg<"r13w", 13, [R13B]>; def R14W : X86Reg<"r14w", 14, [R14B]>; def R15W : X86Reg<"r15w", 15, [R15B]>; } // 32-bit registers let SubRegIndices = [sub_16bit] in { def EAX : X86Reg<"eax", 0, [AX]>, DwarfRegNum<[-2, 0, 0]>; def EDX : X86Reg<"edx", 2, [DX]>, DwarfRegNum<[-2, 2, 2]>; def ECX : X86Reg<"ecx", 1, [CX]>, DwarfRegNum<[-2, 1, 1]>; def EBX : X86Reg<"ebx", 3, [BX]>, DwarfRegNum<[-2, 3, 3]>; def ESI : X86Reg<"esi", 6, [SI]>, DwarfRegNum<[-2, 6, 6]>; def EDI : X86Reg<"edi", 7, [DI]>, DwarfRegNum<[-2, 7, 7]>; def EBP : X86Reg<"ebp", 5, [BP]>, DwarfRegNum<[-2, 4, 5]>; def ESP : X86Reg<"esp", 4, [SP]>, DwarfRegNum<[-2, 5, 4]>; def EIP : X86Reg<"eip", 0, [IP]>, DwarfRegNum<[-2, 8, 8]>; // X86-64 only, requires REX let CostPerUse = 1 in { def R8D : X86Reg<"r8d", 8, [R8W]>; def R9D : X86Reg<"r9d", 9, [R9W]>; def R10D : X86Reg<"r10d", 10, [R10W]>; def R11D : X86Reg<"r11d", 11, [R11W]>; def R12D : X86Reg<"r12d", 12, [R12W]>; def R13D : X86Reg<"r13d", 13, [R13W]>; def R14D : X86Reg<"r14d", 14, [R14W]>; def R15D : X86Reg<"r15d", 15, [R15W]>; }} // 64-bit registers, X86-64 only let SubRegIndices = [sub_32bit] in { def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>; def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>; def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>; def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>; def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>; def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>; def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>; def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>; // These also require REX. let CostPerUse = 1 in { def R8 : X86Reg<"r8", 8, [R8D]>, DwarfRegNum<[ 8, -2, -2]>; def R9 : X86Reg<"r9", 9, [R9D]>, DwarfRegNum<[ 9, -2, -2]>; def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>; def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>; def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>; def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>; def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>; def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>; def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>; }} // MMX Registers. These are actually aliased to ST0 .. 
ST7
def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>;
def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>;
def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>;
def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>;
def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>;
def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>;
def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>;
def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>;

// Pseudo Floating Point registers
def FP0 : X86Reg<"fp0", 0>;
def FP1 : X86Reg<"fp1", 0>;
def FP2 : X86Reg<"fp2", 0>;
def FP3 : X86Reg<"fp3", 0>;
def FP4 : X86Reg<"fp4", 0>;
def FP5 : X86Reg<"fp5", 0>;
def FP6 : X86Reg<"fp6", 0>;
def FP7 : X86Reg<"fp7", 0>;

// XMM Registers, used by the various SSE instruction set extensions.
def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>;
def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>;
def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>;
def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>;
def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>;
def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>;
def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>;

// X86-64 only
let CostPerUse = 1 in {
def XMM8:  X86Reg<"xmm8",   8>, DwarfRegNum<[25, -2, -2]>;
def XMM9:  X86Reg<"xmm9",   9>, DwarfRegNum<[26, -2, -2]>;
def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>;
def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>;
def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;

def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
} // CostPerUse

// YMM0-15 registers, used by AVX instructions and
// YMM16-31 registers, used by AVX-512 instructions.
let SubRegIndices = [sub_xmm] in {
  foreach Index = 0-31 in {
    def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
                    DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
  }
}

// ZMM Registers, used by AVX-512 instructions.
let SubRegIndices = [sub_ymm] in {
  foreach Index = 0-31 in {
    def ZMM#Index : X86Reg<"zmm"#Index, Index, [!cast<X86Reg>("YMM"#Index)]>,
                    DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
  }
}

// Mask Registers, used by AVX-512 instructions.
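// Note (added for clarity): encoding k0 as the write-mask operand means
// "no masking", so k0 can never carry an actual mask value; this is why
// the VK*WM write-mask classes further below are defined as (sub VK*, K0).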
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>; def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>; def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, 95, 95]>; def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, 96, 96]>; def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, 97, 97]>; def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, 98, 98]>; def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, 99, 99]>; def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>; // Floating point stack registers. These don't map one-to-one to the FP // pseudo registers, but we still mark them as aliasing FP registers. That // way both kinds can be live without exceeding the stack depth. ST registers // are only live around inline assembly. def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>; def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>; def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>; def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>; def ST4 : X86Reg<"st(4)", 4>, DwarfRegNum<[37, 16, 15]>; def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>; def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>; def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>; // Floating-point status word def FPSW : X86Reg<"fpsw", 0>; // Status flags register def EFLAGS : X86Reg<"flags", 0>; // Segment registers def CS : X86Reg<"cs", 1>; def DS : X86Reg<"ds", 3>; def SS : X86Reg<"ss", 2>; def ES : X86Reg<"es", 0>; def FS : X86Reg<"fs", 4>; def GS : X86Reg<"gs", 5>; // Debug registers def DR0 : X86Reg<"dr0", 0>; def DR1 : X86Reg<"dr1", 1>; def DR2 : X86Reg<"dr2", 2>; def DR3 : X86Reg<"dr3", 3>; def DR4 : X86Reg<"dr4", 4>; def DR5 : X86Reg<"dr5", 5>; def DR6 : X86Reg<"dr6", 6>; def DR7 : X86Reg<"dr7", 7>; def DR8 : X86Reg<"dr8", 8>; def DR9 : X86Reg<"dr9", 9>; def DR10 : X86Reg<"dr10", 10>; def DR11 : X86Reg<"dr11", 11>; def DR12 : X86Reg<"dr12", 12>; def DR13 : X86Reg<"dr13", 13>; def DR14 : X86Reg<"dr14", 14>; def DR15 : X86Reg<"dr15", 15>; // Control registers def CR0 : X86Reg<"cr0", 0>; def CR1 : X86Reg<"cr1", 1>; def CR2 : X86Reg<"cr2", 2>; def CR3 : X86Reg<"cr3", 3>; def CR4 : X86Reg<"cr4", 4>; def CR5 : X86Reg<"cr5", 5>; def CR6 : X86Reg<"cr6", 6>; def CR7 : X86Reg<"cr7", 7>; def CR8 : X86Reg<"cr8", 8>; def CR9 : X86Reg<"cr9", 9>; def CR10 : X86Reg<"cr10", 10>; def CR11 : X86Reg<"cr11", 11>; def CR12 : X86Reg<"cr12", 12>; def CR13 : X86Reg<"cr13", 13>; def CR14 : X86Reg<"cr14", 14>; def CR15 : X86Reg<"cr15", 15>; // Pseudo index registers def EIZ : X86Reg<"eiz", 4>; def RIZ : X86Reg<"riz", 4>; // Bound registers, used in MPX instructions def BND0 : X86Reg<"bnd0", 0>; def BND1 : X86Reg<"bnd1", 1>; def BND2 : X86Reg<"bnd2", 2>; def BND3 : X86Reg<"bnd3", 3>; +// CET registers - Shadow Stack Pointer +def SSP : X86Reg<"ssp", 0>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is // implicitly defined to be the register allocation order. // // List call-clobbered registers before callee-save registers. RBX, RBP, (and // R12, R13, R14, and R15 for X86-64) are callee-save registers. // In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and // R8B, ... R15B. // Allocate R12 and R13 last, as these require an extra byte when // encoded in x86_64 instructions. // FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in // 64-bit mode. 
The main complication is that they cannot be encoded in an // instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc. // require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d" // cannot be encoded. def GR8 : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL, R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> { let AltOrders = [(sub GR8, AH, BH, CH, DH)]; let AltOrderSelect = [{ return MF.getSubtarget().is64Bit(); }]; } def GR16 : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>; def GR32 : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>; // GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since // RIP isn't really a register and it can't be used anywhere except in an // address, but it doesn't cause trouble. // FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra // tests because of the inclusion of RIP in this register class. def GR64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP)>; // Segment registers for use by MOV instructions (and others) that have a // segment register as one operand. Always contain a 16-bit segment // descriptor. def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>; // Debug registers. def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>; // Control registers. def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>; // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" // registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers // that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD, // and GR64_ABCD are classes for registers that support 8-bit h-register // operations. def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>; def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>; def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>; def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>; def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>; def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>; def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R11, RIP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, R8, R9, R10, R11, RIP)>; // GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, AH, CH, DH, BL, BH)> { let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)]; let AltOrderSelect = [{ return MF.getSubtarget().is64Bit(); }]; } // GR16_NOREX - GR16 registers which do not require a REX prefix. def GR16_NOREX : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, SI, DI, BX, BP, SP)>; // GR32_NOREX - GR32 registers which do not require a REX prefix. def GR32_NOREX : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>; // GR64_NOREX - GR64 registers which do not require a REX prefix. def GR64_NOREX : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>; // GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit // mode to prevent encoding using the 0x90 NOP encoding. 
// xchg %eax, %eax needs to clear upper 32-bits of RAX so it is not a NOP.
def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)>;

// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;

// GR64_NOSP - GR64 registers except RSP (and RIP).
def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>;

// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix, except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
                                    (and GR32_NOREX, GR32_NOSP)>;

// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
                                    (and GR64_NOREX, GR64_NOSP)>;

// Register classes used for ABIs that use 32-bit address accesses,
// while using the whole x86_64 ISA.
// In such cases, it is fine to use RIP as we are sure the 32 high
// bits are not set. We do not need variants for NOSP as RIP is not
// allowed there.
// RIP is not spilled anywhere for now, so stick to 32-bit alignment
// to save on memory space.
// FIXME: We could allow all 64bit registers, but we would need
// something to check that the 32 high bits are not set,
// which we do not have right now.
def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;

// When RBP is used as a base pointer in a 32-bit address environment,
// it is also safe to use the full register to access addresses.
// Since RBP will never be spilled, stick to a 32-bit alignment to save
// on memory consumption.
def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
                                          (add LOW32_ADDR_ACCESS, RBP)>;

// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;

// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;

def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;

def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>;

// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
// values as 64-bit quantities instead of 80-bit quantities, which is much much
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;

// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
  let isAllocatable = 0;
}

// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64],
                          128, (add FR32)>;
def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
                          256, (sequence "YMM%u", 0, 15)>;

// Special classes that help the assembly parser choose some alternate
// instructions to favor 2-byte VEX encodings.
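// Note (added for clarity): the 2-byte VEX prefix cannot encode the X/B
// register-extension bits, so operands that need them (e.g. xmm8-xmm15 or
// ymm8-ymm15 in the r/m position) force the 3-byte form; favoring the
// low-numbered classes below saves a byte per instruction.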
def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], 128, (sequence "XMM%u", 0, 7)>; def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], 128, (sequence "XMM%u", 8, 15)>; def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 0, 7)>; def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 8, 15)>; // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> { let CopyCost = -1; // Don't allow copying of status registers. let isAllocatable = 0; } def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> { let CopyCost = -1; // Don't allow copying of status registers. let isAllocatable = 0; } // AVX-512 vector/mask registers. def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512, (sequence "ZMM%u", 0, 31)>; // Scalar AVX-512 floating point registers. def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>; def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>; // Extended VR128 and VR256 for AVX-512 instructions def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], 128, (add FR32X)>; def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 0, 31)>; // Mask registers def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;} def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;} def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;} def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;} def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;} def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;} def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;} def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;} def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;} def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;} def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;} def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;} def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;} def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} // Bound registers def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 72c08e217990..963a9c30de0d 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -1,430 +1,432 @@ //===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the X86 specific subclass of TargetSubtargetInfo. 
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallLowering.h"
#include "X86LegalizerInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86Subtarget.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <string>

#if defined(_MSC_VER)
#include <intrin.h>
#endif

using namespace llvm;

#define DEBUG_TYPE "subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "X86GenSubtargetInfo.inc"

// Temporary option to control early if-conversion for x86 while adding machine
// models.
static cl::opt<bool>
    X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
                   cl::desc("Enable early if-conversion on X86"));

/// Classify a blockaddress reference for the current subtarget according to how
/// we should reference it in a non-pcrel context.
unsigned char X86Subtarget::classifyBlockAddressReference() const {
  return classifyLocalReference(nullptr);
}

/// Classify a global variable reference for the current subtarget according to
/// how we should reference it in a non-pcrel context.
unsigned char
X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
  return classifyGlobalReference(GV, *GV->getParent());
}

unsigned char
X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
  // 64 bits can use %rip addressing for anything local.
  if (is64Bit())
    return X86II::MO_NO_FLAG;

  // If this is for a position dependent executable, the static linker can
  // figure it out.
  if (!isPositionIndependent())
    return X86II::MO_NO_FLAG;

  // The COFF dynamic linker just patches the executable sections.
  if (isTargetCOFF())
    return X86II::MO_NO_FLAG;

  if (isTargetDarwin()) {
    // 32 bit macho has no relocation for a-b if a is undefined, even if
    // b is in the section that is being relocated.
    // This means we have to use a load even for GVs that are known to be
    // local to the dso.
    if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
      return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
    return X86II::MO_PIC_BASE_OFFSET;
  }

  return X86II::MO_GOTOFF;
}

unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
                                                    const Module &M) const {
  // Large model never uses stubs.
  if (TM.getCodeModel() == CodeModel::Large)
    return X86II::MO_NO_FLAG;

  // Absolute symbols can be referenced directly.
  if (GV) {
    if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
      // See if we can use the 8-bit immediate form. Note that some instructions
      // will sign extend the immediate operand, so to be conservative we only
      // accept the range [0,128).
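      // Worked example (added for clarity): 127 survives sign extension
      // (imm8 127 -> 127), but 128 would become -128; hence the strict
      // upper bound of 128 in the check below.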
if (CR->getUnsignedMax().ult(128)) return X86II::MO_ABS8; else return X86II::MO_NO_FLAG; } } if (TM.shouldAssumeDSOLocal(M, GV)) return classifyLocalReference(GV); if (isTargetCOFF()) return X86II::MO_DLLIMPORT; if (is64Bit()) return X86II::MO_GOTPCREL; if (isTargetDarwin()) { if (!isPositionIndependent()) return X86II::MO_DARWIN_NONLAZY; return X86II::MO_DARWIN_NONLAZY_PIC_BASE; } return X86II::MO_GOT; } unsigned char X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const { return classifyGlobalFunctionReference(GV, *GV->getParent()); } unsigned char X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV, const Module &M) const { if (TM.shouldAssumeDSOLocal(M, GV)) return X86II::MO_NO_FLAG; if (isTargetCOFF()) { assert(GV->hasDLLImportStorageClass() && "shouldAssumeDSOLocal gave inconsistent answer"); return X86II::MO_DLLIMPORT; } const Function *F = dyn_cast_or_null(GV); if (isTargetELF()) { if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv())) // According to psABI, PLT stub clobbers XMM8-XMM15. // In Regcall calling convention those registers are used for passing // parameters. Thus we need to prevent lazy binding in Regcall. return X86II::MO_GOTPCREL; if (F && F->hasFnAttribute(Attribute::NonLazyBind) && is64Bit()) return X86II::MO_GOTPCREL; return X86II::MO_PLT; } if (is64Bit()) { if (F && F->hasFnAttribute(Attribute::NonLazyBind)) // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead // at the cost of eager binding (and one extra byte of encoding). return X86II::MO_GOTPCREL; return X86II::MO_NO_FLAG; } return X86II::MO_NO_FLAG; } /// This function returns the name of a function which has an interface like /// the non-standard bzero function, if such a function exists on the /// current subtarget and it is considered preferable over memset with zero /// passed as the second argument. Otherwise it returns null. const char *X86Subtarget::getBZeroEntry() const { // Darwin 10 has a __bzero entry point for this purpose. if (getTargetTriple().isMacOSX() && !getTargetTriple().isMacOSXVersionLT(10, 6)) return "__bzero"; return nullptr; } bool X86Subtarget::hasSinCos() const { if (getTargetTriple().isMacOSX()) { return !getTargetTriple().isMacOSXVersionLT(10, 9) && is64Bit(); } else if (getTargetTriple().isOSFuchsia()) { return true; } return false; } /// Return true if the subtarget allows calls to immediate address. bool X86Subtarget::isLegalToCallImmediateAddr() const { // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32 // but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does, // the following check for Win32 should be removed. if (In64BitMode || isTargetWin32()) return false; return isTargetELF() || TM.getRelocationModel() == Reloc::Static; } void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "generic"; // Make sure 64-bit features are available in 64-bit mode. (But make sure // SSE2 can be turned off explicitly.) std::string FullFS = FS; if (In64BitMode) { if (!FullFS.empty()) FullFS = "+64bit,+sse2," + FullFS; else FullFS = "+64bit,+sse2"; } // LAHF/SAHF are always supported in non-64-bit mode. if (!In64BitMode) { if (!FullFS.empty()) FullFS = "+sahf," + FullFS; else FullFS = "+sahf"; } // Parse features string and set the CPU. 
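  // For illustration (hypothetical feature string, not from this patch):
  // with a 64-bit triple and FS = "+shstk,+ibt", FullFS would reach the
  // parser below as "+64bit,+sse2,+shstk,+ibt".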
ParseSubtargetFeatures(CPUName, FullFS); // All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of // 16-bytes and under that are reasonably fast. These features were // introduced with Intel's Nehalem/Silvermont and AMD's Family10h // micro-architectures respectively. if (hasSSE42() || hasSSE4A()) IsUAMem16Slow = false; InstrItins = getInstrItineraryForCPU(CPUName); // It's important to keep the MCSubtargetInfo feature bits in sync with // target data structure which is shared with MC code emitter, etc. if (In64BitMode) ToggleFeature(X86::Mode64Bit); else if (In32BitMode) ToggleFeature(X86::Mode32Bit); else if (In16BitMode) ToggleFeature(X86::Mode16Bit); else llvm_unreachable("Not 16-bit, 32-bit or 64-bit mode!"); DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) stackAlignment = StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) stackAlignment = 16; // Some CPUs have more overhead for gather. The specified overhead is relative // to the Load operation. "2" is the number provided by Intel architects. This // parameter is used for cost estimation of Gather Op and comparison with // other alternatives. // TODO: Remove the explicit hasAVX512()?, That would mean we would only // enable gather with a -march. if (hasAVX512() || (hasAVX2() && hasFastGather())) GatherOverhead = 2; if (hasAVX512()) ScatterOverhead = 2; } void X86Subtarget::initializeEnvironment() { X86SSELevel = NoSSE; X863DNowLevel = NoThreeDNow; HasX87 = false; HasCMov = false; HasX86_64 = false; HasPOPCNT = false; HasSSE4A = false; HasAES = false; HasVAES = false; HasFXSR = false; HasXSAVE = false; HasXSAVEOPT = false; HasXSAVEC = false; HasXSAVES = false; HasPCLMUL = false; HasVPCLMULQDQ = false; HasGFNI = false; HasFMA = false; HasFMA4 = false; HasXOP = false; HasTBM = false; HasLWP = false; HasMOVBE = false; HasRDRAND = false; HasF16C = false; HasFSGSBase = false; HasLZCNT = false; HasBMI = false; HasBMI2 = false; HasVBMI = false; HasVBMI2 = false; HasIFMA = false; HasRTM = false; HasERI = false; HasCDI = false; HasPFI = false; HasDQI = false; HasVPOPCNTDQ = false; HasBWI = false; HasVLX = false; HasADX = false; HasPKU = false; HasVNNI = false; HasBITALG = false; HasSHA = false; HasPRFCHW = false; HasRDSEED = false; HasLAHFSAHF = false; HasMWAITX = false; HasCLZERO = false; HasMPX = false; + HasSHSTK = false; + HasIBT = false; HasSGX = false; HasCLFLUSHOPT = false; HasCLWB = false; IsPMULLDSlow = false; IsSHLDSlow = false; IsUAMem16Slow = false; IsUAMem32Slow = false; HasSSEUnalignedMem = false; HasCmpxchg16b = false; UseLeaForSP = false; HasFastPartialYMMorZMMWrite = false; HasFastGather = false; HasFastScalarFSQRT = false; HasFastVectorFSQRT = false; HasFastLZCNT = false; HasFastSHLDRotate = false; HasMacroFusion = false; HasERMSB = false; HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; SlowTwoMemOps = false; LEAUsesAG = false; SlowLEA = false; Slow3OpsLEA = false; SlowIncDec = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? 
MaxInlineSizeThreshold = 128; UseSoftFloat = false; X86ProcFamily = Others; GatherOverhead = 1024; ScatterOverhead = 1024; } X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); initSubtargetFeatures(CPU, FS); return *this; } X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, unsigned StackAlignOverride) : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others), PICStyle(PICStyles::None), TM(TM), TargetTriple(TT), StackAlignOverride(StackAlignOverride), In64BitMode(TargetTriple.getArch() == Triple::x86_64), In32BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() != Triple::CODE16), In16BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() == Triple::CODE16), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) { // Determine the PICStyle based on the target selected. if (!isPositionIndependent()) setPICStyle(PICStyles::None); else if (is64Bit()) setPICStyle(PICStyles::RIPRel); else if (isTargetCOFF()) setPICStyle(PICStyles::None); else if (isTargetDarwin()) setPICStyle(PICStyles::StubPIC); else if (isTargetELF()) setPICStyle(PICStyles::GOT); CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering())); Legalizer.reset(new X86LegalizerInfo(*this, TM)); auto *RBI = new X86RegisterBankInfo(*getRegisterInfo()); RegBankInfo.reset(RBI); InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI)); } const CallLowering *X86Subtarget::getCallLowering() const { return CallLoweringInfo.get(); } const InstructionSelector *X86Subtarget::getInstructionSelector() const { return InstSelector.get(); } const LegalizerInfo *X86Subtarget::getLegalizerInfo() const { return Legalizer.get(); } const RegisterBankInfo *X86Subtarget::getRegBankInfo() const { return RegBankInfo.get(); } bool X86Subtarget::enableEarlyIfConversion() const { return hasCMov() && X86EarlyIfConv; } diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 740b9ddba094..be4d46c470de 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -1,708 +1,718 @@ //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file declares the X86 specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H #include "X86FrameLowering.h" #include "X86ISelLowering.h" #include "X86InstrInfo.h" #include "X86SelectionDAGInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include #define GET_SUBTARGETINFO_HEADER #include "X86GenSubtargetInfo.inc" namespace llvm { class GlobalValue; /// The X86 backend supports a number of different styles of PIC. 
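// Note (added for clarity): these styles pair with the classification logic
// in X86Subtarget.cpp -- e.g. on 32-bit Darwin, classifyLocalReference()
// returns MO_PIC_BASE_OFFSET / MO_DARWIN_NONLAZY_PIC_BASE (StubPIC), while
// the 32-bit ELF case returns MO_GOTOFF (GOT).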
///
namespace PICStyles {

enum Style {
  StubPIC,          // Used on i386-darwin in pic mode.
  GOT,              // Used on 32-bit ELF when in pic mode.
  RIPRel,           // Used on X86-64 when in pic mode.
  None              // Set when not in pic mode.
};

} // end namespace PICStyles

class X86Subtarget final : public X86GenSubtargetInfo {
public:
  enum X86ProcFamilyEnum {
    Others, IntelAtom, IntelSLM, IntelGLM, IntelHaswell, IntelBroadwell,
    IntelSkylake, IntelKNL, IntelSKX, IntelCannonlake, IntelIcelake,
  };

protected:
  enum X86SSEEnum {
    NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
  };

  enum X863DNowEnum {
    NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
  };

  /// X86 processor family: Intel Atom, and others
  X86ProcFamilyEnum X86ProcFamily;

  /// Which PIC style to use
  PICStyles::Style PICStyle;

  const TargetMachine &TM;

  /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
  X86SSEEnum X86SSELevel;

  /// MMX, 3DNow, 3DNow Athlon, or none supported.
  X863DNowEnum X863DNowLevel;

  /// True if the processor supports X87 instructions.
  bool HasX87;

  /// True if this processor has conditional move instructions
  /// (generally pentium pro+).
  bool HasCMov;

  /// True if the processor supports X86-64 instructions.
  bool HasX86_64;

  /// True if the processor supports POPCNT.
  bool HasPOPCNT;

  /// True if the processor supports SSE4A instructions.
  bool HasSSE4A;

  /// Target has AES instructions
  bool HasAES;
  bool HasVAES;

  /// Target has FXSAVE/FXRESTOR instructions
  bool HasFXSR;

  /// Target has XSAVE instructions
  bool HasXSAVE;

  /// Target has XSAVEOPT instructions
  bool HasXSAVEOPT;

  /// Target has XSAVEC instructions
  bool HasXSAVEC;

  /// Target has XSAVES instructions
  bool HasXSAVES;

  /// Target has carry-less multiplication
  bool HasPCLMUL;
  bool HasVPCLMULQDQ;

  /// Target has Galois Field Arithmetic instructions
  bool HasGFNI;

  /// Target has 3-operand fused multiply-add
  bool HasFMA;

  /// Target has 4-operand fused multiply-add
  bool HasFMA4;

  /// Target has XOP instructions
  bool HasXOP;

  /// Target has TBM instructions.
  bool HasTBM;

  /// Target has LWP instructions
  bool HasLWP;

  /// True if the processor has the MOVBE instruction.
  bool HasMOVBE;

  /// True if the processor has the RDRAND instruction.
  bool HasRDRAND;

  /// Processor has 16-bit floating point conversion instructions.
  bool HasF16C;

  /// Processor has FS/GS base instructions.
  bool HasFSGSBase;

  /// Processor has LZCNT instruction.
  bool HasLZCNT;

  /// Processor has BMI1 instructions.
  bool HasBMI;

  /// Processor has BMI2 instructions.
  bool HasBMI2;

  /// Processor has VBMI instructions.
  bool HasVBMI;

  /// Processor has VBMI2 instructions.
  bool HasVBMI2;

  /// Processor has Integer Fused Multiply Add
  bool HasIFMA;

  /// Processor has RTM instructions.
  bool HasRTM;

  /// Processor has ADX instructions.
  bool HasADX;

  /// Processor has SHA instructions.
  bool HasSHA;

  /// Processor has PRFCHW instructions.
  bool HasPRFCHW;

  /// Processor has RDSEED instructions.
  bool HasRDSEED;

  /// Processor has LAHF/SAHF instructions.
  bool HasLAHFSAHF;

  /// Processor has MONITORX/MWAITX instructions.
  bool HasMWAITX;

  /// Processor has Cache Line Zero instruction
  bool HasCLZERO;

  /// Processor has Prefetch with intent to Write instruction
  bool HasPFPREFETCHWT1;

  /// True if SHLD instructions are slow.
  bool IsSHLDSlow;

  /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
  //  PMULUDQ.
  bool IsPMULLDSlow;

  /// True if unaligned memory accesses of 16-bytes are slow.
  bool IsUAMem16Slow;

  /// True if unaligned memory accesses of 32-bytes are slow.
  bool IsUAMem32Slow;

  /// True if SSE operations can have unaligned memory operands.
  /// This may require setting a configuration bit in the processor.
  bool HasSSEUnalignedMem;

  /// True if this processor has the CMPXCHG16B instruction;
  /// this is true for most x86-64 chips, but not the first AMD chips.
  bool HasCmpxchg16b;

  /// True if the LEA instruction should be used for adjusting
  /// the stack pointer. This is an optimization for Intel Atom processors.
  bool UseLeaForSP;

  /// True if there is no performance penalty to writing only the lower parts
  /// of a YMM or ZMM register without clearing the upper part.
  bool HasFastPartialYMMorZMMWrite;

  /// True if gather is reasonably fast. This is true for Skylake client and
  /// all AVX-512 CPUs.
  bool HasFastGather;

  /// True if hardware SQRTSS instruction is at least as fast (latency) as
  /// RSQRTSS followed by a Newton-Raphson iteration.
  bool HasFastScalarFSQRT;

  /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
  /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
  bool HasFastVectorFSQRT;

  /// True if 8-bit divisions are significantly faster than
  /// 32-bit divisions and should be used when possible.
  bool HasSlowDivide32;

  /// True if 32-bit divides are significantly faster than
  /// 64-bit divisions and should be used when possible.
  bool HasSlowDivide64;

  /// True if LZCNT instruction is fast.
  bool HasFastLZCNT;

  /// True if SHLD based rotate is fast.
  bool HasFastSHLDRotate;

  /// True if the processor supports macrofusion.
  bool HasMacroFusion;

  /// True if the processor has enhanced REP MOVSB/STOSB.
  bool HasERMSB;

  /// True if short functions should be padded to prevent
  /// a stall when returning too early.
  bool PadShortFunctions;

  /// True if two memory operand instructions should use a temporary register
  /// instead.
  bool SlowTwoMemOps;

  /// True if the LEA instruction inputs have to be ready at address generation
  /// (AG) time.
  bool LEAUsesAG;

  /// True if the LEA instruction with certain arguments is slow
  bool SlowLEA;

  /// True if the LEA instruction has all three source operands: base, index,
  /// and offset or if the LEA instruction uses base and index registers where
  /// the base is EBP, RBP, or R13
  bool Slow3OpsLEA;

  /// True if INC and DEC instructions are slow when writing to flags
  bool SlowIncDec;

  /// Processor has AVX-512 PreFetch Instructions
  bool HasPFI;

  /// Processor has AVX-512 Exponential and Reciprocal Instructions
  bool HasERI;

  /// Processor has AVX-512 Conflict Detection Instructions
  bool HasCDI;

  /// Processor has AVX-512 population count Instructions
  bool HasVPOPCNTDQ;

  /// Processor has AVX-512 Doubleword and Quadword instructions
  bool HasDQI;

  /// Processor has AVX-512 Byte and Word instructions
  bool HasBWI;

  /// Processor has AVX-512 Vector Length eXtensions
  bool HasVLX;

  /// Processor has PKU extensions
  bool HasPKU;

  /// Processor has AVX-512 Vector Neural Network Instructions
  bool HasVNNI;

  /// Processor has AVX-512 Bit Algorithms instructions
  bool HasBITALG;

  /// Processor supports MPX - Memory Protection Extensions
  bool HasMPX;

+  /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
+  /// using Shadow Stack
+  bool HasSHSTK;
+
+  /// Processor supports CET IBT - Control-Flow Enforcement Technology
+  /// using Indirect Branch Tracking
+  bool HasIBT;
+
  /// Processor has Software Guard Extensions
  bool HasSGX;

  /// Processor supports Flush Cache Line instruction
  bool HasCLFLUSHOPT;

  /// Processor supports Cache Line Write Back instruction
  bool HasCLWB;

  /// Use software floating point for code generation.
bool UseSoftFloat; /// The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. /// unsigned MaxInlineSizeThreshold; /// What processor and OS we're targeting. Triple TargetTriple; /// Instruction itineraries for scheduling InstrItineraryData InstrItins; /// GlobalISel related APIs. std::unique_ptr CallLoweringInfo; std::unique_ptr Legalizer; std::unique_ptr RegBankInfo; std::unique_ptr InstSelector; private: /// Override the stack alignment. unsigned StackAlignOverride; /// True if compiling for 64-bit, false for 16-bit or 32-bit. bool In64BitMode; /// True if compiling for 32-bit, false for 16-bit or 64-bit. bool In32BitMode; /// True if compiling for 16-bit, false for 32-bit or 64-bit. bool In16BitMode; /// Contains the Overhead of gather\scatter instructions int GatherOverhead; int ScatterOverhead; X86SelectionDAGInfo TSInfo; // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which // X86TargetLowering needs. X86InstrInfo InstrInfo; X86TargetLowering TLInfo; X86FrameLowering FrameLowering; public: /// This constructor initializes the data members to match that /// of the specified triple. /// X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, unsigned StackAlignOverride); const X86TargetLowering *getTargetLowering() const override { return &TLInfo; } const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; } const X86FrameLowering *getFrameLowering() const override { return &FrameLowering; } const X86SelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const X86RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } /// Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. unsigned getStackAlignment() const { return stackAlignment; } /// Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// Methods used by Global ISel const CallLowering *getCallLowering() const override; const InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; private: /// Initialize the full set of dependencies so we can use an initializer /// list for X86Subtarget. X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); public: /// Is this x86_64? (disregarding specific ABI / programming model) bool is64Bit() const { return In64BitMode; } bool is32Bit() const { return In32BitMode; } bool is16Bit() const { return In16BitMode; } /// Is this x86_64 with the ILP32 programming model (x32 ABI)? bool isTarget64BitILP32() const { return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32 || TargetTriple.isOSNaCl()); } /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? 
bool isTarget64BitLP64() const { return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 && !TargetTriple.isOSNaCl()); } PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } bool hasX87() const { return HasX87; } bool hasCMov() const { return HasCMov; } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } bool hasSSE3() const { return X86SSELevel >= SSE3; } bool hasSSSE3() const { return X86SSELevel >= SSSE3; } bool hasSSE41() const { return X86SSELevel >= SSE41; } bool hasSSE42() const { return X86SSELevel >= SSE42; } bool hasAVX() const { return X86SSELevel >= AVX; } bool hasAVX2() const { return X86SSELevel >= AVX2; } bool hasAVX512() const { return X86SSELevel >= AVX512F; } bool hasFp256() const { return hasAVX(); } bool hasInt256() const { return hasAVX2(); } bool hasSSE4A() const { return HasSSE4A; } bool hasMMX() const { return X863DNowLevel >= MMX; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } bool hasAES() const { return HasAES; } bool hasVAES() const { return HasVAES; } bool hasFXSR() const { return HasFXSR; } bool hasXSAVE() const { return HasXSAVE; } bool hasXSAVEOPT() const { return HasXSAVEOPT; } bool hasXSAVEC() const { return HasXSAVEC; } bool hasXSAVES() const { return HasXSAVES; } bool hasPCLMUL() const { return HasPCLMUL; } bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; } bool hasGFNI() const { return HasGFNI; } // Prefer FMA4 to FMA - its better for commutation/memory folding and // has equal or better performance on all supported targets. bool hasFMA() const { return HasFMA; } bool hasFMA4() const { return HasFMA4; } bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } bool hasLWP() const { return HasLWP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } bool hasFSGSBase() const { return HasFSGSBase; } bool hasLZCNT() const { return HasLZCNT; } bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } bool hasVBMI() const { return HasVBMI; } bool hasVBMI2() const { return HasVBMI2; } bool hasIFMA() const { return HasIFMA; } bool hasRTM() const { return HasRTM; } bool hasADX() const { return HasADX; } bool hasSHA() const { return HasSHA; } bool hasPRFCHW() const { return HasPRFCHW; } bool hasRDSEED() const { return HasRDSEED; } bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool hasMWAITX() const { return HasMWAITX; } bool hasCLZERO() const { return HasCLZERO; } bool isSHLDSlow() const { return IsSHLDSlow; } bool isPMULLDSlow() const { return IsPMULLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } int getGatherOverhead() const { return GatherOverhead; } int getScatterOverhead() const { return ScatterOverhead; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } bool hasFastPartialYMMorZMMWrite() const { return HasFastPartialYMMorZMMWrite; } bool hasFastGather() const { return HasFastGather; } bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; } bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() 
const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } bool hasMacroFusion() const { return HasMacroFusion; } bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } bool slowTwoMemOps() const { return SlowTwoMemOps; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } bool slow3OpsLEA() const { return Slow3OpsLEA; } bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } bool hasPKU() const { return HasPKU; } bool hasVNNI() const { return HasVNNI; } bool hasBITALG() const { return HasBITALG; } bool hasMPX() const { return HasMPX; } + bool hasSHSTK() const { return HasSHSTK; } + bool hasIBT() const { return HasIBT; } bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } bool hasCLWB() const { return HasCLWB; } bool isXRaySupported() const override { return is64Bit(); } X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; } /// TODO: to be removed later and replaced with suitable properties bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } bool useSoftFloat() const { return UseSoftFloat; } /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for /// no-sse2). There isn't any reason to disable it if the target processor /// supports it. bool hasMFence() const { return hasSSE2() || is64Bit(); } const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); } bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); } bool isTargetPS4() const { return TargetTriple.isPS4CPU(); } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); } bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } bool isTargetWindowsMSVC() const { return TargetTriple.isWindowsMSVCEnvironment(); } bool isTargetKnownWindowsMSVC() const { return TargetTriple.isKnownWindowsMSVCEnvironment(); } bool isTargetWindowsCoreCLR() const { return TargetTriple.isWindowsCoreCLREnvironment(); } bool isTargetWindowsCygwin() const { return TargetTriple.isWindowsCygwinEnvironment(); } bool isTargetWindowsGNU() const { return TargetTriple.isWindowsGNUEnvironment(); } bool isTargetWindowsItanium() const { return TargetTriple.isWindowsItaniumEnvironment(); } bool 
isTargetCygMing() const { return TargetTriple.isOSCygMing(); } bool isOSWindows() const { return TargetTriple.isOSWindows(); } bool isTargetWin64() const { return In64BitMode && isOSWindows(); } bool isTargetWin32() const { return !In64BitMode && isOSWindows(); } bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } bool isPICStyleStubPIC() const { return PICStyle == PICStyles::StubPIC; } bool isPositionIndependent() const { return TM.isPositionIndependent(); } bool isCallingConvWin64(CallingConv::ID CC) const { switch (CC) { // On Win64, all these conventions just use the default convention. case CallingConv::C: case CallingConv::Fast: case CallingConv::Swift: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: case CallingConv::X86_VectorCall: case CallingConv::Intel_OCL_BI: return isTargetWin64(); // This convention allows using the Win64 convention on other targets. case CallingConv::Win64: return true; // This convention allows using the SysV convention on Windows targets. case CallingConv::X86_64_SysV: return false; // Otherwise, who knows what this is. default: return false; } } /// Classify a global variable reference for the current subtarget according /// to how we should reference it in a non-pcrel context. unsigned char classifyLocalReference(const GlobalValue *GV) const; unsigned char classifyGlobalReference(const GlobalValue *GV, const Module &M) const; unsigned char classifyGlobalReference(const GlobalValue *GV) const; /// Classify a global function reference for the current subtarget. unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, const Module &M) const; unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const; /// Classify a blockaddress reference for the current subtarget according to /// how we should reference it in a non-pcrel context. unsigned char classifyBlockAddressReference() const; /// Return true if the subtarget allows calls to an immediate address. bool isLegalToCallImmediateAddr() const; /// This function returns the name of a function which has an interface /// like the non-standard bzero function, if such a function exists on /// the current subtarget and it is considered preferable over /// memset with zero passed as the second argument. Otherwise it /// returns null. const char *getBZeroEntry() const; /// This function returns true if the target has a sincos() routine in its /// compiler runtime or math libraries. bool hasSinCos() const; /// Enable the MachineScheduler pass for all X86 subtargets. bool enableMachineScheduler() const override { return true; } // TODO: Update the regression tests and return true. bool supportPrintSchedInfo() const override { return false; } bool enableEarlyIfConversion() const override; /// Return the instruction itineraries based on the subtarget selection.
const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; } AntiDepBreakMode getAntiDepBreakMode() const override { return TargetSubtargetInfo::ANTIDEP_CRITICAL; } bool enableAdvancedRASplitCost() const override { return true; } }; } // end namespace llvm #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H diff --git a/llvm/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir b/llvm/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir index 14bb5db5a51d..d9b117bd9c24 100644 --- a/llvm/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir +++ b/llvm/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir @@ -1,73 +1,73 @@ # RUN: llc -march=x86-64 -run-pass none -o - %s | FileCheck %s # This test ensures that the MIR parser parses the save and restore points in # the machine frame info correctly. --- | define i32 @foo(i32 %a, i32 %b) { entry: %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false true: store i32 %a, i32* %tmp, align 4 %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) br label %false false: %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %entry ] ret i32 %tmp.0 } declare i32 @doSomething(i32, i32*) ... --- name: foo tracksRegLiveness: true liveins: - { reg: '%edi' } - { reg: '%esi' } # CHECK: frameInfo: # CHECK: savePoint: '%bb.2.true' # CHECK-NEXT: restorePoint: '%bb.2.true' # CHECK: stack frameInfo: maxAlignment: 4 hasCalls: true savePoint: '%bb.2.true' restorePoint: '%bb.2.true' stack: - { id: 0, name: tmp, offset: 0, size: 4, alignment: 4 } body: | bb.0: successors: %bb.2.true, %bb.1 liveins: %edi, %esi %eax = COPY %edi CMP32rr %eax, killed %esi, implicit-def %eflags JL_1 %bb.2.true, implicit killed %eflags bb.1: successors: %bb.3.false liveins: %eax JMP_1 %bb.3.false bb.2.true: successors: %bb.3.false liveins: %eax MOV32mr %stack.0.tmp, 1, _, 0, _, killed %eax - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def dead %eflags, implicit %rsp + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %ssp, implicit-def dead %eflags, implicit %rsp, implicit %ssp %rsi = LEA64r %stack.0.tmp, 1, _, 0, _ %edi = MOV32r0 implicit-def dead %eflags - CALL64pcrel32 @doSomething, csr_64, implicit %rsp, implicit %edi, implicit %rsi, implicit-def %rsp, implicit-def %eax - ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def dead %eflags, implicit %rsp + CALL64pcrel32 @doSomething, csr_64, implicit %rsp, implicit %ssp, implicit %edi, implicit %rsi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax + ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %ssp, implicit-def dead %eflags, implicit %rsp, implicit %ssp bb.3.false: liveins: %eax RETQ %eax ... 
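The hunk above shows the pattern this patch applies to every call site in the tests that follow: ADJCALLSTACKDOWN/ADJCALLSTACKUP and the CALL* instructions now carry implicit-def and implicit operands on %ssp next to %rsp, so the CET shadow stack pointer is modeled as both written and read across a call sequence. In-tree this presumably falls out of extending the Defs/Uses lists on those instruction definitions rather than hand-written operand code, but as a hedged sketch, hand-building one of these pseudos with the same operand list would look like the following (the helper name is invented; X86::SSP is the register this change introduces, and the X86 opcode/register enums come from the target's generated headers, omitted here):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Hand-built equivalent of the operand list checked in the MIR above:
//   ADJCALLSTACKDOWN64 <amt>, 0, 0, implicit-def %rsp, implicit-def %ssp,
//                      implicit-def dead %eflags, implicit %rsp, implicit %ssp
static MachineInstrBuilder
emitCallFrameSetup(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                   const DebugLoc &DL, const TargetInstrInfo &TII,
                   unsigned AdjCallStackDownOpc, int64_t NumBytes) {
  return BuildMI(MBB, I, DL, TII.get(AdjCallStackDownOpc))
      .addImm(NumBytes)
      .addImm(0)
      .addImm(0)
      // The stack pointer and the shadow stack pointer are treated alike:
      // defined and read across the call sequence.
      .addReg(X86::RSP, RegState::ImplicitDefine)
      .addReg(X86::SSP, RegState::ImplicitDefine)
      .addReg(X86::EFLAGS, RegState::ImplicitDefine | RegState::Dead)
      .addReg(X86::RSP, RegState::Implicit)
      .addReg(X86::SSP, RegState::Implicit);
}

Treating %ssp exactly like %rsp is the point of the change: passes that reason about what a call touches, such as IPRA in the ipra-reg-usage.ll hunk further down, now see the shadow stack pointer too.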
diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll index 4cc2ee566a51..0b906e7a9859 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -1,796 +1,796 @@ ; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse2 -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 @a1_8bit = external global i8 @a7_8bit = external global i8 @a8_8bit = external global i8 define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) { ; ALL-LABEL: name: test_i8_args_8 ; X64: fixedStack: ; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, ; X64-NEXT: isImmutable: true, ; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, ; X64-NEXT: isImmutable: true, ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d ; X64: [[ARG1_TMP:%[0-9]+]]:_(s32) = COPY %edi ; X64: [[ARG1:%[0-9]+]]:_(s8) = G_TRUNC [[ARG1_TMP]](s32) ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %esi ; X64-NEXT: %{{[0-9]+}}:_(s8) = G_TRUNC %{{[0-9]+}}(s32) ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %edx ; X64-NEXT: %{{[0-9]+}}:_(s8) = G_TRUNC %{{[0-9]+}}(s32) ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %ecx ; X64-NEXT: %{{[0-9]+}}:_(s8) = G_TRUNC %{{[0-9]+}}(s32) ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %r8d ; X64-NEXT: %{{[0-9]+}}:_(s8) = G_TRUNC %{{[0-9]+}}(s32) ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %r9d ; X64-NEXT: %{{[0-9]+}}:_(s8) = G_TRUNC %{{[0-9]+}}(s32) ; X64-NEXT: [[ARG7_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X64-NEXT: [[ARG7:%[0-9]+]]:_(s8) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0) ; X64-NEXT: [[ARG8_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X64-NEXT: [[ARG8:%[0-9]+]]:_(s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: ; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 1, alignment: 4, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 1, alignment: 8, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 1, alignment: 4, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 1, alignment: 16, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 1, alignment: 4, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, ;X32-NEXT: isImmutable: true, ; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 1, alignment: 4, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, ; X32-NEXT: isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]]:_(s8) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] ; X32-NEXT: [[ARG2:%[0-9]+]]:_(s8) = G_LOAD [[ARG2_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK4]], align 0) ; X32-NEXT: [[ARG3_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X32-NEXT: 
[[ARG3:%[0-9]+]]:_(s8) = G_LOAD [[ARG3_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) ; X32-NEXT: [[ARG4_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]] ; X32-NEXT: [[ARG4:%[0-9]+]]:_(s8) = G_LOAD [[ARG4_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK12]], align 0) ; X32-NEXT: [[ARG5_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] ; X32-NEXT: [[ARG5:%[0-9]+]]:_(s8) = G_LOAD [[ARG5_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK16]], align 0) ; X32-NEXT: [[ARG6_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK20]] ; X32-NEXT: [[ARG6:%[0-9]+]]:_(s8) = G_LOAD [[ARG6_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK20]], align 0) ; X32-NEXT: [[ARG7_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK24]] ; X32-NEXT: [[ARG7:%[0-9]+]]:_(s8) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK24]], align 0) ; X32-NEXT: [[ARG8_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK28]] ; X32-NEXT: [[ARG8:%[0-9]+]]:_(s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK28]], align 0) ; ALL-NEXT: [[GADDR_A1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit ; ALL-NEXT: [[GADDR_A7:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit ; ALL-NEXT: [[GADDR_A8:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit ; ALL-NEXT: G_STORE [[ARG1]](s8), [[GADDR_A1]](p0) :: (store 1 into @a1_8bit) ; ALL-NEXT: G_STORE [[ARG7]](s8), [[GADDR_A7]](p0) :: (store 1 into @a7_8bit) ; ALL-NEXT: G_STORE [[ARG8]](s8), [[GADDR_A8]](p0) :: (store 1 into @a8_8bit) ; ALL-NEXT: %al = COPY [[ARG1]](s8) ; ALL-NEXT: RET 0, implicit %al entry: store i8 %arg1, i8* @a1_8bit store i8 %arg7, i8* @a7_8bit store i8 %arg8, i8* @a8_8bit ret i8 %arg1 } @a1_32bit = external global i32 @a7_32bit = external global i32 @a8_32bit = external global i32 define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) { ; ALL-LABEL: name: test_i32_args_8 ; X64: fixedStack: ; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, ; X64-NEXT: isImmutable: true, ; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, ; X64-NEXT: isImmutable: true, ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d ; X64: [[ARG1:%[0-9]+]]:_(s32) = COPY %edi ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %esi ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %edx ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %ecx ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %r8d ; X64-NEXT: %{{[0-9]+}}:_(s32) = COPY %r9d ; X64-NEXT: [[ARG7_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X64-NEXT: [[ARG7:%[0-9]+]]:_(s32) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ; X64-NEXT: [[ARG8_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X64-NEXT: [[ARG8:%[0-9]+]]:_(s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: ; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK8:[0-9]+]], type: 
default, offset: 8, size: 4, alignment: 8 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16 ; X32-NEXT: isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]]:_(s32) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] ; X32-NEXT: [[ARG2:%[0-9]+]]:_(s32) = G_LOAD [[ARG2_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0) ; X32-NEXT: [[ARG3_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X32-NEXT: [[ARG3:%[0-9]+]]:_(s32) = G_LOAD [[ARG3_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0) ; X32-NEXT: [[ARG4_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]] ; X32-NEXT: [[ARG4:%[0-9]+]]:_(s32) = G_LOAD [[ARG4_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK12]], align 0) ; X32-NEXT: [[ARG5_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] ; X32-NEXT: [[ARG5:%[0-9]+]]:_(s32) = G_LOAD [[ARG5_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK16]], align 0) ; X32-NEXT: [[ARG6_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK20]] ; X32-NEXT: [[ARG6:%[0-9]+]]:_(s32) = G_LOAD [[ARG6_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK20]], align 0) ; X32-NEXT: [[ARG7_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK24]] ; X32-NEXT: [[ARG7:%[0-9]+]]:_(s32) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK24]], align 0) ; X32-NEXT: [[ARG8_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK28]] ; X32-NEXT: [[ARG8:%[0-9]+]]:_(s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK28]], align 0) ; ALL-NEXT: [[GADDR_A1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit ; ALL-NEXT: [[GADDR_A7:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit ; ALL-NEXT: [[GADDR_A8:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit ; ALL-NEXT: G_STORE [[ARG1]](s32), [[GADDR_A1]](p0) :: (store 4 into @a1_32bit) ; ALL-NEXT: G_STORE [[ARG7]](s32), [[GADDR_A7]](p0) :: (store 4 into @a7_32bit) ; ALL-NEXT: G_STORE [[ARG8]](s32), [[GADDR_A8]](p0) :: (store 4 into @a8_32bit) ; ALL-NEXT: %eax = COPY [[ARG1]](s32) ; ALL-NEXT: RET 0, implicit %eax entry: store i32 %arg1, i32* @a1_32bit store i32 %arg7, i32* @a7_32bit store i32 %arg8, i32* @a8_32bit ret i32 %arg1 } @a1_64bit = external global i64 @a7_64bit = external global i64 @a8_64bit = external global i64 define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8) { ; ALL-LABEL: name: test_i64_args_8 ; X64: fixedStack: ; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, ; X64-NEXT: isImmutable: true, ; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, ; X64-NEXT: isImmutable: true, ; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9 ; X64: [[ARG1:%[0-9]+]]:_(s64) = COPY %rdi ; X64-NEXT: %{{[0-9]+}}:_(s64) = COPY %rsi ; X64-NEXT: %{{[0-9]+}}:_(s64) = COPY %rdx ; X64-NEXT: %{{[0-9]+}}:_(s64) = COPY %rcx ; X64-NEXT: %{{[0-9]+}}:_(s64) = COPY %r8 ; X64-NEXT: %{{[0-9]+}}:_(s64) = COPY %r9 ; X64-NEXT: [[ARG7_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X64-NEXT: [[ARG7:%[0-9]+]]:_(s64) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0) ; 
X64-NEXT: [[ARG8_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X64-NEXT: [[ARG8:%[0-9]+]]:_(s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: ; X32: id: [[STACK60:[0-9]+]], type: default, offset: 60, size: 4, alignment: 4, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK56:[0-9]+]], type: default, offset: 56, size: 4, alignment: 8, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK52:[0-9]+]], type: default, offset: 52, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK48:[0-9]+]], type: default, offset: 48, size: 4, alignment: 16 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK44:[0-9]+]], type: default, offset: 44, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK40:[0-9]+]], type: default, offset: 40, size: 4, alignment: 8 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK36:[0-9]+]], type: default, offset: 36, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK32:[0-9]+]], type: default, offset: 32, size: 4, alignment: 16 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4 ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16 ; X32-NEXT: isImmutable: true, ; X32: [[ARG1L_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1L:%[0-9]+]]:_(s32) = G_LOAD [[ARG1L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG1H_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] ; X32-NEXT: [[ARG1H:%[0-9]+]]:_(s32) = G_LOAD [[ARG1H_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK12]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK16]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK20]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK20]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK24]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK24]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK28]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK28]], align 0) ; X32-NEXT: 
%{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK32]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK32]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK36]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK36]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK40]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK40]], align 0) ; X32-NEXT: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK44]] ; X32-NEXT: %{{[0-9]+}}:_(s32) = G_LOAD %{{[0-9]+}}(p0) :: (invariant load 4 from %fixed-stack.[[STACK44]], align 0) ; X32-NEXT: [[ARG7L_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK48]] ; X32-NEXT: [[ARG7L:%[0-9]+]]:_(s32) = G_LOAD [[ARG7L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK48]], align 0) ; X32-NEXT: [[ARG7H_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK52]] ; X32-NEXT: [[ARG7H:%[0-9]+]]:_(s32) = G_LOAD [[ARG7H_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK52]], align 0) ; X32-NEXT: [[ARG8L_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK56]] ; X32-NEXT: [[ARG8L:%[0-9]+]]:_(s32) = G_LOAD [[ARG8L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK56]], align 0) ; X32-NEXT: [[ARG8H_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK60]] ; X32-NEXT: [[ARG8H:%[0-9]+]]:_(s32) = G_LOAD [[ARG8H_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK60]], align 0) ; X32-NEXT: [[ARG1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARG1L]](s32), [[ARG1H]](s32) ; ... a bunch more that we don't track ... ; X32-NEXT: G_MERGE_VALUES ; X32-NEXT: G_MERGE_VALUES ; X32-NEXT: G_MERGE_VALUES ; X32-NEXT: G_MERGE_VALUES ; X32-NEXT: G_MERGE_VALUES ; X32-NEXT: [[ARG7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARG7L]](s32), [[ARG7H]](s32) ; X32-NEXT: [[ARG8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARG8L]](s32), [[ARG8H]](s32) ; ALL-NEXT: [[GADDR_A1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit ; ALL-NEXT: [[GADDR_A7:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit ; ALL-NEXT: [[GADDR_A8:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit ; ALL-NEXT: G_STORE [[ARG1]](s64), [[GADDR_A1]](p0) :: (store 8 into @a1_64bit ; ALL-NEXT: G_STORE [[ARG7]](s64), [[GADDR_A7]](p0) :: (store 8 into @a7_64bit ; ALL-NEXT: G_STORE [[ARG8]](s64), [[GADDR_A8]](p0) :: (store 8 into @a8_64bit ; X64-NEXT: %rax = COPY [[ARG1]](s64) ; X64-NEXT: RET 0, implicit %rax ; X32-NEXT: [[RETL:%[0-9]+]]:_(s32), [[RETH:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](s64) ; X32-NEXT: %eax = COPY [[RETL:%[0-9]+]](s32) ; X32-NEXT: %edx = COPY [[RETH:%[0-9]+]](s32) ; X32-NEXT: RET 0, implicit %eax, implicit %edx entry: store i64 %arg1, i64* @a1_64bit store i64 %arg7, i64* @a7_64bit store i64 %arg8, i64* @a8_64bit ret i64 %arg1 } define float @test_float_args(float %arg1, float %arg2) { ; ALL-LABEL:name: test_float_args ; X64: liveins: %xmm0, %xmm1 ; X64: [[ARG1:%[0-9]+]]:_(s32) = COPY %xmm0 ; X64-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %xmm1 ; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](s32) ; X64-NEXT: RET 0, implicit %xmm0 ; X32: fixedStack: ; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16 ; X32-NEXT: isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]]:_(s32) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: 
(invariant load 4 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] ; X32-NEXT: [[ARG2:%[0-9]+]]:_(s32) = G_LOAD [[ARG2_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0) ; X32-NEXT: %fp0 = COPY [[ARG2:%[0-9]+]](s32) ; X32-NEXT: RET 0, implicit %fp0 ret float %arg2 } define double @test_double_args(double %arg1, double %arg2) { ; ALL-LABEL:name: test_double_args ; X64: liveins: %xmm0, %xmm1 ; X64: [[ARG1:%[0-9]+]]:_(s64) = COPY %xmm0 ; X64-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %xmm1 ; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](s64) ; X64-NEXT: RET 0, implicit %xmm0 ; X32: fixedStack: ; X32: id: [[STACK4:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, ; X32-NEXT: isImmutable: true, ; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, ; X32-NEXT: isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]]:_(s64) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] ; X32-NEXT: [[ARG2:%[0-9]+]]:_(s64) = G_LOAD [[ARG2_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK4]], align 0) ; X32-NEXT: %fp0 = COPY [[ARG2:%[0-9]+]](s64) ; X32-NEXT: RET 0, implicit %fp0 ret double %arg2 } define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) { ; ALL: name: test_v4i32_args ; ALL: liveins: %xmm0, %xmm1 ; ALL: [[ARG1:%[0-9]+]]:_(<4 x s32>) = COPY %xmm0 ; ALL-NEXT: [[ARG2:%[0-9]+]]:_(<4 x s32>) = COPY %xmm1 ; ALL-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>) ; ALL-NEXT: RET 0, implicit %xmm0 ret <4 x i32> %arg2 } define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) { ; ALL: name: test_v8i32_args ; ALL: liveins: %xmm0, %xmm1 ; ALL: [[ARG1L:%[0-9]+]]:_(<4 x s32>) = COPY %xmm0 ; ALL-NEXT: [[ARG1H:%[0-9]+]]:_(<4 x s32>) = COPY %xmm1 ; ALL-NEXT: [[ARG1:%[0-9]+]]:_(<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>) ; ALL-NEXT: [[RETL:%[0-9]+]]:_(<4 x s32>), [[RETH:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>) ; ALL-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>) ; ALL-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>) ; ALL-NEXT: RET 0, implicit %xmm0, implicit %xmm1 ret <8 x i32> %arg1 } define void @test_void_return() { ; ALL-LABEL: name: test_void_return ; ALL: bb.1.entry: ; ALL-NEXT: RET 0 entry: ret void } define i32 * @test_memop_i32(i32 * %p1) { ; ALL-LABEL:name: test_memop_i32 ;X64 liveins: %rdi ;X64: %0:_(p0) = COPY %rdi ;X64-NEXT: %rax = COPY %0(p0) ;X64-NEXT: RET 0, implicit %rax ;X32: fixedStack: ;X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, ;X32-NEXT: isImmutable: true, ;X32: %1:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ;X32-NEXT: %0:_(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ;X32-NEXT: %eax = COPY %0(p0) ;X32-NEXT: RET 0, implicit %eax ret i32 * %p1; } declare void @trivial_callee() define void @test_trivial_call() { ; ALL-LABEL: name: test_trivial_call -; X32: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: CALLpcrel32 @trivial_callee, csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 0, 0, 
implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: RET 0 -; X64: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: CALL64pcrel32 @trivial_callee, csr_64, implicit %rsp -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: RET 0 call void @trivial_callee() ret void } declare void @simple_arg_callee(i32 %in0, i32 %in1) define void @test_simple_arg(i32 %in0, i32 %in1) { ; ALL-LABEL: name: test_simple_arg ; X32: fixedStack: ; X32: - { id: 0, type: default, offset: 4, size: 4, alignment: 4, ; X32-NEXT: isImmutable: true, ; X32: - { id: 1, type: default, offset: 0, size: 4, alignment: 16, ; X32-NEXT: isImmutable: true, ; X32: body: | ; X32-NEXT: bb.1 (%ir-block.0): ; X32-NEXT: %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 ; X32-NEXT: %0:_(s32) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 0) ; X32-NEXT: %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 ; X32-NEXT: %1:_(s32) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0) -; X32-NEXT: ADJCALLSTACKDOWN32 8, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKDOWN32 8, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %4:_(p0) = COPY %esp ; X32-NEXT: %5:_(s32) = G_CONSTANT i32 0 ; X32-NEXT: %6:_(p0) = G_GEP %4, %5(s32) ; X32-NEXT: G_STORE %1(s32), %6(p0) :: (store 4 into stack, align 0) ; X32-NEXT: %7:_(p0) = COPY %esp ; X32-NEXT: %8:_(s32) = G_CONSTANT i32 4 ; X32-NEXT: %9:_(p0) = G_GEP %7, %8(s32) ; X32-NEXT: G_STORE %0(s32), %9(p0) :: (store 4 into stack + 4, align 0) ; X32-NEXT: CALLpcrel32 @simple_arg_callee, csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 8, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 8, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: RET 0 ; X64: %0:_(s32) = COPY %edi ; X64-NEXT: %1:_(s32) = COPY %esi -; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %edi = COPY %1(s32) ; X64-NEXT: %esi = COPY %0(s32) -; X64-NEXT: CALL64pcrel32 @simple_arg_callee, csr_64, implicit %rsp, implicit %edi, implicit %esi -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: CALL64pcrel32 @simple_arg_callee, csr_64, implicit %rsp, implicit %ssp, implicit %edi, implicit %esi +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: RET 0 call void @simple_arg_callee(i32 %in1, i32 %in0) ret void } declare void @simple_arg8_callee(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) define void @test_simple_arg8_call(i32 %in0) { ; ALL-LABEL: name: test_simple_arg8_call ; X32: fixedStack: ; X32: - { id: 0, type: default, offset: 0, size: 4, alignment: 16, ; X32-NEXT: isImmutable: true, ; X32: body: | ; X32-NEXT: bb.1 (%ir-block.0): ; X32-NEXT: %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 ; X32-NEXT: %0:_(s32) = 
G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 0) -; X32-NEXT: ADJCALLSTACKDOWN32 32, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKDOWN32 32, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %2:_(p0) = COPY %esp ; X32-NEXT: %3:_(s32) = G_CONSTANT i32 0 ; X32-NEXT: %4:_(p0) = G_GEP %2, %3(s32) ; X32-NEXT: G_STORE %0(s32), %4(p0) :: (store 4 into stack, align 0) ; X32-NEXT: %5:_(p0) = COPY %esp ; X32-NEXT: %6:_(s32) = G_CONSTANT i32 4 ; X32-NEXT: %7:_(p0) = G_GEP %5, %6(s32) ; X32-NEXT: G_STORE %0(s32), %7(p0) :: (store 4 into stack + 4, align 0) ; X32-NEXT: %8:_(p0) = COPY %esp ; X32-NEXT: %9:_(s32) = G_CONSTANT i32 8 ; X32-NEXT: %10:_(p0) = G_GEP %8, %9(s32) ; X32-NEXT: G_STORE %0(s32), %10(p0) :: (store 4 into stack + 8, align 0) ; X32-NEXT: %11:_(p0) = COPY %esp ; X32-NEXT: %12:_(s32) = G_CONSTANT i32 12 ; X32-NEXT: %13:_(p0) = G_GEP %11, %12(s32) ; X32-NEXT: G_STORE %0(s32), %13(p0) :: (store 4 into stack + 12, align 0) ; X32-NEXT: %14:_(p0) = COPY %esp ; X32-NEXT: %15:_(s32) = G_CONSTANT i32 16 ; X32-NEXT: %16:_(p0) = G_GEP %14, %15(s32) ; X32-NEXT: G_STORE %0(s32), %16(p0) :: (store 4 into stack + 16, align 0) ; X32-NEXT: %17:_(p0) = COPY %esp ; X32-NEXT: %18:_(s32) = G_CONSTANT i32 20 ; X32-NEXT: %19:_(p0) = G_GEP %17, %18(s32) ; X32-NEXT: G_STORE %0(s32), %19(p0) :: (store 4 into stack + 20, align 0) ; X32-NEXT: %20:_(p0) = COPY %esp ; X32-NEXT: %21:_(s32) = G_CONSTANT i32 24 ; X32-NEXT: %22:_(p0) = G_GEP %20, %21(s32) ; X32-NEXT: G_STORE %0(s32), %22(p0) :: (store 4 into stack + 24, align 0) ; X32-NEXT: %23:_(p0) = COPY %esp ; X32-NEXT: %24:_(s32) = G_CONSTANT i32 28 ; X32-NEXT: %25:_(p0) = G_GEP %23, %24(s32) ; X32-NEXT: G_STORE %0(s32), %25(p0) :: (store 4 into stack + 28, align 0) ; X32-NEXT: CALLpcrel32 @simple_arg8_callee, csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 32, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 32, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: RET 0 ; X64: %0:_(s32) = COPY %edi -; X64-NEXT: ADJCALLSTACKDOWN64 16, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKDOWN64 16, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %edi = COPY %0(s32) ; X64-NEXT: %esi = COPY %0(s32) ; X64-NEXT: %edx = COPY %0(s32) ; X64-NEXT: %ecx = COPY %0(s32) ; X64-NEXT: %r8d = COPY %0(s32) ; X64-NEXT: %r9d = COPY %0(s32) ; X64-NEXT: %1:_(p0) = COPY %rsp ; X64-NEXT: %2:_(s64) = G_CONSTANT i64 0 ; X64-NEXT: %3:_(p0) = G_GEP %1, %2(s64) ; X64-NEXT: G_STORE %0(s32), %3(p0) :: (store 4 into stack, align 0) ; X64-NEXT: %4:_(p0) = COPY %rsp ; X64-NEXT: %5:_(s64) = G_CONSTANT i64 8 ; X64-NEXT: %6:_(p0) = G_GEP %4, %5(s64) ; X64-NEXT: G_STORE %0(s32), %6(p0) :: (store 4 into stack + 8, align 0) -; X64-NEXT: CALL64pcrel32 @simple_arg8_callee, csr_64, implicit %rsp, implicit %edi, implicit %esi, implicit %edx, implicit %ecx, implicit %r8d, implicit %r9d -; X64-NEXT: ADJCALLSTACKUP64 16, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: CALL64pcrel32 @simple_arg8_callee, csr_64, implicit %rsp, implicit %ssp, implicit %edi, implicit %esi, implicit %edx, implicit %ecx, implicit %r8d, implicit %r9d +; X64-NEXT: ADJCALLSTACKUP64 16, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: RET 0 call void 
@simple_arg8_callee(i32 %in0, i32 %in0, i32 %in0, i32 %in0,i32 %in0, i32 %in0, i32 %in0, i32 %in0) ret void } declare i32 @simple_return_callee(i32 %in0) define i32 @test_simple_return_callee() { ; ALL-LABEL: name: test_simple_return_callee ; X32: %1:_(s32) = G_CONSTANT i32 5 -; X32-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %2:_(p0) = COPY %esp ; X32-NEXT: %3:_(s32) = G_CONSTANT i32 0 ; X32-NEXT: %4:_(p0) = G_GEP %2, %3(s32) ; X32-NEXT: G_STORE %1(s32), %4(p0) :: (store 4 into stack, align 0) -; X32-NEXT: CALLpcrel32 @simple_return_callee, csr_32, implicit %esp, implicit-def %eax +; X32-NEXT: CALLpcrel32 @simple_return_callee, csr_32, implicit %esp, implicit %ssp, implicit-def %eax ; X32-NEXT: %0:_(s32) = COPY %eax -; X32-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %5:_(s32) = G_ADD %0, %0 ; X32-NEXT: %eax = COPY %5(s32) ; X32-NEXT: RET 0, implicit %eax ; X64: %1:_(s32) = G_CONSTANT i32 5 -; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %edi = COPY %1(s32) -; X64-NEXT: CALL64pcrel32 @simple_return_callee, csr_64, implicit %rsp, implicit %edi, implicit-def %eax +; X64-NEXT: CALL64pcrel32 @simple_return_callee, csr_64, implicit %rsp, implicit %ssp, implicit %edi, implicit-def %eax ; X64-NEXT: %0:_(s32) = COPY %eax -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %2:_(s32) = G_ADD %0, %0 ; X64-NEXT: %eax = COPY %2(s32) ; X64-NEXT: RET 0, implicit %eax %call = call i32 @simple_return_callee(i32 5) %r = add i32 %call, %call ret i32 %r } declare <8 x i32> @split_return_callee(<8 x i32> %in0) define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) { ; ALL-LABEL: name: test_split_return_callee ; X32: fixedStack: ; X32-NEXT: - { id: 0, type: default, offset: 0, size: 16, alignment: 16, ; X32-NEXT: isImmutable: true, ; X32: %2:_(<4 x s32>) = COPY %xmm0 ; X32-NEXT: %3:_(<4 x s32>) = COPY %xmm1 ; X32-NEXT: %4:_(<4 x s32>) = COPY %xmm2 ; X32-NEXT: %6:_(p0) = G_FRAME_INDEX %fixed-stack.0 ; X32-NEXT: %5:_(<4 x s32>) = G_LOAD %6(p0) :: (invariant load 16 from %fixed-stack.0, align 0) ; X32-NEXT: %0:_(<8 x s32>) = G_MERGE_VALUES %2(<4 x s32>), %3(<4 x s32>) ; X32-NEXT: %1:_(<8 x s32>) = G_MERGE_VALUES %4(<4 x s32>), %5(<4 x s32>) -; X32-NEXT: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %8:_(<4 x s32>), %9:_(<4 x s32>) = G_UNMERGE_VALUES %1(<8 x s32>) ; X32-NEXT: %xmm0 = COPY %8(<4 x s32>) ; X32-NEXT: %xmm1 = COPY %9(<4 x s32>) -; X32-NEXT: CALLpcrel32 @split_return_callee, csr_32, implicit %esp, implicit %xmm0, implicit %xmm1, implicit-def %xmm0, implicit-def %xmm1 +; X32-NEXT: CALLpcrel32 @split_return_callee, csr_32, implicit %esp, implicit %ssp, implicit %xmm0, implicit %xmm1, 
implicit-def %xmm0, implicit-def %xmm1 ; X32-NEXT: %10:_(<4 x s32>) = COPY %xmm0 ; X32-NEXT: %11:_(<4 x s32>) = COPY %xmm1 ; X32-NEXT: %7:_(<8 x s32>) = G_MERGE_VALUES %10(<4 x s32>), %11(<4 x s32>) -; X32-NEXT: ADJCALLSTACKUP32 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %12:_(<8 x s32>) = G_ADD %0, %7 ; X32-NEXT: %13:_(<4 x s32>), %14:_(<4 x s32>) = G_UNMERGE_VALUES %12(<8 x s32>) ; X32-NEXT: %xmm0 = COPY %13(<4 x s32>) ; X32-NEXT: %xmm1 = COPY %14(<4 x s32>) ; X32-NEXT: RET 0, implicit %xmm0, implicit %xmm1 ; X64: %2:_(<4 x s32>) = COPY %xmm0 ; X64-NEXT: %3:_(<4 x s32>) = COPY %xmm1 ; X64-NEXT: %4:_(<4 x s32>) = COPY %xmm2 ; X64-NEXT: %5:_(<4 x s32>) = COPY %xmm3 ; X64-NEXT: %0:_(<8 x s32>) = G_MERGE_VALUES %2(<4 x s32>), %3(<4 x s32>) ; X64-NEXT: %1:_(<8 x s32>) = G_MERGE_VALUES %4(<4 x s32>), %5(<4 x s32>) -; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %7:_(<4 x s32>), %8:_(<4 x s32>) = G_UNMERGE_VALUES %1(<8 x s32>) ; X64-NEXT: %xmm0 = COPY %7(<4 x s32>) ; X64-NEXT: %xmm1 = COPY %8(<4 x s32>) -; X64-NEXT: CALL64pcrel32 @split_return_callee, csr_64, implicit %rsp, implicit %xmm0, implicit %xmm1, implicit-def %xmm0, implicit-def %xmm1 +; X64-NEXT: CALL64pcrel32 @split_return_callee, csr_64, implicit %rsp, implicit %ssp, implicit %xmm0, implicit %xmm1, implicit-def %xmm0, implicit-def %xmm1 ; X64-NEXT: %9:_(<4 x s32>) = COPY %xmm0 ; X64-NEXT: %10:_(<4 x s32>) = COPY %xmm1 ; X64-NEXT: %6:_(<8 x s32>) = G_MERGE_VALUES %9(<4 x s32>), %10(<4 x s32>) -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %11:_(<8 x s32>) = G_ADD %0, %6 ; X64-NEXT: %12:_(<4 x s32>), %13:_(<4 x s32>) = G_UNMERGE_VALUES %11(<8 x s32>) ; X64-NEXT: %xmm0 = COPY %12(<4 x s32>) ; X64-NEXT: %xmm1 = COPY %13(<4 x s32>) ; X64-NEXT: RET 0, implicit %xmm0, implicit %xmm1 %call = call <8 x i32> @split_return_callee(<8 x i32> %arg2) %r = add <8 x i32> %arg1, %call ret <8 x i32> %r } define void @test_indirect_call(void()* %func) { ; ALL-LABEL: name: test_indirect_call ; X32: registers: ; X32-NEXT: - { id: 0, class: gr32, preferred-register: '' } ; X32-NEXT: - { id: 1, class: _, preferred-register: '' } ; X32: %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 ; X32-NEXT: %0:gr32(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 0) -; X32-NEXT: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: CALL32r %0(p0), csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: RET 0 ; X64: registers: ; X64-NEXT: - { id: 0, class: gr64, preferred-register: '' } ; X64: %0:gr64(p0) = COPY %rdi -; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def 
%eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: CALL64r %0(p0), csr_64, implicit %rsp -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: RET 0 call void %func() ret void } declare void @take_char(i8) define void @test_abi_exts_call(i8* %addr) { ; ALL-LABEL: name: test_abi_exts_call ; X32: fixedStack: ; X32-NEXT: - { id: 0, type: default, offset: 0, size: 4, alignment: 16, ; X32-NEXT: isImmutable: true, ; X32: %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 ; X32-NEXT: %0:_(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 0) ; X32-NEXT: %2:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) -; X32-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %3:_(p0) = COPY %esp ; X32-NEXT: %4:_(s32) = G_CONSTANT i32 0 ; X32-NEXT: %5:_(p0) = G_GEP %3, %4(s32) ; X32-NEXT: %6:_(s32) = G_ANYEXT %2(s8) ; X32-NEXT: G_STORE %6(s32), %5(p0) :: (store 4 into stack, align 0) ; X32-NEXT: CALLpcrel32 @take_char, csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def %esp, implicit-def %eflags, implicit %esp -; X32-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp +; X32-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %7:_(p0) = COPY %esp ; X32-NEXT: %8:_(s32) = G_CONSTANT i32 0 ; X32-NEXT: %9:_(p0) = G_GEP %7, %8(s32) ; X32-NEXT: %10:_(s32) = G_SEXT %2(s8) ; X32-NEXT: G_STORE %10(s32), %9(p0) :: (store 4 into stack, align 0) ; X32-NEXT: CALLpcrel32 @take_char, csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def %esp, implicit-def %eflags, implicit %esp -; X32-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp +; X32-NEXT: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %11:_(p0) = COPY %esp ; X32-NEXT: %12:_(s32) = G_CONSTANT i32 0 ; X32-NEXT: %13:_(p0) = G_GEP %11, %12(s32) ; X32-NEXT: %14:_(s32) = G_ZEXT %2(s8) ; X32-NEXT: G_STORE %14(s32), %13(p0) :: (store 4 into stack, align 0) ; X32-NEXT: CALLpcrel32 @take_char, csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: RET 0 ; X64: %0:_(p0) = COPY %rdi ; X64-NEXT: %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) -; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %2:_(s32) = G_ANYEXT %1(s8) ; X64-NEXT: %edi = COPY %2(s32) -; X64-NEXT: CALL64pcrel32 @take_char, csr_64, implicit %rsp, implicit %edi -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp 
-; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: CALL64pcrel32 @take_char, csr_64, implicit %rsp, implicit %ssp, implicit %edi +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %3:_(s32) = G_SEXT %1(s8) ; X64-NEXT: %edi = COPY %3(s32) -; X64-NEXT: CALL64pcrel32 @take_char, csr_64, implicit %rsp, implicit %edi -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp -; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: CALL64pcrel32 @take_char, csr_64, implicit %rsp, implicit %ssp, implicit %edi +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %4:_(s32) = G_ZEXT %1(s8) ; X64-NEXT: %edi = COPY %4(s32) -; X64-NEXT: CALL64pcrel32 @take_char, csr_64, implicit %rsp, implicit %edi -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: CALL64pcrel32 @take_char, csr_64, implicit %rsp, implicit %ssp, implicit %edi +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: RET 0 %val = load i8, i8* %addr call void @take_char(i8 %val) call void @take_char(i8 signext %val) call void @take_char(i8 zeroext %val) ret void } declare void @variadic_callee(i8*, ...) define void @test_variadic_call_1(i8** %addr_ptr, i32* %val_ptr) { ; ALL-LABEL: name: test_variadic_call_1 ; X32: fixedStack: ; X32-NEXT: - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: 0, ; X32-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true } ; X32-NEXT: - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: 0, ; X32-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true } ; X32: %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 ; X32-NEXT: %0:_(p0) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 0) ; X32-NEXT: %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 ; X32-NEXT: %1:_(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0) ; X32-NEXT: %4:_(p0) = G_LOAD %0(p0) :: (load 4 from %ir.addr_ptr) ; X32-NEXT: %5:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.val_ptr) -; X32-NEXT: ADJCALLSTACKDOWN32 8, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKDOWN32 8, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %6:_(p0) = COPY %esp ; X32-NEXT: %7:_(s32) = G_CONSTANT i32 0 ; X32-NEXT: %8:_(p0) = G_GEP %6, %7(s32) ; X32-NEXT: G_STORE %4(p0), %8(p0) :: (store 4 into stack, align 0) ; X32-NEXT: %9:_(p0) = COPY %esp ; X32-NEXT: %10:_(s32) = G_CONSTANT i32 4 ; X32-NEXT: %11:_(p0) = G_GEP %9, %10(s32) ; X32-NEXT: G_STORE %5(s32), %11(p0) :: (store 4 into stack + 4, align 0) ; X32-NEXT: CALLpcrel32 @variadic_callee, csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 8, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 8, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit 
%esp, implicit %ssp ; X32-NEXT: RET 0 ; X64: %0:_(p0) = COPY %rdi ; X64-NEXT: %1:_(p0) = COPY %rsi ; X64-NEXT: %2:_(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr_ptr) ; X64-NEXT: %3:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.val_ptr) -; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %rdi = COPY %2(p0) ; X64-NEXT: %esi = COPY %3(s32) ; X64-NEXT: %al = MOV8ri 0 -; X64-NEXT: CALL64pcrel32 @variadic_callee, csr_64, implicit %rsp, implicit %rdi, implicit %esi, implicit %al -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: CALL64pcrel32 @variadic_callee, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit %esi, implicit %al +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: RET 0 %addr = load i8*, i8** %addr_ptr %val = load i32, i32* %val_ptr call void (i8*, ...) @variadic_callee(i8* %addr, i32 %val) ret void } define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) { ; ALL-LABEL: name: test_variadic_call_2 ; X32: fixedStack: ; X32-NEXT: - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: 0, ; X32-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true } ; X32-NEXT: - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: 0, ; X32-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true } ; X32: %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 ; X32-NEXT: %0:_(p0) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 0) ; X32-NEXT: %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 ; X32-NEXT: %1:_(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0) ; X32-NEXT: %4:_(p0) = G_LOAD %0(p0) :: (load 4 from %ir.addr_ptr) ; X32-NEXT: %5:_(s64) = G_LOAD %1(p0) :: (load 8 from %ir.val_ptr, align 4) -; X32-NEXT: ADJCALLSTACKDOWN32 12, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKDOWN32 12, 0, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: %6:_(p0) = COPY %esp ; X32-NEXT: %7:_(s32) = G_CONSTANT i32 0 ; X32-NEXT: %8:_(p0) = G_GEP %6, %7(s32) ; X32-NEXT: G_STORE %4(p0), %8(p0) :: (store 4 into stack, align 0) ; X32-NEXT: %9:_(p0) = COPY %esp ; X32-NEXT: %10:_(s32) = G_CONSTANT i32 4 ; X32-NEXT: %11:_(p0) = G_GEP %9, %10(s32) ; X32-NEXT: G_STORE %5(s64), %11(p0) :: (store 8 into stack + 4, align 0) ; X32-NEXT: CALLpcrel32 @variadic_callee, csr_32, implicit %esp -; X32-NEXT: ADJCALLSTACKUP32 12, 0, implicit-def %esp, implicit-def %eflags, implicit %esp +; X32-NEXT: ADJCALLSTACKUP32 12, 0, implicit-def %esp, implicit-def %eflags, implicit-def %ssp, implicit %esp, implicit %ssp ; X32-NEXT: RET 0 ; X64: %1:_(p0) = COPY %rsi ; X64-NEXT: %2:_(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr_ptr) ; X64-NEXT: %3:_(s64) = G_LOAD %1(p0) :: (load 8 from %ir.val_ptr) -; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: %rdi = COPY %2(p0) ; X64-NEXT: %xmm0 = COPY %3(s64) ; X64-NEXT: %al = MOV8ri 1 -; X64-NEXT: CALL64pcrel32 @variadic_callee, csr_64, implicit %rsp, implicit %rdi, 
implicit %xmm0, implicit %al -; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp +; X64-NEXT: CALL64pcrel32 @variadic_callee, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit %xmm0, implicit %al +; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit-def %ssp, implicit %rsp, implicit %ssp ; X64-NEXT: RET 0 %addr = load i8*, i8** %addr_ptr %val = load double, double* %val_ptr call void (i8*, ...) @variadic_callee(i8* %addr, double %val) ret void } diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll index ca97472bb820..6a84ab8ab750 100644 --- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -1,12 +1,12 @@ ; RUN: llc -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: CS DS EFLAGS EIP EIZ ES FPSW FS GS IP RIP RIZ SS BND0 BND1 BND2 BND3 CR0 CR1 CR2 CR3 CR4 CR5 CR6 CR7 CR8 CR9 CR10 CR11 CR12 CR13 CR14 CR15 DR0 DR1 DR2 DR3 DR4 DR5 DR6 DR7 DR8 DR9 DR10 DR11 DR12 DR13 DR14 DR15 FP0 FP1 FP2 FP3 FP4 FP5 FP6 FP7 K0 K1 K2 K3 K4 K5 K6 K7 MM0 MM1 MM2 MM3 MM4 MM5 MM6 MM7 R11 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 XMM16 XMM17 XMM18 XMM19 XMM20 XMM21 XMM22 XMM23 XMM24 XMM25 XMM26 XMM27 XMM28 XMM29 XMM30 XMM31 YMM0 YMM1 YMM2 YMM3 YMM4 YMM5 YMM6 YMM7 YMM8 YMM9 YMM10 YMM11 YMM12 YMM13 YMM14 YMM15 YMM16 YMM17 YMM18 YMM19 YMM20 YMM21 YMM22 YMM23 YMM24 YMM25 YMM26 YMM27 YMM28 YMM29 YMM30 YMM31 ZMM0 ZMM1 ZMM2 ZMM3 ZMM4 ZMM5 ZMM6 ZMM7 ZMM8 ZMM9 ZMM10 ZMM11 ZMM12 ZMM13 ZMM14 ZMM15 ZMM16 ZMM17 ZMM18 ZMM19 ZMM20 ZMM21 ZMM22 ZMM23 ZMM24 ZMM25 ZMM26 ZMM27 ZMM28 ZMM29 ZMM30 ZMM31 R11B R11D R11W +; CHECK: foo Clobbered Registers: CS DS EFLAGS EIP EIZ ES FPSW FS GS IP RIP RIZ SS SSP BND0 BND1 BND2 BND3 CR0 CR1 CR2 CR3 CR4 CR5 CR6 CR7 CR8 CR9 CR10 CR11 CR12 CR13 CR14 CR15 DR0 DR1 DR2 DR3 DR4 DR5 DR6 DR7 DR8 DR9 DR10 DR11 DR12 DR13 DR14 DR15 FP0 FP1 FP2 FP3 FP4 FP5 FP6 FP7 K0 K1 K2 K3 K4 K5 K6 K7 MM0 MM1 MM2 MM3 MM4 MM5 MM6 MM7 R11 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 XMM16 XMM17 XMM18 XMM19 XMM20 XMM21 XMM22 XMM23 XMM24 XMM25 XMM26 XMM27 XMM28 XMM29 XMM30 XMM31 YMM0 YMM1 YMM2 YMM3 YMM4 YMM5 YMM6 YMM7 YMM8 YMM9 YMM10 YMM11 YMM12 YMM13 YMM14 YMM15 YMM16 YMM17 YMM18 YMM19 YMM20 YMM21 YMM22 YMM23 YMM24 YMM25 YMM26 YMM27 YMM28 YMM29 YMM30 YMM31 ZMM0 ZMM1 ZMM2 ZMM3 ZMM4 ZMM5 ZMM6 ZMM7 ZMM8 ZMM9 ZMM10 ZMM11 ZMM12 ZMM13 ZMM14 ZMM15 ZMM16 ZMM17 ZMM18 ZMM19 ZMM20 ZMM21 ZMM22 ZMM23 ZMM24 ZMM25 ZMM26 ZMM27 ZMM28 ZMM29 ZMM30 ZMM31 R11B R11D R11W call void @bar1() call void @bar2() ret void } declare void @bar2() attributes #0 = {nounwind} diff --git a/llvm/test/CodeGen/X86/movtopush.mir b/llvm/test/CodeGen/X86/movtopush.mir index 4b8fac8d411f..95ba9490c317 100644 --- a/llvm/test/CodeGen/X86/movtopush.mir +++ b/llvm/test/CodeGen/X86/movtopush.mir @@ -1,125 +1,125 @@ # RUN: llc -mtriple=i686-windows --run-pass="x86-cf-opt" %s -o - | FileCheck %s # PR34903 --- | target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686--windows-msvc" %struct.s = type { i64 } declare void @good(i32, i32, i32, i32) declare void @struct(%struct.s* byval, i32, i32, i32) ; Function Attrs: optsize define void @test9() #0 { entry: %p = alloca i32, align 4 %q = alloca i32, align 4 %s = alloca %struct.s, align 4 call void @good(i32 1, i32 2, i32 3, i32 4) %pv = ptrtoint i32* %p to i32 %qv = ptrtoint i32* %q to i32 call void 
@struct(%struct.s* byval %s, i32 6, i32 %qv, i32 %pv) ret void } ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #1 attributes #0 = { optsize } attributes #1 = { nounwind } ... --- # CHECK-LABEL: test9 -# CHECK: ADJCALLSTACKDOWN32 16, 0, 16, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp +# CHECK: ADJCALLSTACKDOWN32 16, 0, 16, implicit-def dead %esp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %esp, implicit %ssp # CHECK-NEXT: PUSH32i8 4, implicit-def %esp, implicit %esp # CHECK-NEXT: PUSH32i8 3, implicit-def %esp, implicit %esp # CHECK-NEXT: PUSH32i8 2, implicit-def %esp, implicit %esp # CHECK-NEXT: PUSH32i8 1, implicit-def %esp, implicit %esp -# CHECK-NEXT: CALLpcrel32 @good, csr_32, implicit %esp, implicit-def %esp -# CHECK-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp -# CHECK-NEXT: ADJCALLSTACKDOWN32 20, 0, 20, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp +# CHECK-NEXT: CALLpcrel32 @good, csr_32, implicit %esp, implicit %ssp, implicit-def %esp, implicit-def %ssp +# CHECK-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %esp, implicit %ssp +# CHECK-NEXT: ADJCALLSTACKDOWN32 20, 0, 20, implicit-def dead %esp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %esp, implicit %ssp # CHECK-NEXT: %1:gr32 = MOV32rm %stack.2.s, 1, _, 0, _ :: (load 4 from %stack.2.s, align 8) # CHECK-NEXT: %2:gr32 = MOV32rm %stack.2.s, 1, _, 4, _ :: (load 4 from %stack.2.s + 4) # CHECK-NEXT: %4:gr32 = LEA32r %stack.0.p, 1, _, 0, _ # CHECK-NEXT: %5:gr32 = LEA32r %stack.1.q, 1, _, 0, _ # CHECK-NEXT: PUSH32r %4, implicit-def %esp, implicit %esp # CHECK-NEXT: PUSH32r %5, implicit-def %esp, implicit %esp # CHECK-NEXT: PUSH32i8 6, implicit-def %esp, implicit %esp # CHECK-NEXT: PUSH32r %2, implicit-def %esp, implicit %esp # CHECK-NEXT: PUSH32r %1, implicit-def %esp, implicit %esp -# CHECK-NEXT: CALLpcrel32 @struct, csr_32, implicit %esp, implicit-def %esp -# CHECK-NEXT: ADJCALLSTACKUP32 20, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp +# CHECK-NEXT: CALLpcrel32 @struct, csr_32, implicit %esp, implicit %ssp, implicit-def %esp, implicit-def %ssp +# CHECK-NEXT: ADJCALLSTACKUP32 20, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %esp, implicit %ssp # CHECK-NEXT: RET 0 name: test9 alignment: 0 exposesReturnsTwice: false legalized: false regBankSelected: false selected: false tracksRegLiveness: true registers: - { id: 0, class: gr32, preferred-register: '' } - { id: 1, class: gr32, preferred-register: '' } - { id: 2, class: gr32, preferred-register: '' } - { id: 3, class: gr32, preferred-register: '' } - { id: 4, class: gr32, preferred-register: '' } - { id: 5, class: gr32, preferred-register: '' } liveins: frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false hasPatchPoint: false stackSize: 0 offsetAdjustment: 0 maxAlignment: 8 adjustsStack: false hasCalls: true stackProtector: '' maxCallFrameSize: 4294967295 hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' fixedStack: stack: - { id: 0, name: p, type: default, offset: 0, size: 4, alignment: 4, stack-id: 0, callee-saved-register: '', callee-saved-restored: true, di-variable: '', di-expression: '', di-location: '' } - { id: 1, name: q, type: default, offset: 0, size: 4, alignment: 4, stack-id: 0, callee-saved-register: '', 
callee-saved-restored: true, di-variable: '', di-expression: '', di-location: '' } - { id: 2, name: s, type: default, offset: 0, size: 8, alignment: 8, stack-id: 0, callee-saved-register: '', callee-saved-restored: true, di-variable: '', di-expression: '', di-location: '' } constants: body: | bb.0.entry: - ADJCALLSTACKDOWN32 16, 0, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp + ADJCALLSTACKDOWN32 16, 0, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %esp, implicit %ssp %0 = COPY %esp MOV32mi %0, 1, _, 12, _, 4 :: (store 4 into stack + 12) MOV32mi %0, 1, _, 8, _, 3 :: (store 4 into stack + 8) MOV32mi %0, 1, _, 4, _, 2 :: (store 4 into stack + 4) MOV32mi %0, 1, _, 0, _, 1 :: (store 4 into stack) - CALLpcrel32 @good, csr_32, implicit %esp, implicit-def %esp - ADJCALLSTACKUP32 16, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp - ADJCALLSTACKDOWN32 20, 0, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp + CALLpcrel32 @good, csr_32, implicit %esp, implicit %ssp, implicit-def %esp, implicit-def %ssp + ADJCALLSTACKUP32 16, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %esp, implicit %ssp + ADJCALLSTACKDOWN32 20, 0, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %esp, implicit %ssp %1 = MOV32rm %stack.2.s, 1, _, 0, _ :: (load 4 from %stack.2.s, align 8) %2 = MOV32rm %stack.2.s, 1, _, 4, _ :: (load 4 from %stack.2.s + 4) %3 = COPY %esp MOV32mr %3, 1, _, 4, _, killed %2 :: (store 4) MOV32mr %3, 1, _, 0, _, killed %1 :: (store 4) %4 = LEA32r %stack.0.p, 1, _, 0, _ MOV32mr %3, 1, _, 16, _, killed %4 :: (store 4 into stack + 16) %5 = LEA32r %stack.1.q, 1, _, 0, _ MOV32mr %3, 1, _, 12, _, killed %5 :: (store 4 into stack + 12) MOV32mi %3, 1, _, 8, _, 6 :: (store 4 into stack + 8) - CALLpcrel32 @struct, csr_32, implicit %esp, implicit-def %esp - ADJCALLSTACKUP32 20, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp + CALLpcrel32 @struct, csr_32, implicit %esp, implicit %ssp, implicit-def %esp, implicit-def %ssp + ADJCALLSTACKUP32 20, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %esp, implicit %ssp RET 0 ... diff --git a/llvm/test/CodeGen/X86/tail-call-conditional.mir b/llvm/test/CodeGen/X86/tail-call-conditional.mir index e006138ba848..300b2734f52f 100644 --- a/llvm/test/CodeGen/X86/tail-call-conditional.mir +++ b/llvm/test/CodeGen/X86/tail-call-conditional.mir @@ -1,85 +1,85 @@ # RUN: llc -mtriple x86_64-- -verify-machineinstrs -run-pass branch-folder -o - %s | FileCheck %s # Check the TCRETURNdi64cc optimization. --- | target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" define i64 @test(i64 %arg, i8* %arg1) optsize { %tmp = icmp ult i64 %arg, 100 br i1 %tmp, label %1, label %4 %tmp3 = icmp ult i64 %arg, 10 br i1 %tmp3, label %2, label %3 %tmp5 = tail call i64 @f1(i8* %arg1, i64 %arg) ret i64 %tmp5 %tmp7 = tail call i64 @f2(i8* %arg1, i64 %arg) ret i64 %tmp7 ret i64 123 } declare i64 @f1(i8*, i64) declare i64 @f2(i8*, i64) ...
--- name: test tracksRegLiveness: true liveins: - { reg: '%rdi' } - { reg: '%rsi' } body: | bb.0: successors: %bb.1, %bb.4 liveins: %rdi, %rsi %rax = COPY %rdi CMP64ri8 %rax, 99, implicit-def %eflags JA_1 %bb.4, implicit %eflags JMP_1 %bb.1 ; CHECK: bb.1: ; CHECK-NEXT: successors: %bb.2({{[^)]+}}){{$}} ; CHECK-NEXT: liveins: %rax, %rsi ; CHECK-NEXT: {{^ $}} ; CHECK-NEXT: %rdi = COPY %rsi ; CHECK-NEXT: %rsi = COPY %rax ; CHECK-NEXT: CMP64ri8 %rax, 9, implicit-def %eflags - ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 3, csr_64, implicit %rsp, implicit %eflags, implicit %rsp, implicit %rdi, implicit %rsi, implicit %rax, implicit-def %rax, implicit %sil, implicit-def %sil, implicit %si, implicit-def %si, implicit %esi, implicit-def %esi, implicit %rsi, implicit-def %rsi, implicit %dil, implicit-def %dil, implicit %di, implicit-def %di, implicit %edi, implicit-def %edi, implicit %rdi, implicit-def %rdi, implicit %ah, implicit-def %ah, implicit %al, implicit-def %al, implicit %ax, implicit-def %ax, implicit %eax, implicit-def %eax + ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 3, csr_64, implicit %rsp, implicit %eflags, implicit %ssp, implicit %rsp, implicit %rdi, implicit %rsi, implicit %rax, implicit-def %rax, implicit %sil, implicit-def %sil, implicit %si, implicit-def %si, implicit %esi, implicit-def %esi, implicit %rsi, implicit-def %rsi, implicit %dil, implicit-def %dil, implicit %di, implicit-def %di, implicit %edi, implicit-def %edi, implicit %rdi, implicit-def %rdi, implicit %ah, implicit-def %ah, implicit %al, implicit-def %al, implicit %ax, implicit-def %ax, implicit %eax, implicit-def %eax bb.1: successors: %bb.2, %bb.3 liveins: %rax, %rsi CMP64ri8 %rax, 9, implicit-def %eflags JA_1 %bb.3, implicit %eflags JMP_1 %bb.2 bb.2: liveins: %rax, %rsi %rdi = COPY %rsi %rsi = COPY %rax TCRETURNdi64 @f1, 0, csr_64, implicit %rsp, implicit %rdi, implicit %rsi ; CHECK: bb.2: ; CHECK-NEXT: liveins: %rax, %rdi, %rsi ; CHECK-NEXT: {{^ $}} ; CHECK-NEXT: TCRETURNdi64 @f2, 0, csr_64, implicit %rsp, implicit %rdi, implicit %rsi bb.3: liveins: %rax, %rsi %rdi = COPY %rsi %rsi = COPY %rax TCRETURNdi64 @f2, 0, csr_64, implicit %rsp, implicit %rdi, implicit %rsi bb.4: dead %eax = MOV32ri64 123, implicit-def %rax RET 0, %rax ... 
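The two test files added next exercise the new shadow-stack intrinsics end to end. One behavior the checked assembly relies on is easy to miss: per the CET spec, RDSSP is encoded in the hint-NOP space, so when shadow stacks are inactive it executes as a NOP and leaves its register operand untouched. That is why llvm.x86.rdsspd both takes and returns an i32: the argument is the fallback value that comes back when the instruction no-ops. A minimal sketch of the pattern (illustrative only, not part of the patch; the function name is invented, the intrinsic is the one declared in IntrinsicsX86.td above):

; Read the low 32 bits of the shadow-stack pointer; yields 0 when
; shadow stacks are inactive, since RDSSP then leaves the tied
; register holding the argument value. Sketch only.
define i32 @ssp_lo32() {
entry:
  %ssp = tail call i32 @llvm.x86.rdsspd(i32 0)
  ret i32 %ssp
}

declare i32 @llvm.x86.rdsspd(i32)

Compiled with llc -mattr=+shstk, this should lower to the same tied-register rdsspd pattern that test_rdsspd checks below.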
diff --git a/llvm/test/CodeGen/X86/x32-cet-intrinsics.ll b/llvm/test/CodeGen/X86/x32-cet-intrinsics.ll new file mode 100644 index 000000000000..4d45014d18f2 --- /dev/null +++ b/llvm/test/CodeGen/X86/x32-cet-intrinsics.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+shstk -mattr=+ibt | FileCheck %s + +define void @test_incsspd(i32 %a) local_unnamed_addr { +; CHECK-LABEL: test_incsspd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: incsspd %eax +; CHECK-NEXT: retl +entry: + tail call void @llvm.x86.incsspd(i32 %a) + ret void +} + +declare void @llvm.x86.incsspd(i32) + +define i32 @test_rdsspd(i32 %a) { +; CHECK-LABEL: test_rdsspd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: rdsspd %eax +; CHECK-NEXT: retl +entry: + %0 = call i32 @llvm.x86.rdsspd(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.x86.rdsspd(i32) + +define void @test_saveprevssp() { +; CHECK-LABEL: test_saveprevssp: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: saveprevssp +; CHECK-NEXT: retl +entry: + tail call void @llvm.x86.saveprevssp() + ret void +} + +declare void @llvm.x86.saveprevssp() + +define void @test_rstorssp(i8* %__p) { +; CHECK-LABEL: test_rstorssp: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: rstorssp (%eax) +; CHECK-NEXT: retl +entry: + tail call void @llvm.x86.rstorssp(i8* %__p) + ret void +} + +declare void @llvm.x86.rstorssp(i8*) + +define void @test_wrssd(i32 %a, i8* %__p) { +; CHECK-LABEL: test_wrssd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: wrssd %eax, (%ecx) +; CHECK-NEXT: retl +entry: + tail call void @llvm.x86.wrssd(i32 %a, i8* %__p) + ret void +} + +declare void @llvm.x86.wrssd(i32, i8*) + +define void @test_wrussd(i32 %a, i8* %__p) { +; CHECK-LABEL: test_wrussd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: wrussd %eax, (%ecx) +; CHECK-NEXT: retl +entry: + tail call void @llvm.x86.wrussd(i32 %a, i8* %__p) + ret void +} + +declare void @llvm.x86.wrussd(i32, i8*) + +define void @test_setssbsy() { +; CHECK-LABEL: test_setssbsy: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: setssbsy +; CHECK-NEXT: retl +entry: + tail call void @llvm.x86.setssbsy() + ret void +} + +declare void @llvm.x86.setssbsy() + +define void @test_clrssbsy(i8* %__p) { +; CHECK-LABEL: test_clrssbsy: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: clrssbsy (%eax) +; CHECK-NEXT: retl +entry: + tail call void @llvm.x86.clrssbsy(i8* %__p) + ret void +} + +declare void @llvm.x86.clrssbsy(i8* %__p) diff --git a/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll b/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll new file mode 100644 index 000000000000..f9cba0056dbf --- /dev/null +++ b/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk -mattr=+ibt | FileCheck %s + +define void @test_incsspd(i32 %a) local_unnamed_addr { +; CHECK-LABEL: test_incsspd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: incsspd %edi +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.incsspd(i32 %a) + ret void +} + +declare void @llvm.x86.incsspd(i32) + +define void @test_incsspq(i32 %a) local_unnamed_addr { 
+; CHECK-LABEL: test_incsspq: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: incsspq %rax +; CHECK-NEXT: retq +entry: + %conv.i = sext i32 %a to i64 + tail call void @llvm.x86.incsspq(i64 %conv.i) + ret void +} + +declare void @llvm.x86.incsspq(i64) + +define i32 @test_rdsspd(i32 %a) { +; CHECK-LABEL: test_rdsspd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: rdsspd %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq +entry: + %0 = call i32 @llvm.x86.rdsspd(i32 %a) + ret i32 %0 +} + +declare i32 @llvm.x86.rdsspd(i32) + +define i64 @test_rdsspq(i64 %a) { +; CHECK-LABEL: test_rdsspq: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: rdsspq %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq +entry: + %0 = call i64 @llvm.x86.rdsspq(i64 %a) + ret i64 %0 +} + +declare i64 @llvm.x86.rdsspq(i64) + +define void @test_saveprevssp() { +; CHECK-LABEL: test_saveprevssp: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: saveprevssp +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.saveprevssp() + ret void +} + +declare void @llvm.x86.saveprevssp() + +define void @test_rstorssp(i8* %__p) { +; CHECK-LABEL: test_rstorssp: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: rstorssp (%rdi) +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.rstorssp(i8* %__p) + ret void +} + +declare void @llvm.x86.rstorssp(i8*) + +define void @test_wrssd(i32 %a, i8* %__p) { +; CHECK-LABEL: test_wrssd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: wrssd %edi, (%rsi) +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.wrssd(i32 %a, i8* %__p) + ret void +} + +declare void @llvm.x86.wrssd(i32, i8*) + +define void @test_wrssq(i64 %a, i8* %__p) { +; CHECK-LABEL: test_wrssq: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: wrssq %rdi, (%rsi) +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.wrssq(i64 %a, i8* %__p) + ret void +} + +declare void @llvm.x86.wrssq(i64, i8*) + +define void @test_wrussd(i32 %a, i8* %__p) { +; CHECK-LABEL: test_wrussd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: wrussd %edi, (%rsi) +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.wrussd(i32 %a, i8* %__p) + ret void +} + +declare void @llvm.x86.wrussd(i32, i8*) + +define void @test_wrussq(i64 %a, i8* %__p) { +; CHECK-LABEL: test_wrussq: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: wrussq %rdi, (%rsi) +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.wrussq(i64 %a, i8* %__p) + ret void +} + +declare void @llvm.x86.wrussq(i64, i8*) + +define void @test_setssbsy() { +; CHECK-LABEL: test_setssbsy: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: setssbsy +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.setssbsy() + ret void +} + +declare void @llvm.x86.setssbsy() + +define void @test_clrssbsy(i8* %__p) { +; CHECK-LABEL: test_clrssbsy: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: clrssbsy (%rdi) +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.clrssbsy(i8* %__p) + ret void +} + +declare void @llvm.x86.clrssbsy(i8* %__p) diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir index 1389ad443174..fdb8660dc067 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir @@ -1,163 +1,163 @@ # RUN: llc -O1 -start-before=greedy -stop-after=virtregrewriter -o /dev/null %s -debug-only=livedebugvars 2>&1 | FileCheck -check-prefix=CHECKDBG %s # REQUIRES: asserts # This test case was generated by using the following c program: # extern void 
foo(int, int); # # int bar[2] = {1, 2}; # # int main(int argc, char** argv) # { # int a0 = bar[0]; # int a1 = bar[1]; # foo(a0, a1); # return 0; # } # # It was compiled with -g and -O1, and the mir was dumped before ra greedy. --- | ; ModuleID = 'live-debug-vars-unused-arg.ll' source_filename = "live-debug-vars-unused-arg.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @bar = local_unnamed_addr global [2 x i32] [i32 1, i32 2], align 4, !dbg !0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 !dbg !14 { entry: tail call void @llvm.dbg.value(metadata i32 %argc, metadata !21, metadata !DIExpression()), !dbg !25 tail call void @llvm.dbg.value(metadata i8** %argv, metadata !22, metadata !DIExpression()), !dbg !26 %0 = load i32, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 0), align 4, !dbg !27, !tbaa !28 tail call void @llvm.dbg.value(metadata i32 %0, metadata !23, metadata !DIExpression()), !dbg !32 %1 = load i32, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 1), align 4, !dbg !33, !tbaa !28 tail call void @llvm.dbg.value(metadata i32 %1, metadata !24, metadata !DIExpression()), !dbg !34 tail call void @foo(i32 %0, i32 %1) #2, !dbg !35 ret i32 0, !dbg !36 } declare void @foo(i32, i32) local_unnamed_addr ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.value(metadata, metadata, metadata) #1 ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #2 attributes #0 = { nounwind uwtable } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind } !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!10, !11, !12} !llvm.ident = !{!13} !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "bar", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 6.0.0 (trunk 313866) (llvm/trunk 313875)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) !3 = !DIFile(filename: "live-debug-vars-unused-arg.c", directory: "/repo/uabbpet/master") !4 = !{} !5 = !{!0} !6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 64, elements: !8) !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !8 = !{!9} !9 = !DISubrange(count: 2) !10 = !{i32 2, !"Dwarf Version", i32 4} !11 = !{i32 2, !"Debug Info Version", i32 3} !12 = !{i32 1, !"wchar_size", i32 4} !13 = !{!"clang version 6.0.0 (trunk 313866) (llvm/trunk 313875)"} !14 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 5, type: !15, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !2, variables: !20) !15 = !DISubroutineType(types: !16) !16 = !{!7, !7, !17} !17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64) !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) !19 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) !20 = !{!21, !22, !23, !24} !21 = !DILocalVariable(name: "argc", arg: 1, scope: !14, file: !3, line: 5, type: !7) !22 = !DILocalVariable(name: "argv", arg: 2, scope: !14, file: !3, line: 5, type: !17) !23 = !DILocalVariable(name: "a0", scope: !14, file: !3, line: 7, type: !7) !24 = !DILocalVariable(name: "a1", scope: !14, file: !3, line: 8, type: !7) !25 = !DILocation(line: 5, column: 14, scope: !14) !26 = 
!DILocation(line: 5, column: 27, scope: !14) !27 = !DILocation(line: 7, column: 12, scope: !14) !28 = !{!29, !29, i64 0} !29 = !{!"int", !30, i64 0} !30 = !{!"omnipotent char", !31, i64 0} !31 = !{!"Simple C/C++ TBAA"} !32 = !DILocation(line: 7, column: 7, scope: !14) !33 = !DILocation(line: 8, column: 12, scope: !14) !34 = !DILocation(line: 8, column: 7, scope: !14) !35 = !DILocation(line: 9, column: 3, scope: !14) !36 = !DILocation(line: 10, column: 3, scope: !14) ... --- name: main alignment: 4 exposesReturnsTwice: false legalized: false regBankSelected: false selected: false tracksRegLiveness: true registers: - { id: 0, class: gr32, preferred-register: '' } - { id: 1, class: gr64, preferred-register: '' } - { id: 2, class: gr32, preferred-register: '' } - { id: 3, class: gr32, preferred-register: '' } - { id: 4, class: gr32, preferred-register: '' } liveins: frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false hasPatchPoint: false stackSize: 0 offsetAdjustment: 0 maxAlignment: 0 adjustsStack: false hasCalls: true stackProtector: '' maxCallFrameSize: 4294967295 hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' fixedStack: stack: constants: body: | bb.0.entry: DBG_VALUE debug-use %edi, debug-use _, !21, !DIExpression(), debug-location !25 DBG_VALUE debug-use %rsi, debug-use _, !22, !DIExpression(), debug-location !26 %2 = MOV32rm %rip, 1, _, @bar, _, debug-location !27 :: (dereferenceable load 4 from `i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 0)`, !tbaa !28) DBG_VALUE debug-use %2, debug-use _, !23, !DIExpression(), debug-location !32 %3 = MOV32rm %rip, 1, _, @bar + 4, _, debug-location !33 :: (dereferenceable load 4 from `i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 1)`, !tbaa !28) DBG_VALUE debug-use %3, debug-use _, !24, !DIExpression(), debug-location !34 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit %rsp, debug-location !35 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp, debug-location !35 %edi = COPY %2, debug-location !35 %esi = COPY %3, debug-location !35 - CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit killed %edi, implicit killed %esi, implicit-def %rsp, debug-location !35 - ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit %rsp, debug-location !35 + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit killed %edi, implicit killed %esi, implicit-def %rsp, debug-location !35 + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp, debug-location !35 %eax = MOV32r0 implicit-def dead %eflags, debug-location !36 RET 0, killed %eax, debug-location !36 ... # Let's verify that the slot index ranges for the unused variables argc/argv, # connected to physical regs %EDI and %RSI, do not overlap with the ranges # for %vreg2 and %vreg3. The register allocator is actually allocating the # virtual registers # to %EDI and %ESI, so the ranges for argc/argv should # not cover the whole BB.
# # CHECKDBG-LABEL: ********** EMITTING LIVE DEBUG VARIABLES ********** # CHECKDBG-NEXT: !"argc,5" [0B;0e):0 Loc0=%EDI # CHECKDBG-NEXT: [0B;0e):0 BB#0-160B # CHECKDBG-NEXT: !"argv,5" [0B;0e):0 Loc0=%RSI # CHECKDBG-NEXT: [0B;0e):0 BB#0-160B # CHECKDBG-NEXT: !"a0,7" [16r;64r):0 Loc0=%vreg2 # CHECKDBG-NEXT: [16r;64r):0 BB#0-160B # CHECKDBG-NEXT: !"a1,8" [32r;80r):0 Loc0=%vreg3 # CHECKDBG-NEXT: [32r;80r):0 BB#0-160B diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir index 84c2194f4a35..8ffb548eee1e 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir @@ -1,158 +1,158 @@ # RUN: llc -O1 -start-before=greedy -stop-after=virtregrewriter -o - %s | FileCheck -check-prefix=CHECKMIR %s # This test case was generated by using the following c program: # extern void foo(int, int); # # int bar[2] = {1, 2}; # # int main(int argc, char** argv) # { # int a0 = bar[0]; # int a1 = bar[1]; # foo(a0, a1); # return 0; # } # # It was compiled with -g and -O1, and the mir was dumped before ra greedy. --- | ; ModuleID = 'live-debug-vars-unused-arg.ll' source_filename = "live-debug-vars-unused-arg.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @bar = local_unnamed_addr global [2 x i32] [i32 1, i32 2], align 4, !dbg !0 ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 !dbg !14 { entry: tail call void @llvm.dbg.value(metadata i32 %argc, metadata !21, metadata !DIExpression()), !dbg !25 tail call void @llvm.dbg.value(metadata i8** %argv, metadata !22, metadata !DIExpression()), !dbg !26 %0 = load i32, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 0), align 4, !dbg !27, !tbaa !28 tail call void @llvm.dbg.value(metadata i32 %0, metadata !23, metadata !DIExpression()), !dbg !32 %1 = load i32, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 1), align 4, !dbg !33, !tbaa !28 tail call void @llvm.dbg.value(metadata i32 %1, metadata !24, metadata !DIExpression()), !dbg !34 tail call void @foo(i32 %0, i32 %1) #2, !dbg !35 ret i32 0, !dbg !36 } declare void @foo(i32, i32) local_unnamed_addr ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.value(metadata, metadata, metadata) #1 ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #2 attributes #0 = { nounwind uwtable } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind } !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!10, !11, !12} !llvm.ident = !{!13} !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "bar", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 6.0.0 (trunk 313866) (llvm/trunk 313875)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) !3 = !DIFile(filename: "live-debug-vars-unused-arg.c", directory: "/repo/uabbpet/master") !4 = !{} !5 = !{!0} !6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 64, elements: !8) !7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !8 = !{!9} !9 = !DISubrange(count: 2) !10 = !{i32 2, !"Dwarf Version", i32 4} !11 = !{i32 2, !"Debug Info Version", i32 3} !12 = !{i32 1, !"wchar_size", i32 4} !13 = !{!"clang version 6.0.0 
(trunk 313866) (llvm/trunk 313875)"} !14 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 5, type: !15, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !2, variables: !20) !15 = !DISubroutineType(types: !16) !16 = !{!7, !7, !17} !17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64) !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) !19 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) !20 = !{!21, !22, !23, !24} !21 = !DILocalVariable(name: "argc", arg: 1, scope: !14, file: !3, line: 5, type: !7) !22 = !DILocalVariable(name: "argv", arg: 2, scope: !14, file: !3, line: 5, type: !17) !23 = !DILocalVariable(name: "a0", scope: !14, file: !3, line: 7, type: !7) !24 = !DILocalVariable(name: "a1", scope: !14, file: !3, line: 8, type: !7) !25 = !DILocation(line: 5, column: 14, scope: !14) !26 = !DILocation(line: 5, column: 27, scope: !14) !27 = !DILocation(line: 7, column: 12, scope: !14) !28 = !{!29, !29, i64 0} !29 = !{!"int", !30, i64 0} !30 = !{!"omnipotent char", !31, i64 0} !31 = !{!"Simple C/C++ TBAA"} !32 = !DILocation(line: 7, column: 7, scope: !14) !33 = !DILocation(line: 8, column: 12, scope: !14) !34 = !DILocation(line: 8, column: 7, scope: !14) !35 = !DILocation(line: 9, column: 3, scope: !14) !36 = !DILocation(line: 10, column: 3, scope: !14) ... --- name: main alignment: 4 exposesReturnsTwice: false legalized: false regBankSelected: false selected: false tracksRegLiveness: true registers: - { id: 0, class: gr32, preferred-register: '' } - { id: 1, class: gr64, preferred-register: '' } - { id: 2, class: gr32, preferred-register: '' } - { id: 3, class: gr32, preferred-register: '' } - { id: 4, class: gr32, preferred-register: '' } liveins: frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false hasPatchPoint: false stackSize: 0 offsetAdjustment: 0 maxAlignment: 0 adjustsStack: false hasCalls: true stackProtector: '' maxCallFrameSize: 4294967295 hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' fixedStack: stack: constants: body: | bb.0.entry: DBG_VALUE debug-use %edi, debug-use _, !21, !DIExpression(), debug-location !25 DBG_VALUE debug-use %rsi, debug-use _, !22, !DIExpression(), debug-location !26 %2 = MOV32rm %rip, 1, _, @bar, _, debug-location !27 :: (dereferenceable load 4 from `i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 0)`, !tbaa !28) DBG_VALUE debug-use %2, debug-use _, !23, !DIExpression(), debug-location !32 %3 = MOV32rm %rip, 1, _, @bar + 4, _, debug-location !33 :: (dereferenceable load 4 from `i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 1)`, !tbaa !28) DBG_VALUE debug-use %3, debug-use _, !24, !DIExpression(), debug-location !34 - ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit %rsp, debug-location !35 + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp, debug-location !35 %edi = COPY %2, debug-location !35 %esi = COPY %3, debug-location !35 - CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit killed %edi, implicit killed %esi, implicit-def %rsp, debug-location !35 - ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit %rsp, debug-location !35 + CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit killed %edi, implicit killed %esi, implicit-def %rsp, 
debug-location !35 + ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp, debug-location !35 %eax = MOV32r0 implicit-def dead %eflags, debug-location !36 RET 0, killed %eax, debug-location !36 ... # Verify that we only get one DBG_VALUE for argc and one for argv. # # CHECKMIR: ![[ARGC:[0-9]+]] = !DILocalVariable(name: "argc", arg: 1 # CHECKMIR: ![[ARGV:[0-9]+]] = !DILocalVariable(name: "argv", arg: 2 # CHECKMIR: name: main # CHECKMIR: body: # CHECKMIR: DBG_VALUE debug-use %edi, debug-use _, ![[ARGC]] # CHECKMIR-NOT: DBG_VALUE debug-use %{{.*}}, debug-use _, ![[ARGC]] # CHECKMIR: DBG_VALUE debug-use %rsi, debug-use _, ![[ARGV]] # CHECKMIR-NOT: DBG_VALUE debug-use %{{.*}}, debug-use _, ![[ARGC]] # CHECKMIR-NOT: DBG_VALUE debug-use %{{.*}}, debug-use _, ![[ARGV]] diff --git a/llvm/test/DebugInfo/X86/live-debug-vars-dse.mir b/llvm/test/DebugInfo/X86/live-debug-vars-dse.mir index ac4e48fe92e6..18f706982d46 100644 --- a/llvm/test/DebugInfo/X86/live-debug-vars-dse.mir +++ b/llvm/test/DebugInfo/X86/live-debug-vars-dse.mir @@ -1,147 +1,147 @@ # RUN: llc -start-after=machine-scheduler %s -o - | FileCheck %s # C source: # void escape(int *); # extern int global; # void f(int x) { # escape(&x); # x = 1; # global = x; # x = 2; # escape(&x); # } # CHECK-LABEL: f: # @f # CHECK: movl %ecx, [[OFF_X:[0-9]+]](%rsp) # CHECK: #DEBUG_VALUE: f:x <- [DW_OP_plus_uconst [[OFF_X]]] [%RSP+0] # CHECK: leaq [[OFF_X]](%rsp), %rsi # CHECK: callq escape # CHECK: #DEBUG_VALUE: f:x <- 1 # CHECK: movl $1, global(%rip) # CHECK: #DEBUG_VALUE: f:x <- [DW_OP_plus_uconst [[OFF_X]]] [%RSP+0] # CHECK: movl $2, [[OFF_X]](%rsp) # CHECK: callq escape # CHECK: retq --- | ; ModuleID = '' source_filename = "dse.c" target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.0.24215" @global = external global i32, align 4 ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.value(metadata, metadata, metadata) #0 declare void @escape(i32*) ; Function Attrs: nounwind uwtable define void @f(i32 %x) #1 !dbg !8 { entry: %x.addr = alloca i32, align 4 store i32 %x, i32* %x.addr, align 4 call void @llvm.dbg.value(metadata i32* %x.addr, metadata !13, metadata !DIExpression()), !dbg !14 call void @escape(i32* %x.addr), !dbg !15 call void @llvm.dbg.value(metadata i32 1, metadata !13, metadata !DIExpression()), !dbg !16 store i32 1, i32* @global, align 4, !dbg !17 call void @llvm.dbg.value(metadata i32* %x.addr, metadata !13, metadata !DIExpression()), !dbg !18 store i32 2, i32* %x.addr, align 4, !dbg !18 call void @escape(i32* %x.addr), !dbg !19 ret void, !dbg !20 } ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #2 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind uwtable } attributes #2 = { nounwind } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} !llvm.ident = !{!7} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) !1 = !DIFile(filename: "dse.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild") !2 = !{} !3 = !{i32 2, !"Dwarf Version", i32 4} !4 = !{i32 2, !"Debug Info Version", i32 3} !5 = !{i32 1, !"wchar_size", i32 2} !6 = !{i32 7, !"PIC Level", i32 2} !7 = !{!"clang version 6.0.0 "} !8 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: 
DIFlagPrototyped, isOptimized: true, unit: !0, variables: !12) !9 = !DISubroutineType(types: !10) !10 = !{null, !11} !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !12 = !{!13} !13 = !DILocalVariable(name: "x", arg: 1, scope: !8, file: !1, line: 3, type: !11) !14 = !DILocation(line: 3, column: 12, scope: !8) !15 = !DILocation(line: 4, column: 3, scope: !8) !16 = !DILocation(line: 5, column: 5, scope: !8) !17 = !DILocation(line: 6, column: 10, scope: !8) !18 = !DILocation(line: 7, column: 5, scope: !8) !19 = !DILocation(line: 8, column: 3, scope: !8) !20 = !DILocation(line: 9, column: 1, scope: !8) ... --- name: f alignment: 4 exposesReturnsTwice: false legalized: false regBankSelected: false selected: false tracksRegLiveness: true registers: - { id: 0, class: gr32, preferred-register: '' } - { id: 1, class: gr64, preferred-register: '' } liveins: - { reg: '%ecx', virtual-reg: '%0' } frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false hasPatchPoint: false stackSize: 0 offsetAdjustment: 0 maxAlignment: 8 adjustsStack: false hasCalls: true stackProtector: '' maxCallFrameSize: 4294967295 hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' fixedStack: stack: - { id: 0, name: x.addr, type: default, offset: 0, size: 4, alignment: 4, stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } constants: body: | bb.0.entry: liveins: %ecx %0 = COPY %ecx MOV32mr %stack.0.x.addr, 1, _, 0, _, %0 :: (store 4 into %ir.x.addr) DBG_VALUE %stack.0.x.addr, 0, !13, !DIExpression(), debug-location !14 - ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit %rsp, debug-location !15 + ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp, debug-location !15 %1 = LEA64r %stack.0.x.addr, 1, _, 0, _ %rcx = COPY %1, debug-location !15 - CALL64pcrel32 @escape, csr_win64, implicit %rsp, implicit %rcx, implicit-def %rsp, debug-location !15 - ADJCALLSTACKUP64 32, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit %rsp, debug-location !15 + CALL64pcrel32 @escape, csr_win64, implicit %rsp, implicit %ssp, implicit %rcx, implicit-def %rsp, implicit-def %ssp, debug-location !15 + ADJCALLSTACKUP64 32, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp, debug-location !15 DBG_VALUE 1, debug-use _, !13, !DIExpression(), debug-location !16 MOV32mi %rip, 1, _, @global, _, 1, debug-location !17 :: (store 4 into @global) DBG_VALUE %stack.0.x.addr, 0, !13, !DIExpression(), debug-location !18 MOV32mi %stack.0.x.addr, 1, _, 0, _, 2, debug-location !18 :: (store 4 into %ir.x.addr) - ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit %rsp, debug-location !19 + ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp, debug-location !19 %rcx = COPY %1, debug-location !19 - CALL64pcrel32 @escape, csr_win64, implicit %rsp, implicit %rcx, implicit-def %rsp, debug-location !19 - ADJCALLSTACKUP64 32, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit %rsp, debug-location !19 + CALL64pcrel32 @escape, csr_win64, implicit %rsp, implicit %ssp, implicit %rcx, implicit-def %rsp, implicit-def %ssp, debug-location !19 + ADJCALLSTACKUP64 32, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def 
dead %ssp, implicit %rsp, implicit %ssp, debug-location !19 RET 0, debug-location !20 ... diff --git a/llvm/test/MC/X86/cet-encoding.s b/llvm/test/MC/X86/cet-encoding.s new file mode 100644 index 000000000000..26cc76639c62 --- /dev/null +++ b/llvm/test/MC/X86/cet-encoding.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+shstk --show-encoding %s | FileCheck %s + +// CHECK: incsspd %r13d +// CHECK: # encoding: [0xf3,0x41,0x0f,0xae,0xed] + incsspd %r13d + +// CHECK: incsspq %r15 +// CHECK: # encoding: [0xf3,0x49,0x0f,0xae,0xef] + incsspq %r15 + +// CHECK: rdsspq %r15 +// CHECK: # encoding: [0xf3,0x49,0x0f,0x1e,0xcf] + rdsspq %r15 + +// CHECK: rdsspd %r13d +// CHECK: # encoding: [0xf3,0x41,0x0f,0x1e,0xcd] + rdsspd %r13d + +// CHECK: saveprevssp +// CHECK: # encoding: [0xf3,0x0f,0x01,0xea] + saveprevssp + +// CHECK: rstorssp 485498096 +// CHECK: # encoding: [0xf3,0x0f,0x01,0x2c,0x25,0xf0,0x1c,0xf0,0x1c] + rstorssp 485498096 + +// CHECK: rstorssp (%rdx) +// CHECK: # encoding: [0xf3,0x0f,0x01,0x2a] + rstorssp (%rdx) + +// CHECK: rstorssp 64(%rdx) +// CHECK: # encoding: [0xf3,0x0f,0x01,0x6a,0x40] + rstorssp 64(%rdx) + +// CHECK: rstorssp 64(%rdx,%rax) +// CHECK: # encoding: [0xf3,0x0f,0x01,0x6c,0x02,0x40] + rstorssp 64(%rdx,%rax) + +// CHECK: rstorssp 64(%rdx,%rax,4) +// CHECK: # encoding: [0xf3,0x0f,0x01,0x6c,0x82,0x40] + rstorssp 64(%rdx,%rax,4) + +// CHECK: rstorssp -64(%rdx,%rax,4) +// CHECK: # encoding: [0xf3,0x0f,0x01,0x6c,0x82,0xc0] + rstorssp -64(%rdx,%rax,4) + +// CHECK: wrssq %r15, 485498096 +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf6,0x3c,0x25,0xf0,0x1c,0xf0,0x1c] + wrssq %r15, 485498096 + +// CHECK: wrssq %r15, (%rdx) +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf6,0x3a] + wrssq %r15, (%rdx) + +// CHECK: wrssq %r15, 64(%rdx) +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf6,0x7a,0x40] + wrssq %r15, 64(%rdx) + +// CHECK: wrssq %r15, 64(%rdx,%rax) +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf6,0x7c,0x02,0x40] + wrssq %r15, 64(%rdx,%rax) + +// CHECK: wrssq %r15, 64(%rdx,%rax,4) +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf6,0x7c,0x82,0x40] + wrssq %r15, 64(%rdx,%rax,4) + +// CHECK: wrssq %r15, -64(%rdx,%rax,4) +// CHECK: # encoding: [0x4c,0x0f,0x38,0xf6,0x7c,0x82,0xc0] + wrssq %r15, -64(%rdx,%rax,4) + +// CHECK: wrssd %r13d, 485498096 +// CHECK: # encoding: [0x44,0x0f,0x38,0xf6,0x2c,0x25,0xf0,0x1c,0xf0,0x1c] + wrssd %r13d, 485498096 + +// CHECK: wrssd %r13d, (%rdx) +// CHECK: # encoding: [0x44,0x0f,0x38,0xf6,0x2a] + wrssd %r13d, (%rdx) + +// CHECK: wrssd %r13d, 64(%rdx) +// CHECK: # encoding: [0x44,0x0f,0x38,0xf6,0x6a,0x40] + wrssd %r13d, 64(%rdx) + +// CHECK: wrssd %r13d, 64(%rdx,%rax) +// CHECK: # encoding: [0x44,0x0f,0x38,0xf6,0x6c,0x02,0x40] + wrssd %r13d, 64(%rdx,%rax) + +// CHECK: wrssd %r13d, 64(%rdx,%rax,4) +// CHECK: # encoding: [0x44,0x0f,0x38,0xf6,0x6c,0x82,0x40] + wrssd %r13d, 64(%rdx,%rax,4) + +// CHECK: wrssd %r13d, -64(%rdx,%rax,4) +// CHECK: # encoding: [0x44,0x0f,0x38,0xf6,0x6c,0x82,0xc0] + wrssd %r13d, -64(%rdx,%rax,4) + +// CHECK: wrussd %r13d, 485498096 +// CHECK: # encoding: [0x66,0x44,0x0f,0x38,0xf5,0x2c,0x25,0xf0,0x1c,0xf0,0x1c] + wrussd %r13d, 485498096 + +// CHECK: wrussd %r13d, (%rdx) +// CHECK: # encoding: [0x66,0x44,0x0f,0x38,0xf5,0x2a] + wrussd %r13d, (%rdx) + +// CHECK: wrussd %r13d, 64(%rdx) +// CHECK: # encoding: [0x66,0x44,0x0f,0x38,0xf5,0x6a,0x40] + wrussd %r13d, 64(%rdx) + +// CHECK: wrussd %r13d, 64(%rdx,%rax) +// CHECK: # encoding: [0x66,0x44,0x0f,0x38,0xf5,0x6c,0x02,0x40] + wrussd %r13d, 64(%rdx,%rax) + +// CHECK: wrussd %r13d, 
64(%rdx,%rax,4) +// CHECK: # encoding: [0x66,0x44,0x0f,0x38,0xf5,0x6c,0x82,0x40] + wrussd %r13d, 64(%rdx,%rax,4) + +// CHECK: wrussd %r13d, -64(%rdx,%rax,4) +// CHECK: # encoding: [0x66,0x44,0x0f,0x38,0xf5,0x6c,0x82,0xc0] + wrussd %r13d, -64(%rdx,%rax,4) + +// CHECK: wrussq %r15, 485498096 +// CHECK: # encoding: [0x66,0x4c,0x0f,0x38,0xf5,0x3c,0x25,0xf0,0x1c,0xf0,0x1c] + wrussq %r15, 485498096 + +// CHECK: wrussq %r15, (%rdx) +// CHECK: # encoding: [0x66,0x4c,0x0f,0x38,0xf5,0x3a] + wrussq %r15, (%rdx) + +// CHECK: wrussq %r15, 64(%rdx) +// CHECK: # encoding: [0x66,0x4c,0x0f,0x38,0xf5,0x7a,0x40] + wrussq %r15, 64(%rdx) + +// CHECK: wrussq %r15, 64(%rdx,%rax) +// CHECK: # encoding: [0x66,0x4c,0x0f,0x38,0xf5,0x7c,0x02,0x40] + wrussq %r15, 64(%rdx,%rax) + +// CHECK: wrussq %r15, 64(%rdx,%rax,4) +// CHECK: # encoding: [0x66,0x4c,0x0f,0x38,0xf5,0x7c,0x82,0x40] + wrussq %r15, 64(%rdx,%rax,4) + +// CHECK: wrussq %r15, -64(%rdx,%rax,4) +// CHECK: # encoding: [0x66,0x4c,0x0f,0x38,0xf5,0x7c,0x82,0xc0] + wrussq %r15, -64(%rdx,%rax,4) + +// CHECK: clrssbsy 485498096 +// CHECK: # encoding: [0xf3,0x0f,0xae,0x34,0x25,0xf0,0x1c,0xf0,0x1c] + clrssbsy 485498096 + +// CHECK: clrssbsy (%rdx) +// CHECK: # encoding: [0xf3,0x0f,0xae,0x32] + clrssbsy (%rdx) + +// CHECK: clrssbsy 64(%rdx) +// CHECK: # encoding: [0xf3,0x0f,0xae,0x72,0x40] + clrssbsy 64(%rdx) + +// CHECK: clrssbsy 64(%rdx,%rax) +// CHECK: # encoding: [0xf3,0x0f,0xae,0x74,0x02,0x40] + clrssbsy 64(%rdx,%rax) + +// CHECK: clrssbsy 64(%rdx,%rax,4) +// CHECK: # encoding: [0xf3,0x0f,0xae,0x74,0x82,0x40] + clrssbsy 64(%rdx,%rax,4) + +// CHECK: clrssbsy -64(%rdx,%rax,4) +// CHECK: # encoding: [0xf3,0x0f,0xae,0x74,0x82,0xc0] + clrssbsy -64(%rdx,%rax,4) + +// CHECK: setssbsy +// CHECK: # encoding: [0xf3,0x0f,0x01,0xe8] + setssbsy
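To close the loop between the encodings above and the intrinsics they back: INCSSP exists so that unwinding code can discard shadow-stack entries in bulk. As the SDM describes it, the low byte of the operand is scaled by the entry size (4 bytes for incsspd, 8 for incsspq) and added to SSP. Expressed through the intrinsic added in this patch (a sketch with an invented function name, assuming shadow stacks are enabled):

; Discard four 8-byte shadow-stack entries, e.g. after unwinding
; four frames in longjmp-style code. Sketch only.
define void @discard_shadow_frames() {
entry:
  tail call void @llvm.x86.incsspq(i64 4)
  ret void
}

declare void @llvm.x86.incsspq(i64)

Under -mattr=+shstk this selects the incsspq form whose encoding is checked near the top of cet-encoding.s.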