diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -227,6 +227,7 @@ class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; } class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; } class NOTRACK { bit hasNoTrackPrefix = 1; } +class SIMD_EXP { list<Register> Uses = [MXCSR]; bit mayRaiseFPException = 1; } // Specify AVX512 8-bit compressed displacement encoding based on the vector // element size in bits (8, 16, 32, 64) and the CDisp8 form. diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -837,7 +837,7 @@ multiclass sse12_cvt_p opc, RegisterClass RC, X86MemOperand x86memop, ValueType DstTy, ValueType SrcTy, PatFrag ld_frag, string asm, Domain d, X86FoldableSchedWrite sched> { -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : I, Sched<[sched]>; @@ -864,7 +864,7 @@ } // hasSideEffects = 0 } -let isCodeGenOnly = 1, Predicates = [UseAVX] in { +let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", WriteCvtSS2I>, @@ -889,13 +889,13 @@ // where appropriate to do so. 
let isCodeGenOnly = 1 in { defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l", - WriteCvtI2SS>, XS, VEX_4V, VEX_LIG; + WriteCvtI2SS>, XS, VEX_4V, VEX_LIG, SIMD_EXP; defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q", - WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG; + WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG, SIMD_EXP; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD, VEX_4V, VEX_LIG; defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q", - WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG; + WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG, SIMD_EXP; } // isCodeGenOnly = 1 let Predicates = [UseAVX] in { @@ -921,28 +921,28 @@ let isCodeGenOnly = 1 in { defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS; + WriteCvtSS2I>, XS, SIMD_EXP; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS, REX_W; + WriteCvtSS2I>, XS, REX_W, SIMD_EXP; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD; + WriteCvtSD2I>, XD, SIMD_EXP; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD, REX_W; + WriteCvtSD2I>, XD, REX_W, SIMD_EXP; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, "cvtsi2ss", "cvtsi2ss{l}", - WriteCvtI2SS, ReadInt2Fpu>, XS; + WriteCvtI2SS, ReadInt2Fpu>, XS, SIMD_EXP; defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, "cvtsi2ss", "cvtsi2ss{q}", - WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W; + WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W, SIMD_EXP; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, "cvtsi2sd", "cvtsi2sd{l}", WriteCvtI2SD, ReadInt2Fpu>, XD; defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, 
"cvtsi2sd", "cvtsi2sd{q}", - WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W; + WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W, SIMD_EXP; } // isCodeGenOnly = 1 // Conversion Instructions Intrinsics - Match intrinsics which expect MM @@ -982,6 +982,7 @@ } } +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [UseAVX] in { defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", @@ -994,27 +995,27 @@ sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W; - +} let Predicates = [UseAVX] in { defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, SIMD_EXP; defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXP; defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG; defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXP; } let Constraints = "$src1 = $dst" in { defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS, SIMD_EXP; defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W, SIMD_EXP; defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD; defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, 
REX_W; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W, SIMD_EXP; } def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1048,7 +1049,7 @@ /// SSE 1 Only // Aliases for intrinsics -let Predicates = [UseAVX] in { +let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", WriteCvtSS2I>, XS, VEX, VEX_LIG; @@ -1064,6 +1065,7 @@ "cvttsd2si", WriteCvtSS2I>, XD, VEX, VEX_LIG, VEX_W; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", WriteCvtSS2I>, XS; @@ -1076,6 +1078,7 @@ defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", WriteCvtSD2I>, XD, REX_W; +} def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; @@ -1111,7 +1114,7 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; -let Predicates = [UseAVX] in { +let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS, VEX, VEX_LIG; @@ -1119,6 +1122,7 @@ ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS; @@ -1139,6 +1143,7 @@ "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, WriteCvtI2PS>, PS, Requires<[UseSSE2]>; +} // AVX aliases def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", @@ -1184,13 +1189,13 @@ (ins FR32:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSD2SS]>; + 
Sched<[WriteCvtSD2SS]>, SIMD_EXP; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; + Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXP; } def : Pat<(f32 (fpround FR64:$src)), @@ -1201,14 +1206,15 @@ def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fpround FR64:$src))]>, - Sched<[WriteCvtSD2SS]>; + Sched<[WriteCvtSD2SS]>, SIMD_EXP; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>, XD, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSD2SS.Folded]>; + Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXP; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1238,6 +1244,7 @@ XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; } +} // Convert scalar single to scalar double // SSE2 instructions with XS prefix @@ -1246,14 +1253,14 @@ (ins FR64:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>; + Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXP; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, - Requires<[UseAVX, OptForSize]>; + Requires<[UseAVX, OptForSize]>, SIMD_EXP; } // isCodeGenOnly = 1, hasSideEffects = 0 def : Pat<(f64 (fpextend FR32:$src)), @@ -1265,15 +1272,15 @@ def CVTSS2SDrr : 
I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fpextend FR32:$src))]>, - XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>; + XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXP; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>, XS, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSS2SD.Folded]>; + Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXP; } // isCodeGenOnly = 1 -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1418,36 +1425,36 @@ def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, - VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; + VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXP; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>, - VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; + VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXP; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; + VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXP; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG; + VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXP; } def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 
"cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, - Sched<[WriteCvtPS2I]>; + Sched<[WriteCvtPS2I]>, SIMD_EXP; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>, - Sched<[WriteCvtPS2ILd]>; + Sched<[WriteCvtPS2ILd]>, SIMD_EXP; // Convert Packed Double FP to Packed DW Integers -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. @@ -1486,15 +1493,16 @@ "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>, - Sched<[WriteCvtPD2ILd]>; + Sched<[WriteCvtPD2ILd]>, SIMD_EXP; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, - Sched<[WriteCvtPD2I]>; + Sched<[WriteCvtPD2I]>, SIMD_EXP; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [HasAVX, NoVLX] in { def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", @@ -1529,11 +1537,12 @@ [(set VR128:$dst, (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>, Sched<[WriteCvtPS2ILd]>; +} // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
-let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // XMM only def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", @@ -1575,15 +1584,15 @@ "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, - Sched<[WriteCvtPD2I]>; + Sched<[WriteCvtPD2I]>, SIMD_EXP; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>, - Sched<[WriteCvtPD2ILd]>; + Sched<[WriteCvtPD2ILd]>, SIMD_EXP; // Convert packed single to packed double -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", @@ -1603,7 +1612,7 @@ PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG; } -let Predicates = [UseSSE2] in { +let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, @@ -1674,7 +1683,7 @@ // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
-let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // XMM only def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", @@ -1703,11 +1712,11 @@ def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>, - Sched<[WriteCvtPD2PS]>; + Sched<[WriteCvtPD2PS]>, SIMD_EXP; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>, - Sched<[WriteCvtPD2PS.Folded]>; + Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXP; let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4f32 (fpround (v4f64 VR256:$src))), @@ -1725,6 +1734,7 @@ SDNode OpNode, ValueType VT, PatFrag ld_frag, string asm, X86FoldableSchedWrite sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let isCommutable = 1 in def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, @@ -1736,6 +1746,7 @@ (ld_frag addr:$src2), timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let isCodeGenOnly = 1 in { let ExeDomain = SSEPackedSingle in @@ -1763,6 +1774,7 @@ multiclass sse12_cmp_scalar_int { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src, u8imm:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, @@ -1775,6 +1787,7 @@ mem_cpat:$src, timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} // Aliases to match intrinsics which expect XMM operand(s). 
let ExeDomain = SSEPackedSingle in @@ -1804,7 +1817,7 @@ ValueType vt, X86MemOperand x86memop, PatFrag ld_frag, string OpcodeStr, X86FoldableSchedWrite sched> { -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def rr: SI, @@ -1823,6 +1836,7 @@ ValueType vt, Operand memop, ComplexPattern mem_cpat, string OpcodeStr, X86FoldableSchedWrite sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr_Int: SI, @@ -1834,6 +1848,7 @@ mem_cpat:$src2))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, @@ -1888,6 +1903,7 @@ ValueType VT, string asm, X86FoldableSchedWrite sched, Domain d, PatFrag ld_frag> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let isCommutable = 1 in def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, @@ -1899,6 +1915,7 @@ (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} defm VCMPPS : sse12_cmp_packed opc, string OpcodeStr, SDNode OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PS : sse12_fp_packed, PD; } } +} multiclass basic_sse12_fp_binop_s opc, string OpcodeStr, SDNode OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm V#NAME#SS : sse12_fp_scalar, XS, VEX_4V, VEX_LIG, VEX_WIG; @@ -2599,10 +2619,12 @@ sched.PD.Scl>, XD; } } +} multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG, VEX_WIG; @@ -2619,6 +2641,7 @@ SSEPackedDouble, sched.PD.Scl>, XD; } } +} // Binary Arithmetic instructions defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>, @@ -2964,7 +2987,7 @@ defm SQRT : 
sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>, sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>; + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>, SIMD_EXP; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. @@ -4436,6 +4459,7 @@ multiclass sse3_addsub { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : I<0xD0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, @@ -4451,6 +4475,7 @@ [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { @@ -4488,6 +4513,7 @@ X86MemOperand x86memop, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag ld_frag, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : S3DI, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag ld_frag, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : S3I, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { @@ -5348,6 +5377,7 @@ X86FoldableSchedWrite sched> { // Intrinsic operation, reg. 
// Vector intrinsic operation, reg +let Uses = [MXCSR], mayRaiseFPException = 1 in { def r : SS4AIi8, Sched<[sched.Folded]>; } +} multiclass avx_fp_unop_rm opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { @@ -5400,6 +5431,7 @@ multiclass sse41_fp_unop_s opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { def SSr : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, hasSideEffects = 0 } +} multiclass sse41_fp_binop_s opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched, ValueType VT32, ValueType VT64, SDNode OpNode, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in { def SSr_Int : SS4AIi8; } // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 } +} // FP round - roundss, roundps, roundsd, roundpd let Predicates = [HasAVX, NoVLX] in { @@ -5959,6 +5994,7 @@ SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, load, f128mem, 0, @@ -5972,6 +6008,7 @@ VR256, load, i256mem, 0, SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG; } +} let Predicates = [HasAVX2] in { let isCommutable = 0 in { @@ -5991,11 +6028,11 @@ let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memop, f128mem, 1, - SchedWriteDPPS.XMM>; + SchedWriteDPPS.XMM>, SIMD_EXP; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, VR128, memop, f128mem, 1, - SchedWriteDPPD.XMM>; + SchedWriteDPPD.XMM>, SIMD_EXP; } /// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp 
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -515,6 +515,9 @@ // Set the floating point control register as reserved. Reserved.set(X86::FPCW); + // Set the SIMD floating point control register as reserved. + Reserved.set(X86::MXCSR); + // Set the stack-pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); ++I) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -294,6 +294,11 @@ // Floating-point control word def FPCW : X86Reg<"fpcr", 0>; +// SIMD Floating-point control register. +// Note: We only model the current rounding modes and the IEEE masks. +// IEEE flags, FTZ and DAZ are not modeled here. +def MXCSR : X86Reg<"mxcsr", 0>; + // Status flags register. // // Note that some flags that are commonly thought of as part of the status