Index: llvm/lib/Target/X86/X86InstrFormats.td =================================================================== --- llvm/lib/Target/X86/X86InstrFormats.td +++ llvm/lib/Target/X86/X86InstrFormats.td @@ -227,6 +227,7 @@ class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; } class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; } class NOTRACK { bit hasNoTrackPrefix = 1; } +class SIMD_EXC { list Uses = [MXCSR]; bit mayRaiseFPException = 1; } // Specify AVX512 8-bit compressed displacement encoding based on the vector // element size in bits (8, 16, 32, 64) and the CDisp8 form. Index: llvm/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/lib/Target/X86/X86InstrSSE.td +++ llvm/lib/Target/X86/X86InstrSSE.td @@ -837,7 +837,7 @@ multiclass sse12_cvt_p opc, RegisterClass RC, X86MemOperand x86memop, ValueType DstTy, ValueType SrcTy, PatFrag ld_frag, string asm, Domain d, X86FoldableSchedWrite sched> { -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : I, Sched<[sched]>; @@ -864,7 +864,7 @@ } // hasSideEffects = 0 } -let isCodeGenOnly = 1, Predicates = [UseAVX] in { +let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", WriteCvtSS2I>, @@ -889,13 +889,13 @@ // where appropriate to do so. let isCodeGenOnly = 1 in { defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l", - WriteCvtI2SS>, XS, VEX_4V, VEX_LIG; + WriteCvtI2SS>, XS, VEX_4V, VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q", - WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG; + WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD, VEX_4V, VEX_LIG; defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q", - WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG; + WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC; } // isCodeGenOnly = 1 let Predicates = [UseAVX] in { @@ -921,28 +921,28 @@ let isCodeGenOnly = 1 in { defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS; + WriteCvtSS2I>, XS, SIMD_EXC; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS, REX_W; + WriteCvtSS2I>, XS, REX_W, SIMD_EXC; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD; + WriteCvtSD2I>, XD, SIMD_EXC; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD, REX_W; + WriteCvtSD2I>, XD, REX_W, SIMD_EXC; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, "cvtsi2ss", "cvtsi2ss{l}", - WriteCvtI2SS, ReadInt2Fpu>, XS; + WriteCvtI2SS, ReadInt2Fpu>, XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, "cvtsi2ss", "cvtsi2ss{q}", - WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W; + WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, "cvtsi2sd", "cvtsi2sd{l}", WriteCvtI2SD, ReadInt2Fpu>, XD; defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, "cvtsi2sd", "cvtsi2sd{q}", - WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W; + WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; } // isCodeGenOnly = 1 // Conversion Instructions Intrinsics - Match intrinsics which expect MM @@ -982,6 +982,7 @@ } } +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [UseAVX] in { defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", @@ -994,27 +995,27 @@ sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W; - +} let Predicates = [UseAVX] in { defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG; defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; } let Constraints = "$src1 = $dst" in { defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD; defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W, SIMD_EXC; } def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1048,7 +1049,7 @@ /// SSE 1 Only // Aliases for intrinsics -let Predicates = [UseAVX] in { +let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", WriteCvtSS2I>, XS, VEX, VEX_LIG; @@ -1064,6 +1065,7 @@ "cvttsd2si", WriteCvtSS2I>, XD, VEX, VEX_LIG, VEX_W; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", WriteCvtSS2I>, XS; @@ -1076,6 +1078,7 @@ defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", WriteCvtSD2I>, XD, REX_W; +} def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">; @@ -1111,7 +1114,7 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">; -let Predicates = [UseAVX] in { +let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS, VEX, VEX_LIG; @@ -1119,6 +1122,7 @@ ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", WriteCvtSS2I>, XS; @@ -1139,6 +1143,7 @@ "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, WriteCvtI2PS>, PS, Requires<[UseSSE2]>; +} // AVX aliases def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", @@ -1184,13 +1189,13 @@ (ins FR32:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSD2SS]>; + Sched<[WriteCvtSD2SS]>, SIMD_EXC; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; + Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; } def : Pat<(f32 (fpround FR64:$src)), @@ -1201,14 +1206,15 @@ def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fpround FR64:$src))]>, - Sched<[WriteCvtSD2SS]>; + Sched<[WriteCvtSD2SS]>, SIMD_EXC; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>, XD, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSD2SS.Folded]>; + Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1238,6 +1244,7 @@ XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; } +} // Convert scalar single to scalar double // SSE2 instructions with XS prefix @@ -1246,14 +1253,14 @@ (ins FR64:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>; + Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, - Requires<[UseAVX, OptForSize]>; + Requires<[UseAVX, OptForSize]>, SIMD_EXC; } // isCodeGenOnly = 1, hasSideEffects = 0 def : Pat<(f64 (fpextend FR32:$src)), @@ -1265,15 +1272,15 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fpextend FR32:$src))]>, - XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>; + XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>, XS, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSS2SD.Folded]>; + Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC; } // isCodeGenOnly = 1 -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1418,36 +1425,36 @@ def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, - VEX, Sched<[WriteCvtPS2I]>, VEX_WIG; + VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>, - VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG; + VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG; + VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>, - VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG; + VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC; } def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>, - Sched<[WriteCvtPS2I]>; + Sched<[WriteCvtPS2I]>, SIMD_EXC; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))]>, - Sched<[WriteCvtPS2ILd]>; + Sched<[WriteCvtPS2ILd]>, SIMD_EXC; // Convert Packed Double FP to Packed DW Integers -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. @@ -1486,15 +1493,16 @@ "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))]>, - Sched<[WriteCvtPD2ILd]>; + Sched<[WriteCvtPD2ILd]>, SIMD_EXC; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, - Sched<[WriteCvtPD2I]>; + Sched<[WriteCvtPD2I]>, SIMD_EXC; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [HasAVX, NoVLX] in { def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", @@ -1529,11 +1537,12 @@ [(set VR128:$dst, (v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>, Sched<[WriteCvtPS2ILd]>; +} // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // XMM only def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", @@ -1575,15 +1584,15 @@ "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>, - Sched<[WriteCvtPD2I]>; + Sched<[WriteCvtPD2I]>, SIMD_EXC; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>, - Sched<[WriteCvtPD2ILd]>; + Sched<[WriteCvtPD2ILd]>, SIMD_EXC; // Convert packed single to packed double -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", @@ -1603,7 +1612,7 @@ PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG; } -let Predicates = [UseSSE2] in { +let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, @@ -1674,7 +1683,7 @@ // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // XMM only def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", @@ -1703,11 +1712,11 @@ def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>, - Sched<[WriteCvtPD2PS]>; + Sched<[WriteCvtPD2PS]>, SIMD_EXC; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>, - Sched<[WriteCvtPD2PS.Folded]>; + Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC; let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4f32 (fpround (v4f64 VR256:$src))), @@ -1725,6 +1734,7 @@ SDNode OpNode, ValueType VT, PatFrag ld_frag, string asm, X86FoldableSchedWrite sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let isCommutable = 1 in def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, @@ -1736,6 +1746,7 @@ (ld_frag addr:$src2), timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let isCodeGenOnly = 1 in { let ExeDomain = SSEPackedSingle in @@ -1763,6 +1774,7 @@ multiclass sse12_cmp_scalar_int { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src, u8imm:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, @@ -1775,6 +1787,7 @@ mem_cpat:$src, timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} // Aliases to match intrinsics which expect XMM operand(s). let ExeDomain = SSEPackedSingle in @@ -1804,7 +1817,7 @@ ValueType vt, X86MemOperand x86memop, PatFrag ld_frag, string OpcodeStr, X86FoldableSchedWrite sched> { -let hasSideEffects = 0 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def rr: SI, @@ -1823,6 +1836,7 @@ ValueType vt, Operand memop, ComplexPattern mem_cpat, string OpcodeStr, X86FoldableSchedWrite sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr_Int: SI, @@ -1834,6 +1848,7 @@ mem_cpat:$src2))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, @@ -1888,6 +1903,7 @@ ValueType VT, string asm, X86FoldableSchedWrite sched, Domain d, PatFrag ld_frag> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let isCommutable = 1 in def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm, @@ -1899,6 +1915,7 @@ (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} defm VCMPPS : sse12_cmp_packed opc, string OpcodeStr, SDNode OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PS : sse12_fp_packed, PD; } } +} multiclass basic_sse12_fp_binop_s opc, string OpcodeStr, SDNode OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm V#NAME#SS : sse12_fp_scalar, XS, VEX_4V, VEX_LIG, VEX_WIG; @@ -2599,10 +2619,12 @@ sched.PD.Scl>, XD; } } +} multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, SDPatternOperator OpNode, X86SchedWriteSizes sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V, VEX_LIG, VEX_WIG; @@ -2619,6 +2641,7 @@ SSEPackedDouble, sched.PD.Scl>, XD; } } +} // Binary Arithmetic instructions defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>, @@ -2964,7 +2987,7 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>, sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>; + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>, SIMD_EXC; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. @@ -4436,6 +4459,7 @@ multiclass sse3_addsub { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : I<0xD0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, @@ -4451,6 +4475,7 @@ [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { @@ -4488,6 +4513,7 @@ X86MemOperand x86memop, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag ld_frag, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : S3DI, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag ld_frag, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr : S3I, Sched<[sched.Folded, sched.ReadAfterFold]>; } +} let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { @@ -5348,6 +5377,7 @@ X86FoldableSchedWrite sched> { // Intrinsic operation, reg. // Vector intrinsic operation, reg +let Uses = [MXCSR], mayRaiseFPException = 1 in { def r : SS4AIi8, Sched<[sched.Folded]>; } +} multiclass avx_fp_unop_rm opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { @@ -5400,6 +5431,7 @@ multiclass sse41_fp_unop_s opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { def SSr : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, hasSideEffects = 0 } +} multiclass sse41_fp_binop_s opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched, ValueType VT32, ValueType VT64, SDNode OpNode, bit Is2Addr = 1> { +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in { def SSr_Int : SS4AIi8; } // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 } +} // FP round - roundss, roundps, roundsd, roundpd let Predicates = [HasAVX, NoVLX] in { @@ -5959,6 +5994,7 @@ SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG; } +let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, load, f128mem, 0, @@ -5972,6 +6008,7 @@ VR256, load, i256mem, 0, SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG; } +} let Predicates = [HasAVX2] in { let isCommutable = 0 in { @@ -5991,11 +6028,11 @@ let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memop, f128mem, 1, - SchedWriteDPPS.XMM>; + SchedWriteDPPS.XMM>, SIMD_EXC; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, VR128, memop, f128mem, 1, - SchedWriteDPPD.XMM>; + SchedWriteDPPD.XMM>, SIMD_EXC; } /// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate Index: llvm/lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- llvm/lib/Target/X86/X86RegisterInfo.cpp +++ llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -523,6 +523,9 @@ // Set the floating point control register as reserved. Reserved.set(X86::FPCW); + // Set the SIMD floating point control register as reserved. + Reserved.set(X86::MXCSR); + // Set the stack-pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); ++I) Index: llvm/lib/Target/X86/X86RegisterInfo.td =================================================================== --- llvm/lib/Target/X86/X86RegisterInfo.td +++ llvm/lib/Target/X86/X86RegisterInfo.td @@ -294,6 +294,11 @@ // Floating-point control word def FPCW : X86Reg<"fpcr", 0>; +// SIMD Floating-point control register. +// Note: We only model the current rounding modes and the IEEE masks. +// IEEE flags, FTZ and DAZ are not modeled here. +def MXCSR : X86Reg<"mxcsr", 0>; + // Status flags register. // // Note that some flags that are commonly thought of as part of the status Index: llvm/test/CodeGen/MIR/X86/constant-pool.mir =================================================================== --- llvm/test/CodeGen/MIR/X86/constant-pool.mir +++ llvm/test/CodeGen/MIR/X86/constant-pool.mir @@ -61,12 +61,12 @@ alignment: 4 body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ - $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr + $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... --- @@ -89,10 +89,10 @@ value: 'float 6.250000e+00' body: | bb.0.entry: - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ - $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr + $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... --- @@ -117,12 +117,12 @@ alignment: 1 body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _ - $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr + $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... --- @@ -135,11 +135,11 @@ value: 'float 6.250000e+00' body: | bb.0.entry: - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.1 - 12, $noreg - ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.0 + 8, $noreg - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.1 - 12, _ - $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.0 + 8, _ - $xmm1 = CVTSS2SDrr killed $xmm1 - $xmm0 = MULSDrr killed $xmm0, killed $xmm1 + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.1 - 12, $noreg, implicit $mxcsr + ; CHECK-NEXT: $xmm1 = ADDSSrm killed $xmm1, $rip, 1, $noreg, %const.0 + 8, $noreg, implicit $mxcsr + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.1 - 12, _, implicit $mxcsr + $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.0 + 8, _, implicit $mxcsr + $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr + $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr RETQ $xmm0 ... Index: llvm/test/CodeGen/MIR/X86/fastmath.mir =================================================================== --- llvm/test/CodeGen/MIR/X86/fastmath.mir +++ llvm/test/CodeGen/MIR/X86/fastmath.mir @@ -10,24 +10,24 @@ ; CHECK: %0:fr32 = COPY $xmm0 %0:fr32 = COPY $xmm0 - ; CHECK: %1:fr32 = nnan VMULSSrr %0, %0 - %1:fr32 = nnan VMULSSrr %0, %0 - ; CHECK: %2:fr32 = ninf VMULSSrr %1, %1 - %2:fr32 = ninf VMULSSrr %1, %1 - ; CHECK: %3:fr32 = nsz VMULSSrr %2, %2 - %3:fr32 = nsz VMULSSrr %2, %2 - ; CHECK: %4:fr32 = arcp VMULSSrr %3, %3 - %4:fr32 = arcp VMULSSrr %3, %3 - ; CHECK: %5:fr32 = contract VMULSSrr %4, %4 - %5:fr32 = contract VMULSSrr %4, %4 - ; CHECK: %6:fr32 = afn VMULSSrr %5, %5 - %6:fr32 = afn VMULSSrr %5, %5 - ; CHECK: %7:fr32 = reassoc VMULSSrr %6, %6 - %7:fr32 = reassoc VMULSSrr %6, %6 - ; CHECK: %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7 - %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7 - ; CHECK: %9:fr32 = contract afn reassoc VMULSSrr %8, %8 - %9:fr32 = contract afn reassoc VMULSSrr %8, %8 + ; CHECK: %1:fr32 = nnan VMULSSrr %0, %0, implicit $mxcsr + %1:fr32 = nnan VMULSSrr %0, %0, implicit $mxcsr + ; CHECK: %2:fr32 = ninf VMULSSrr %1, %1, implicit $mxcsr + %2:fr32 = ninf VMULSSrr %1, %1, implicit $mxcsr + ; CHECK: %3:fr32 = nsz VMULSSrr %2, %2, implicit $mxcsr + %3:fr32 = nsz VMULSSrr %2, %2, implicit $mxcsr + ; CHECK: %4:fr32 = arcp VMULSSrr %3, %3, implicit $mxcsr + %4:fr32 = arcp VMULSSrr %3, %3, implicit $mxcsr + ; CHECK: %5:fr32 = contract VMULSSrr %4, %4, implicit $mxcsr + %5:fr32 = contract VMULSSrr %4, %4, implicit $mxcsr + ; CHECK: %6:fr32 = afn VMULSSrr %5, %5, implicit $mxcsr + %6:fr32 = afn VMULSSrr %5, %5, implicit $mxcsr + ; CHECK: %7:fr32 = reassoc VMULSSrr %6, %6, implicit $mxcsr + %7:fr32 = reassoc VMULSSrr %6, %6, implicit $mxcsr + ; CHECK: %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7, implicit $mxcsr + %8:fr32 = nsz arcp contract afn reassoc VMULSSrr %7, %7, implicit $mxcsr + ; CHECK: %9:fr32 = contract afn reassoc VMULSSrr %8, %8, implicit $mxcsr + %9:fr32 = contract afn reassoc VMULSSrr %8, %8, implicit $mxcsr ; CHECK: $xmm0 = COPY %9 $xmm0 = COPY %9 ; CHECK: RET 0, $xmm0 Index: llvm/test/CodeGen/MIR/X86/memory-operands.mir =================================================================== --- llvm/test/CodeGen/MIR/X86/memory-operands.mir +++ llvm/test/CodeGen/MIR/X86/memory-operands.mir @@ -336,10 +336,10 @@ bb.0.entry: liveins: $xmm0 ; CHECK: name: constant_pool_psv - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool) - ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool + 8) - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ :: (load 8 from constant-pool) - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _ :: (load 8 from constant-pool + 8) + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load 8 from constant-pool) + ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load 8 from constant-pool + 8) + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load 8 from constant-pool) + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load 8 from constant-pool + 8) RETQ $xmm0 ... --- Index: llvm/test/CodeGen/X86/evex-to-vex-compress.mir =================================================================== --- llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -2314,38 +2314,38 @@ $xmm0 = VMOVQI2PQIZrm $rip, 1, $rax, 0, $noreg ; CHECK: $xmm0 = VMOVZPQILo2PQIrr $xmm0 $xmm0 = VMOVZPQILo2PQIZrr $xmm0 - ; CHECK: VCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISDrr_Int $xmm0, $xmm1, implicit-def $eflags - VCOMISDZrr_Int $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VCOMISSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISSrr_Int $xmm0, $xmm1, implicit-def $eflags - VCOMISSZrr_Int $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISDrr_Int $xmm0, $xmm1, implicit-def $eflags - VUCOMISDZrr_Int $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISSrr_Int $xmm0, $xmm1, implicit-def $eflags - VUCOMISSZrr_Int $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VCOMISDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISDrr $xmm0, $xmm1, implicit-def $eflags - VCOMISDZrr $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VCOMISSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISSrr $xmm0, $xmm1, implicit-def $eflags - VCOMISSZrr $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISDrr $xmm0, $xmm1, implicit-def $eflags - VUCOMISDZrr $xmm0, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISSrr $xmm0, $xmm1, implicit-def $eflags - VUCOMISSZrr $xmm0, $xmm1, implicit-def $eflags + ; CHECK: VCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISDZrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISSZrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrm_Int $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrr_Int $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISDZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISSZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrr $xmm0, $xmm1, implicit-def $eflags, implicit $mxcsr ; CHECK: VEXTRACTPSmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 1 VEXTRACTPSZmr $rdi, 1, $noreg, 0, $noreg, $xmm0, 1 ; CHECK: $eax = VEXTRACTPSrr $xmm0, 1 @@ -4696,38 +4696,38 @@ $xmm16 = VMOVQI2PQIZrm $rip, 1, $rax, 0, $noreg ; CHECK: $xmm16 = VMOVZPQILo2PQIZrr $xmm16 $xmm16 = VMOVZPQILo2PQIZrr $xmm16 - ; CHECK: VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags - VCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags - VCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags - VUCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags - VUCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISDZrr $xmm16, $xmm1, implicit-def $eflags - VCOMISDZrr $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VCOMISSZrr $xmm16, $xmm1, implicit-def $eflags - VCOMISSZrr $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISDZrr $xmm16, $xmm1, implicit-def $eflags - VUCOMISDZrr $xmm16, $xmm1, implicit-def $eflags - ; CHECK: VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags - ; CHECK: VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags - VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags + ; CHECK: VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrm_Int $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrr_Int $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISDZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISDZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISDZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISDZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrm $xmm16, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit $mxcsr + ; CHECK: VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr + VUCOMISSZrr $xmm16, $xmm1, implicit-def $eflags, implicit $mxcsr ; CHECK: $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15 $xmm16 = VRNDSCALESDZm $xmm16, $rip, 1, $rax, 0, $noreg, 15 ; CHECK: $xmm16 = VRNDSCALESDZr $xmm16, $xmm1, 15 Index: llvm/test/CodeGen/X86/ipra-reg-usage.ll =================================================================== --- llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh +; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh call void @bar1() call void @bar2() ret void