Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -7302,7 +7302,7 @@
 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                          RegisterClass SrcRC, X86VectorVTInfo DstVT,
                          X86MemOperand x86memop, PatFrag ld_frag, string asm> {
-  let hasSideEffects = 0 in {
+  let hasSideEffects = 0, isCodeGenOnly = 1 in {
     def rr : SI,
@@ -7313,22 +7313,20 @@
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
   } // hasSideEffects = 0
-  let isCodeGenOnly = 1 in {
-    def rr_Int : SI,
-                 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
-
-    def rm_Int : SI,
-                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
-  }//isCodeGenOnly = 1
+  def rr_Int : SI,
+               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
+
+  def rm_Int : SI,
+               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 
 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
@@ -7372,9 +7370,9 @@
                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
-                (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+                (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+                (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
 
 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -7411,9 +7409,9 @@
                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
-                (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
 
 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
Index: llvm/trunk/lib/Target/X86/X86InstrFMA.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA.td
+++ llvm/trunk/lib/Target/X86/X86InstrFMA.td
@@ -236,7 +236,8 @@
            Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
 }
 
-let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
+    hasSideEffects = 0 in
 multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                        string OpStr, string PackTy, string Suff,
                        SDNode OpNode, RegisterClass RC,
@@ -262,8 +263,7 @@
 // the lowest element of the FMA*_Int instruction. Even though such analysis
 // may be not implemented yet we allow the routines doing the actual commute
 // transformation to decide if one or another instruction is commutable or not.
-let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
-    hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
 multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
                         Operand memopr, RegisterClass RC,
                         X86FoldableSchedWrite sched> {
Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td
@@ -21,6 +21,7 @@
                          RegisterClass RC, X86MemOperand x86memop,
                          Domain d, X86FoldableSchedWrite sched,
                          bit Is2Addr = 1> {
+let isCodeGenOnly = 1 in {
   let isCommutable = 1 in {
     def rr : SI,
       Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
+}
 
 /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
 multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
@@ -43,7 +45,7 @@
                                ValueType VT, string asm, Operand memopr,
                                ComplexPattern mem_cpat, Domain d,
                                X86FoldableSchedWrite sched, bit Is2Addr = 1> {
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let hasSideEffects = 0 in {
   def rr_Int : SI_Int,
@@ -877,28 +879,13 @@
                       "cvttsd2si\t{$src, $dst|$dst, $src}", WriteCvtSD2I>,
                       XD, VEX, VEX_W, VEX_LIG;
-
-def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
-                (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
-                (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
-                (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
-                (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
-                (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
-                (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
-                (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
-                (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0, "att">;
 }
+
 // The assembler can recognize rr 64-bit instructions by seeing a rxx
 // register, but the same isn't true when only using memory operands,
 // provide other assembly "l" and "q" forms to address this explicitly
 // where appropriate to do so.
+let isCodeGenOnly = 1 in {
 defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}",
                                   WriteCvtI2SS>, XS, VEX_4V, VEX_LIG;
 defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}",
@@ -907,11 +894,7 @@
                                   WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
 defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}",
                                   WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG;
-
-def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
-                (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0, "att">;
-def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
-                (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0, "att">;
+} // isCodeGenOnly = 1
 
 let Predicates = [UseAVX] in {
   def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
@@ -933,6 +916,7 @@
             (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
 }
 
+let isCodeGenOnly = 1 in {
 defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                       "cvttss2si\t{$src, $dst|$dst, $src}",
                       WriteCvtSS2I>, XS;
@@ -957,28 +941,7 @@
 defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
                       "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                       WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W;
-
-def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
-                (CVTTSS2SIrr GR32:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
-                (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
-                (CVTTSD2SIrr GR32:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
-                (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0, "att">;
-def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
-                (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
-                (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
-                (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
-                (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0, "att">;
-
-def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
-                (CVTSI2SSrm FR64:$dst, i32mem:$src), 0, "att">;
-def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
-                (CVTSI2SDrm FR64:$dst, i32mem:$src), 0, "att">;
+} // isCodeGenOnly = 1
 
 // Conversion Instructions Intrinsics - Match intrinsics which expect MM
 // and/or XMM operand(s).
@@ -1031,33 +994,40 @@
                                  sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>,
                                  XD, REX_W;
 
-let isCodeGenOnly = 1 in {
-  let Predicates = [UseAVX] in {
-  defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-            i32mem, "cvtsi2ss{l}", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG;
-  defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-            i64mem, "cvtsi2ss{q}", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W;
-  defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-            i32mem, "cvtsi2sd{l}", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
-  defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-            i64mem, "cvtsi2sd{q}", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W;
-  }
-  let Constraints = "$src1 = $dst" in {
-    defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-                      i32mem, "cvtsi2ss{l}", WriteCvtI2SS>, XS;
-    defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-                      i64mem, "cvtsi2ss{q}", WriteCvtI2SS>, XS, REX_W;
-    defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-                      i32mem, "cvtsi2sd{l}", WriteCvtI2SD>, XD;
-    defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-                      i64mem, "cvtsi2sd{q}", WriteCvtI2SD>, XD, REX_W;
-  }
-} // isCodeGenOnly = 1
+let Predicates = [UseAVX] in {
+defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+          i32mem, "cvtsi2ss{l}", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG;
+defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+          i64mem, "cvtsi2ss{q}", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W;
+defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+          i32mem, "cvtsi2sd{l}", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
+defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+          i64mem, "cvtsi2sd{q}", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W;
+}
+let Constraints = "$src1 = $dst" in {
+  defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                    i32mem, "cvtsi2ss{l}", WriteCvtI2SS>, XS;
+  defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+                    i64mem, "cvtsi2ss{q}", WriteCvtI2SS>, XS, REX_W;
+  defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                    i32mem, "cvtsi2sd{l}", WriteCvtI2SD>, XD;
+  defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+                    i64mem, "cvtsi2sd{q}", WriteCvtI2SD>, XD, REX_W;
+}
+
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+                (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+                (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
+
+def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
+                (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
+def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
+                (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;
 
 /// SSE 1 Only
 
 // Aliases for intrinsics
-let isCodeGenOnly = 1 in {
 let Predicates = [UseAVX] in {
   defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                    ssmem, sse_load_f32, "cvttss2si",
@@ -1086,7 +1056,40 @@
 defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int,
                                   sdmem, sse_load_f64, "cvttsd2si",
                                   WriteCvtSD2I>, XD, REX_W;
-} // isCodeGenOnly = 1
+
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+                (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+                (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
+
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+                (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+                (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
 
 let Predicates = [UseAVX] in {
   defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
@@ -1156,7 +1159,7 @@
 /// SSE 2 Only
 
 // Convert scalar double to scalar single
-let hasSideEffects = 0, Predicates = [UseAVX] in {
+let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX] in {
 def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                         (ins FR32:$src1, FR64:$src2),
                         "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1174,6 +1177,7 @@
           (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
           Requires<[UseAVX]>;
 
+let isCodeGenOnly = 1 in {
 def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fpround FR64:$src))]>,
@@ -1183,8 +1187,8 @@
                       [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
                       XD, Requires<[UseSSE2, OptForSize]>,
                       Sched<[WriteCvtSD2SS.Folded]>;
+}
 
-let isCodeGenOnly = 1 in {
 def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1214,11 +1218,10 @@
                  XD, Requires<[UseSSE2]>,
                  Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
 }
-} // isCodeGenOnly = 1
 
 // Convert scalar single to scalar double
 // SSE2 instructions with XS prefix
-let hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                     (ins FR64:$src1, FR32:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1231,7 +1234,7 @@
                     XS, VEX_4V, VEX_LIG, VEX_WIG,
                     Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
                     Requires<[UseAVX, OptForSize]>;
-}
+} // isCodeGenOnly = 1, hasSideEffects = 0
 
 def : Pat<(f64 (fpextend FR32:$src)),
           (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
@@ -1245,6 +1248,7 @@
           (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
           Requires<[UseAVX, OptForSpeed]>;
 
+let isCodeGenOnly = 1 in {
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (fpextend FR32:$src))]>,
@@ -1254,6 +1258,7 @@
                    [(set FR64:$dst, (extloadf32 addr:$src))]>,
                    XS, Requires<[UseSSE2, OptForSize]>,
                   Sched<[WriteCvtSS2SD.Folded]>;
+} // isCodeGenOnly = 1
 
 // extload f32 -> f64. This matches load+fpextend because we have a hack in
 // the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1265,7 +1270,7 @@
 def : Pat<(extloadf32 addr:$src),
           (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
 
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let hasSideEffects = 0 in {
 def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1290,7 +1295,7 @@
                       []>, XS, Requires<[UseSSE2]>,
                       Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
 }
-} // isCodeGenOnly = 1
+} // hasSideEffects = 0
 
 // Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
 // (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
@@ -1757,25 +1762,27 @@
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 
-let ExeDomain = SSEPackedSingle in
-defm VCMPSS : sse12_cmp_scalar, XS, VEX_4V, VEX_LIG, VEX_WIG;
-let ExeDomain = SSEPackedDouble in
-defm VCMPSD : sse12_cmp_scalar,
-              XD, VEX_4V, VEX_LIG, VEX_WIG;
-
-let Constraints = "$src1 = $dst" in {
+let isCodeGenOnly = 1 in {
   let ExeDomain = SSEPackedSingle in
-  defm CMPSS : sse12_cmp_scalar, XS;
+  defm VCMPSS : sse12_cmp_scalar, XS, VEX_4V, VEX_LIG, VEX_WIG;
   let ExeDomain = SSEPackedDouble in
-  defm CMPSD : sse12_cmp_scalar, XD;
+  defm VCMPSD : sse12_cmp_scalar,
+                XD, VEX_4V, VEX_LIG, VEX_WIG;
+
+  let Constraints = "$src1 = $dst" in {
+    let ExeDomain = SSEPackedSingle in
+    defm CMPSS : sse12_cmp_scalar, XS;
+    let ExeDomain = SSEPackedDouble in
+    defm CMPSD : sse12_cmp_scalar, XD;
+  }
 }
 
 multiclass sse12_cmp_scalar_int; }
 
-let isCodeGenOnly = 1 in {
-  // Aliases to match intrinsics which expect XMM operand(s).
+// Aliases to match intrinsics which expect XMM operand(s).
+let ExeDomain = SSEPackedSingle in
+defm VCMPSS : sse12_cmp_scalar_int,
+              XS, VEX_4V, VEX_LIG, VEX_WIG;
+let ExeDomain = SSEPackedDouble in
+defm VCMPSD : sse12_cmp_scalar_int,
+              XD, VEX_4V, VEX_LIG, VEX_WIG;
+let Constraints = "$src1 = $dst" in {
   let ExeDomain = SSEPackedSingle in
-  defm VCMPSS : sse12_cmp_scalar_int,
-                XS, VEX_4V, VEX_LIG, VEX_WIG;
+  defm CMPSS : sse12_cmp_scalar_int, XS;
   let ExeDomain = SSEPackedDouble in
-  defm VCMPSD : sse12_cmp_scalar_int,
-                XD, VEX_4V, VEX_LIG, VEX_WIG;
-  let Constraints = "$src1 = $dst" in {
-    let ExeDomain = SSEPackedSingle in
-    defm CMPSS : sse12_cmp_scalar_int, XS;
-    let ExeDomain = SSEPackedDouble in
-    defm CMPSD : sse12_cmp_scalar_int, XD;
-}
+  defm CMPSD : sse12_cmp_scalar_int, XD;
 }
@@ -2845,7 +2850,7 @@
                             ValueType ScalarVT, X86MemOperand x86memop,
                             Operand intmemop, SDNode OpNode, Domain d,
                             X86FoldableSchedWrite sched, Predicate target> {
-  let hasSideEffects = 0 in {
+  let isCodeGenOnly = 1, hasSideEffects = 0 in {
   def r : I,
           Sched<[sched]>,
@@ -2856,8 +2861,9 @@
          [(set RC:$dst, (OpNode (load addr:$src1)))], d>,
          Sched<[sched.Folded]>,
         Requires<[target, OptForSize]>;
+  }
 
-  let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
+  let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
   def r_Int : I,
               Sched<[sched]>;
@@ -2866,7 +2872,6 @@
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
-  }
 }
@@ -2911,7 +2916,7 @@
                             ValueType ScalarVT, X86MemOperand x86memop,
                             Operand intmemop, SDNode OpNode, Domain d,
                             X86FoldableSchedWrite sched, Predicate target> {
-  let hasSideEffects = 0 in {
+  let isCodeGenOnly = 1, hasSideEffects = 0 in {
   def r : I,
           Sched<[sched]>;
@@ -2919,7 +2924,8 @@
   def m : I,
          Sched<[sched.Folded, sched.ReadAfterFold]>;
-  let isCodeGenOnly = 1, ExeDomain = d in {
+  }
+  let hasSideEffects = 0, ExeDomain = d in {
   def r_Int : I,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
-  }
 
 // We don't want to fold scalar loads into these instructions unless
 // optimizing for size. This is because the folded instruction will have a
Index: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
@@ -964,6 +964,7 @@
 }
 def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
                                          CVTSS2SDrm, VCVTSS2SDrm,
+                                         CVTSS2SDrm_Int, VCVTSS2SDrm_Int,
                                          VPSLLVQrm, VPSRLVQrm)>;
Index: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td
@@ -1397,8 +1397,8 @@
   let ResourceCycles = [1,1,1];
 }
 def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDirm,
-                                           CVTSD2SSrm,
-                                           VCVTSD2SSrm)>;
+                                           CVTSD2SSrm, CVTSD2SSrm_Int,
+                                           VCVTSD2SSrm, VCVTSD2SSrm_Int)>;
 
 def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
   let Latency = 9;
Index: llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td
+++ llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td
@@ -901,7 +901,8 @@
   let Latency = 13;
   let NumMicroOps = 2;
 }
-def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;
+def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr,
+                                                                              CVTSI642SDrr_Int, CVTSI642SSrr_Int, CVTSI2SDrr_Int, CVTSI2SSrr_Int)>;
 
 defm : PdWriteResXMMPair;
 defm : PdWriteResYMMPair;
Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
+++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s
@@ -141,12 +141,12 @@
 # CHECK: Iterations: 500
 # CHECK-NEXT: Instructions: 500
-# CHECK-NEXT: Total Cycles: 515
+# CHECK-NEXT: Total Cycles: 6503
 # CHECK-NEXT: Total uOps: 1000
 
 # CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 1.94
-# CHECK-NEXT: IPC: 0.97
+# CHECK-NEXT: uOps Per Cycle: 0.15
+# CHECK-NEXT: IPC: 0.08
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK: Instruction Info:
@@ -197,12 +197,12 @@
 # CHECK: Iterations: 500
 # CHECK-NEXT: Instructions: 500
-# CHECK-NEXT: Total Cycles: 515
+# CHECK-NEXT: Total Cycles: 6503
 # CHECK-NEXT: Total uOps: 1000
 
 # CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 1.94
-# CHECK-NEXT: IPC: 0.97
+# CHECK-NEXT: uOps Per Cycle: 0.15
+# CHECK-NEXT: IPC: 0.08
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK: Instruction Info:
Index: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-2.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-2.s
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-2.s
@@ -126,12 +126,12 @@
 # CHECK: Iterations: 500
 # CHECK-NEXT: Instructions: 500
-# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total Cycles: 2003
 # CHECK-NEXT: Total uOps: 1000
 
 # CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.98
-# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK: Instruction Info:
@@ -173,12 +173,12 @@
 # CHECK: Iterations: 500
 # CHECK-NEXT: Instructions: 500
-# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total Cycles: 2003
 # CHECK-NEXT: Total uOps: 1000
 
 # CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 1.98
-# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.25
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK: Instruction Info: