Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -259,6 +259,23 @@
                            MaskingConstraint, NoItinerary, IsCommutable,
                            IsKCommutable>;
 
+// Similar to AVX512_maskable_common, but with scalar types.
+multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
+                                     dag Outs,
+                                     dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+                                     string OpcodeStr,
+                                     string AttSrcAsm, string IntelSrcAsm,
+                                     SDNode Select = vselect,
+                                     string MaskingConstraint = "",
+                                     InstrItinClass itin = NoItinerary,
+                                     bit IsCommutable = 0,
+                                     bit IsKCommutable = 0> :
+   AVX512_maskable_custom;
+
 // This multiclass generates the unconditional/non-masking, the masking and
 // the zero-masking variant of the vector instruction. In the masking case, the
 // perserved vector elements come from a new dummy input operand tied to $dst.
@@ -291,6 +308,18 @@
                    (X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
                    X86selects, "$src0 = $dst", itin, IsCommutable>;
 
+// Similar to AVX512_maskable_scalar, but with scalar types.
+multiclass AVX512_maskable_fp_scalar<bits<8> O, Format F, X86VectorVTInfo _,
+                                     dag Outs, dag Ins, string OpcodeStr,
+                                     string AttSrcAsm, string IntelSrcAsm,
+                                     InstrItinClass itin = NoItinerary,
+                                     bit IsCommutable = 0> :
+   AVX512_maskable_fp_common;
+
 // Similar to AVX512_maskable but in this case one of the source operands
 // ($src1) is already tied to $dst so we just use that for the preserved
 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
@@ -6030,27 +6059,40 @@
 //===----------------------------------------------------------------------===//
 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                 X86VectorVTInfo _Src, SDNode OpNode> {
-  defm rr : AVX512_maskable_scalar, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
-  defm rm : AVX512_maskable_scalar, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+  }
+
+  defm rr : AVX512_maskable_fp_scalar,
+            EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+  let mayLoad = 1 in
+  defm rm : AVX512_maskable_fp_scalar,
+            EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+  }
 
 // Scalar Coversion with SAE - suppress all exceptions
 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                     X86VectorVTInfo _Src, SDNode OpNodeRnd> {
-  defm rrb : AVX512_maskable_scalar
 opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeRnd> {
-  defm rrb : AVX512_maskable_scalar;
 
 def : Pat<(f64 (fpextend FR32X:$src)),
-          (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
-                            (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+          (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
           Requires<[HasAVX512]>;
 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
-          (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512]>;
 def : Pat<(f64 (extloadf32 addr:$src)),
-          (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;
 def : Pat<(f64 (extloadf32 addr:$src)),
-          (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
-                            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
          Requires<[HasAVX512, OptForSpeed]>;
 def : Pat<(f32 (fpround FR64X:$src)),
-          (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
-                            (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
+          (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
          Requires<[HasAVX512]>;
 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
                    (v4f32 (scalar_to_vector
                      (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
-          (VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
+          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
 
 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector
                      (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
-          (VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
+          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
 
 //===----------------------------------------------------------------------===//
Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -1851,6 +1851,10 @@
     { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
     { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
     { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
+    { X86::VCVTSS2SDZrr, X86::VCVTSS2SDZrm, 0 },
+    { X86::VCVTSS2SDZrr_Int, X86::VCVTSS2SDZrm_Int, TB_NO_REVERSE },
+    { X86::VCVTSD2SSZrr, X86::VCVTSD2SSZrm, 0 },
+    { X86::VCVTSD2SSZrr_Int, X86::VCVTSD2SSZrm_Int, TB_NO_REVERSE },
     { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
     { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
     { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
@@ -8165,11 +8169,15 @@
   case X86::VCVTUSI642SDZrrb_Int:
   case X86::VCVTUSI642SDZrm_Int:
   case X86::VCVTSD2SSZrr:
-  case X86::VCVTSD2SSZrrb:
+  case X86::VCVTSD2SSZrr_Int:
+  case X86::VCVTSD2SSZrrb_Int:
   case X86::VCVTSD2SSZrm:
+  case X86::VCVTSD2SSZrm_Int:
   case X86::VCVTSS2SDZrr:
-  case X86::VCVTSS2SDZrrb:
+  case X86::VCVTSS2SDZrr_Int:
+  case X86::VCVTSS2SDZrrb_Int:
   case X86::VCVTSS2SDZrm:
+  case X86::VCVTSS2SDZrm_Int:
   case X86::VRNDSCALESDr:
   case X86::VRNDSCALESDrb:
   case X86::VRNDSCALESDm:
Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td
@@ -1716,20 +1716,21 @@
 // Convert scalar double to scalar single
 let hasSideEffects = 0, Predicates = [UseAVX] in {
 def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
-                        (ins FR64:$src1, FR64:$src2),
+                        (ins FR32:$src1, FR64:$src2),
                         "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                         IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
                         Sched<[WriteCvtF2F]>, VEX_WIG;
 let mayLoad = 1 in
 def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
-                     (ins FR64:$src1, f64mem:$src2),
+                     (ins FR32:$src1, f64mem:$src2),
                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [], IIC_SSE_CVT_Scalar_RM>,
                      XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
                      Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
 }
 
-def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+def : Pat<(f32 (fpround FR64:$src)),
+          (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
          Requires<[UseAVX]>;
 
 def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@@ -1781,14 +1782,14 @@
 // SSE2 instructions with XS prefix
 let hasSideEffects = 0, Predicates = [UseAVX] in {
 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
-                    (ins FR32:$src1, FR32:$src2),
+                    (ins FR64:$src1, FR32:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                     IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasAVX]>,
                     VEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>, VEX_WIG;
 let mayLoad = 1 in
 def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
-                    (ins FR32:$src1, f32mem:$src2),
+                    (ins FR64:$src1, f32mem:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                     IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG,
                     Requires<[HasAVX, OptForSize]>,
@@ -1796,15 +1797,15 @@
 }
 
 def : Pat<(f64 (fpextend FR32:$src)),
-          (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
+          (VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
 def : Pat<(fpextend (loadf32 addr:$src)),
-          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
+          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
 def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[UseAVX, OptForSize]>;
 def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
          Requires<[UseAVX, OptForSpeed]>;
 
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
Index: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
 ;
 ; Half to Float
@@ -1941,25 +1941,25 @@
 ; AVX1-LABEL: cvt_8i16_to_8f64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovq %xmm0, %rdx
-; AVX1-NEXT:    movq %rdx, %r9
+; AVX1-NEXT:    movq %rdx, %r8
 ; AVX1-NEXT:    movl %edx, %r10d
-; AVX1-NEXT:    movswl %dx, %r8d
+; AVX1-NEXT:    movswl %dx, %r9d
 ; AVX1-NEXT:    shrq $48, %rdx
-; AVX1-NEXT:    shrq $32, %r9
+; AVX1-NEXT:    shrq $32, %r8
 ; AVX1-NEXT:    shrl $16, %r10d
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX1-NEXT:    movq %rdi, %rsi
-; AVX1-NEXT:    movl %edi, %eax
+; AVX1-NEXT:    movq %rdi, %rax
+; AVX1-NEXT:    movl %edi, %esi
 ; AVX1-NEXT:    movswl %di, %ecx
 ; AVX1-NEXT:    shrq $48, %rdi
-; AVX1-NEXT:    shrq $32, %rsi
-; AVX1-NEXT:    shrl $16, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    shrq $32, %rax
+; AVX1-NEXT:    shrl $16, %esi
+; AVX1-NEXT:    movswl %si, %esi
+; AVX1-NEXT:    vmovd %esi, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm1
 ; AVX1-NEXT:    vmovd %ecx, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm2
-; AVX1-NEXT:    movswl %si, %eax
+; AVX1-NEXT:    cwtl
 ; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm3
 ; AVX1-NEXT:    movswl %di, %eax
@@ -1968,9 +1968,9 @@
 ; AVX1-NEXT:    movswl %r10w, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT:    vmovd %r8d, %xmm5
+; AVX1-NEXT:    vmovd %r9d, %xmm5
 ; AVX1-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX1-NEXT:    movswl %r9w, %eax
+; AVX1-NEXT:    movswl %r8w, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm6
 ; AVX1-NEXT:    vcvtph2ps %xmm6, %xmm6
 ; AVX1-NEXT:    movswl %dx, %eax
@@ -1995,25 +1995,25 @@
 ; AVX2-LABEL: cvt_8i16_to_8f64:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vmovq %xmm0, %rdx
-; AVX2-NEXT:    movq %rdx, %r9
+; AVX2-NEXT:    movq %rdx, %r8
 ; AVX2-NEXT:    movl %edx, %r10d
-; AVX2-NEXT:    movswl %dx, %r8d
+; AVX2-NEXT:    movswl %dx, %r9d
 ; AVX2-NEXT:    shrq $48, %rdx
-; AVX2-NEXT:    shrq $32, %r9
+; AVX2-NEXT:    shrq $32, %r8
 ; AVX2-NEXT:    shrl $16, %r10d
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX2-NEXT:    movq %rdi, %rsi
-; AVX2-NEXT:    movl %edi, %eax
+; AVX2-NEXT:    movq %rdi, %rax
+; AVX2-NEXT:    movl %edi, %esi
 ; AVX2-NEXT:    movswl %di, %ecx
 ; AVX2-NEXT:    shrq $48, %rdi
-; AVX2-NEXT:    shrq $32, %rsi
-; AVX2-NEXT:    shrl $16, %eax
-; AVX2-NEXT:    cwtl
-; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    shrq $32, %rax
+; AVX2-NEXT:    shrl $16, %esi
+; AVX2-NEXT:    movswl %si, %esi
+; AVX2-NEXT:    vmovd %esi, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm1
 ; AVX2-NEXT:    vmovd %ecx, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm2
-; AVX2-NEXT:    movswl %si, %eax
+; AVX2-NEXT:    cwtl
 ; AVX2-NEXT:    vmovd %eax, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm3
 ; AVX2-NEXT:    movswl %di, %eax
@@ -2022,9 +2022,9 @@
 ; AVX2-NEXT:    movswl %r10w, %eax
 ; AVX2-NEXT:    vmovd %eax, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT:    vmovd %r8d, %xmm5
+; AVX2-NEXT:    vmovd %r9d, %xmm5
 ; AVX2-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX2-NEXT:    movswl %r9w, %eax
+; AVX2-NEXT:    movswl %r8w, %eax
 ; AVX2-NEXT:    vmovd %eax, %xmm6
 ; AVX2-NEXT:    vcvtph2ps %xmm6, %xmm6
 ; AVX2-NEXT:    movswl %dx, %eax
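
For reference, and not part of the patch itself: a minimal standalone IR sketch (function names are illustrative, not from the patch) whose fpext/fptrunc should select the VCVTSS2SDZrr/VCVTSD2SSZrr patterns changed above when compiled with llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs, mirroring the RUN lines in the updated test.

; Illustrative reproducer (assumed): fpext float->double maps to vcvtss2sd,
; fptrunc double->float maps to vcvtsd2ss on an AVX-512 target.
define double @fpext_f32_to_f64(float %x) {
  %d = fpext float %x to double
  ret double %d
}

define float @fptrunc_f64_to_f32(double %x) {
  %f = fptrunc double %x to float
  ret float %f
}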