Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -32499,6 +32499,32 @@
     return SDValue(N, 0);
   }
 
+  // Look for a v2i64/v2f64 VZEXT_MOVL of a node that already produces zeros
+  // in the upper 64 bits.
+  // TODO: Can we generalize this using computeKnownBits.
+  if (N->getOpcode() == X86ISD::VZEXT_MOVL &&
+      (VT == MVT::v2f64 || VT == MVT::v2i64) &&
+      N->getOperand(0).getOpcode() == ISD::BITCAST &&
+      (N->getOperand(0).getOperand(0).getValueType() == MVT::v4f32 ||
+       N->getOperand(0).getOperand(0).getValueType() == MVT::v4i32)) {
+    SDValue In = N->getOperand(0).getOperand(0);
+    switch (In.getOpcode()) {
+    default:
+      break;
+    case X86ISD::CVTP2SI:   case X86ISD::CVTP2UI:
+    case X86ISD::MCVTP2SI:  case X86ISD::MCVTP2UI:
+    case X86ISD::CVTTP2SI:  case X86ISD::CVTTP2UI:
+    case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI:
+    case X86ISD::CVTSI2P:   case X86ISD::CVTUI2P:
+    case X86ISD::MCVTSI2P:  case X86ISD::MCVTUI2P:
+    case X86ISD::VFPROUND:  case X86ISD::VMFPROUND:
+      if (In.getOperand(0).getValueType() == MVT::v2f64 ||
+          In.getOperand(0).getValueType() == MVT::v2i64)
+        return N->getOperand(0); // return the bitcast
+      break;
+    }
+  }
+
   // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
   // operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
   // FIXME: This can probably go away once we default to widening legalization.
Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -8184,12 +8184,6 @@
                     v4f32x_info.ImmAllZerosV),
             (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
 
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
-            (VCVTPD2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-            (VCVTPD2PSZ128rm addr:$src)>;
   def : Pat<(v2f64 (extloadv2f32 addr:$src)),
             (VCVTPS2PDZ128rm addr:$src)>;
   def : Pat<(v4f64 (extloadv4f32 addr:$src)),
@@ -8831,25 +8825,6 @@
 }
 
 let Predicates = [HasAVX512, HasVLX] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
-            (VCVTPD2DQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-            (VCVTPD2DQZ128rm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
-            (VCVTPD2UDQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
-            (VCVTTPD2DQZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-            (VCVTTPD2DQZ128rm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
-            (VCVTTPD2UDQZ128rr VR128X:$src)>;
-
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (VCVTDQ2PDZ128rm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
@@ -8862,13 +8837,6 @@
 }
 
 let Predicates = [HasDQI, HasVLX] in {
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
-            (VCVTQQ2PSZ128rr VR128X:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
-            (VCVTUQQ2PSZ128rr VR128X:$src)>;
-
   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
   // patterns have been disabled with null_frag.
   def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td
@@ -1617,21 +1617,6 @@
                     (VCVTTPD2DQYrm addr:$src)>;
 }
 
-let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-            (VCVTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-            (VCVTPD2DQrm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-            (VCVTTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-            (VCVTTPD2DQrm addr:$src)>;
-} // Predicates = [HasAVX, NoVLX]
-
 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -1643,21 +1628,6 @@
                        (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>,
                       Sched<[WriteCvtPD2ILd]>;
 
-let Predicates = [UseSSE2] in {
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-            (CVTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
-            (CVTPD2DQrm addr:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-            (CVTTPD2DQrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2i64 (bitconvert
-                               (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
-            (CVTTPD2DQrm addr:$src)>;
-} // Predicates = [UseSSE2]
-
 // Convert packed single to packed double
 let Predicates = [HasAVX, NoVLX] in {
 // SSE2 instructions without OpSize prefix
@@ -1790,33 +1760,11 @@
                         [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
                         Sched<[WriteCvtPD2PS.Folded]>;
 
-// AVX 256-bit register conversion intrinsics
-// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
-// whenever possible to avoid declaring two versions of each one.
-
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
             (VCVTPD2PSYrr VR256:$src)>;
   def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
-
-  // Match fpround and fpextend for 128/256-bit conversions
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-            (VCVTPD2PSrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-            (VCVTPD2PSrm addr:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
-  // Match fpround and fpextend for 128 conversions
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-            (CVTPD2PSrr VR128:$src)>;
-  def : Pat<(X86vzmovl (v2f64 (bitconvert
-                               (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
-            (CVTPD2PSrm addr:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -223,8 +223,6 @@
 ; X86: # %bb.0:
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -233,8 +231,6 @@
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -445,8 +441,6 @@
 ; X86: # %bb.0:
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
 ; X86-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -455,8 +449,6 @@
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -3327,8 +3327,6 @@
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3337,8 +3335,6 @@
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3381,8 +3377,6 @@
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
 ; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3391,8 +3385,6 @@
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
 ; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3435,8 +3427,6 @@
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3445,8 +3435,6 @@
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvtpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3616,8 +3604,6 @@
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3626,8 +3612,6 @@
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3670,8 +3654,6 @@
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
-; X86-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X86-NEXT: # xmm1 = xmm1[0],zero
 ; X86-NEXT: vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3680,8 +3662,6 @@
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8]
-; X64-NEXT: vmovq %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9]
-; X64-NEXT: # xmm1 = xmm1[0],zero
 ; X64-NEXT: vcvttpd2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]