diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36402,51 +36402,6 @@ return SDValue(N, 0); } - // Look for a v2i64/v2f64 VZEXT_MOVL of a node that already produces zeros - // in the upper 64 bits. - // TODO: Can we generalize this using computeKnownBits. - if (N->getOpcode() == X86ISD::VZEXT_MOVL && - (VT == MVT::v2f64 || VT == MVT::v2i64) && - N->getOperand(0).getOpcode() == ISD::BITCAST) { - SDValue In = N->getOperand(0).getOperand(0); - EVT InVT = In.getValueType(); - switch (In.getOpcode()) { - default: - break; - case X86ISD::CVTP2SI: case X86ISD::CVTP2UI: - case X86ISD::MCVTP2SI: case X86ISD::MCVTP2UI: - case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI: - case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI: - case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: - case X86ISD::MCVTSI2P: case X86ISD::MCVTUI2P: - case X86ISD::VFPROUND: case X86ISD::VMFPROUND: - if ((InVT == MVT::v4f32 || InVT == MVT::v4i32) && - (In.getOperand(0).getValueType() == MVT::v2f64 || - In.getOperand(0).getValueType() == MVT::v2i64)) - return N->getOperand(0); // return the bitcast - break; - case X86ISD::STRICT_CVTTP2SI: - case X86ISD::STRICT_CVTTP2UI: - case X86ISD::STRICT_CVTSI2P: - case X86ISD::STRICT_CVTUI2P: - case X86ISD::STRICT_VFPROUND: - if ((InVT == MVT::v4f32 || InVT == MVT::v4i32) && - (In.getOperand(1).getValueType() == MVT::v2f64 || - In.getOperand(1).getValueType() == MVT::v2i64)) - return N->getOperand(0); // return the bitcast - break; - case X86ISD::CVTPS2PH: - case X86ISD::MCVTPS2PH: - if (InVT == MVT::v8i16 && In.getOperand(0).getValueType() == MVT::v4f32) - return N->getOperand(0); // return the bitcast - break; - case X86ISD::STRICT_CVTPS2PH: - if (InVT == MVT::v8i16 && In.getOperand(1).getValueType() == MVT::v4f32) - return N->getOperand(0); // return the bitcast - break; - } - } - // Pull subvector inserts into undef through VZEXT_MOVL by making it an // insert into a zero vector. This helps get VZEXT_MOVL closer to // scalar_to_vectors where 256/512 are canonicalized to an insert and a @@ -36614,6 +36569,38 @@ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; + LLVM_FALLTHROUGH; + } + case X86ISD::CVTP2SI: + case X86ISD::CVTP2UI: + case X86ISD::MCVTP2SI: + case X86ISD::MCVTP2UI: + case X86ISD::CVTTP2SI: + case X86ISD::CVTTP2UI: + case X86ISD::MCVTTP2SI: + case X86ISD::MCVTTP2UI: + case X86ISD::MCVTSI2P: + case X86ISD::MCVTUI2P: + case X86ISD::VFPROUND: + case X86ISD::VMFPROUND: + case X86ISD::CVTPS2PH: + case X86ISD::MCVTPS2PH: { + // Conversions - upper elements are known zero. + int NumSrcElts = Op.getOperand(0).getValueType().getVectorNumElements(); + if (NumElts > NumSrcElts) + KnownZero = APInt::getHighBitsSet(NumElts, NumElts - NumSrcElts); + break; + } + case X86ISD::STRICT_CVTTP2SI: + case X86ISD::STRICT_CVTTP2UI: + case X86ISD::STRICT_CVTSI2P: + case X86ISD::STRICT_CVTUI2P: + case X86ISD::STRICT_VFPROUND: + case X86ISD::STRICT_CVTPS2PH: { + // Strict Conversions - upper elements are known zero. + int NumSrcElts = Op.getOperand(1).getValueType().getVectorNumElements(); + if (NumElts > NumSrcElts) + KnownZero = APInt::getHighBitsSet(NumElts, NumElts - NumSrcElts); break; } case X86ISD::PACKSS: @@ -36702,6 +36689,18 @@ KnownUndef = LHSUndef & RHSUndef; break; } + case X86ISD::VZEXT_MOVL: { + SDValue Src = Op.getOperand(0); + if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, + TLO, Depth + 1)) + return true; + + // If upper demanded elements are already zero then we have nothing to do. + // TODO - move this into shuffle code below. + if ((NumElts - 1) <= (int)(KnownZero | ~DemandedElts).countLeadingOnes()) + return TLO.CombineTo(Op, Src); + break; + } case X86ISD::VBROADCAST: { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType();