diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1093,7 +1093,8 @@
   /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
   /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
   /// constant pool entry.
-  virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
+  virtual bool isVectorClearMaskLegal(SDValue /*N0*/, SDValue /*N1*/,
+                                      ArrayRef<int> /*Mask*/,
                                       EVT /*VT*/) const {
     return false;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23163,17 +23163,23 @@
       if (0 <= Mask[I])
         AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
 
+      SDValue BitCastN0 = DAG.getBitcast(IntVT, N0);
+      SDValue ZeroInIntVT = DAG.getConstant(0, DL, IntVT);
       // See if a clear mask is legal instead of going via
       // XformToShuffleWithZero which loses UNDEF mask elements.
-      if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
-        return DAG.getBitcast(
-            VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
-                                     DAG.getConstant(0, DL, IntVT), ClearMask));
+      if (TLI.isVectorClearMaskLegal(BitCastN0, ZeroInIntVT, ClearMask,
+                                     IntVT))
+        return DAG.getBitcast(VT,
+                              DAG.getVectorShuffle(IntVT, DL, BitCastN0,
+                                                   ZeroInIntVT, ClearMask));
 
       if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
         return DAG.getBitcast(
-            VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
+            VT, DAG.getNode(ISD::AND, DL, IntVT, BitCastN0,
                             DAG.getBuildVector(IntVT, DL, AndMask)));
+
+      if (BitCastN0.use_empty())
+        DAG.RemoveDeadNode(BitCastN0.getNode());
     }
   }
 }
@@ -23938,13 +23944,16 @@
     // Let's see if the target supports this vector_shuffle.
     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
-    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
+    SDValue BitcastLHS = DAG.getBitcast(ClearVT, LHS);
+    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
+    if (!TLI.isVectorClearMaskLegal(BitcastLHS, Zero, Indices, ClearVT)) {
+      if (BitcastLHS.use_empty())
+        DAG.RemoveDeadNode(BitcastLHS.getNode());
       return SDValue();
+    }
 
-    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
-    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
-                                                   DAG.getBitcast(ClearVT, LHS),
-                                                   Zero, Indices));
+    return DAG.getBitcast(
+        VT, DAG.getVectorShuffle(ClearVT, DL, BitcastLHS, Zero, Indices));
   };
 
   // Determine maximum split level (byte level masking).
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -566,7 +566,8 @@
   /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
   /// shuffle mask can be codegen'd directly.
-  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
+  bool isVectorClearMaskLegal(SDValue N0, SDValue N1, ArrayRef<int> M,
+                              EVT VT) const override;
 
   /// Return the ISD::SETCC ValueType.
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12549,7 +12549,8 @@
           isConcatMask(M, VT, VT.getSizeInBits() == 128));
 }
 
-bool AArch64TargetLowering::isVectorClearMaskLegal(ArrayRef<int> M,
+bool AArch64TargetLowering::isVectorClearMaskLegal(SDValue N0, SDValue N1,
+                                                   ArrayRef<int> M,
                                                    EVT VT) const {
   // Just delegate to the generic legality, clear masks aren't special.
   return isShuffleMaskLegal(M, VT);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1345,7 +1345,8 @@
     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
-    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
+    bool isVectorClearMaskLegal(SDValue N0, SDValue N1, ArrayRef<int> Mask,
+                                EVT VT) const override;
 
     /// Returns true if lowering to a jump table is allowed.
     bool areJTsAllowed(const Function *Fn) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34582,7 +34582,8 @@
   return isTypeLegal(VT.getSimpleVT());
 }
 
-bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
+bool X86TargetLowering::isVectorClearMaskLegal(SDValue N0, SDValue N1,
+                                               ArrayRef<int> Mask,
                                                EVT VT) const {
   // Don't convert an 'and' into a shuffle that we don't directly support.
  // vpblendw and vpshufb for 256-bit vectors are not available on AVX1.
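
Note (usage sketch, not part of the patch): with N0 and N1 now passed through,
a target override can make operand-sensitive legality decisions instead of
judging the mask alone. The MyTargetLowering class and the load-based
heuristic below are illustrative assumptions only, not code from this change:

  // Hypothetical override: with the AND operands visible, a target could
  // decline the shuffle form when the source is a normal load, on the
  // assumption that the load folds more cheaply into a plain vector AND
  // than into a shuffle with an all-zeros vector.
  bool MyTargetLowering::isVectorClearMaskLegal(SDValue N0, SDValue N1,
                                                ArrayRef<int> Mask,
                                                EVT VT) const {
    // N0 is the bitcast AND input; N1 is the combiner-built zero vector.
    if (ISD::isNormalLoad(N0.getNode()))
      return false; // assumed heuristic: keep the AND for load sources
    return isShuffleMaskLegal(Mask, VT);
  }

Because both DAGCombiner call sites now build the bitcast and zero operands
before the legality query, each one removes the speculatively created bitcast
with RemoveDeadNode when the transform does not fire and the node is unused.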