Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12393,7 +12393,22 @@ for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); SDValue Val = St->getValue(); - // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type. + // Peek through bitcasts. + if (!St->isTruncatingStore()) + while (Val.getOpcode() == ISD::BITCAST) + Val = Val.getOperand(0); + // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of type MemVT. + // If the underlying value is not the correct type, but it is an + // extraction of an appropriate vector we can recast Val to be of the + // correct type. + if (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Val.getOperand(0).getOpcode() == ISD::BITCAST) { + SDValue Vec = Val.getOperand(0).getOperand(0); + if (Vec.getValueType().getVectorElementType() == MemVT) + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Val), MemVT, Vec, + Val.getOperand(1)); + } + // TODO: add this check for EXTRACT_SUBVECTOR as well. if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); @@ -12472,6 +12487,12 @@ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); EVT MemVT = St->getMemoryVT(); + SDValue Val = St->getValue(); + // Only peek through bitcasts of non-truncstores + if (!St->isTruncatingStore()) + while (Val.getValueType() != MemVT && Val.getOpcode() == ISD::BITCAST) + Val = Val.getOperand(0); + // We must have a base and an offset. if (!BasePtr.getBase().getNode()) return; @@ -12482,9 +12503,8 @@ bool IsConstantSrc = isa(St->getValue()) || isa(St->getValue()); - bool IsExtractVecSrc = - (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR); bool IsLoadSrc = isa(St->getValue()); BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. @@ -12496,12 +12516,11 @@ int64_t &Offset) -> bool { if (Other->isVolatile() || Other->isIndexed()) return false; - // We can merge constant floats to equivalent integers - if (Other->getMemoryVT() != MemVT) - if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) && - isa(Other->getValue()))) - return false; if (IsLoadSrc) { + // Allow loads of different types to merge as integers. + if (MemVT.isInteger() ? !MemVT.bitsEq(Other->getMemoryVT()) + : Other->getMemoryVT() != MemVT) + return false; // The Load's Base Ptr must also match if (LoadSDNode *OtherLd = dyn_cast(Other->getValue())) { auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); @@ -12510,14 +12529,26 @@ } else return false; } - if (IsConstantSrc) + if (IsConstantSrc) { + // Allow merging constants of different types as integers. + if (MemVT.isInteger() ? !MemVT.bitsEq(Other->getMemoryVT()) + : Other->getMemoryVT() != MemVT) + return false; if (!(isa(Other->getValue()) || isa(Other->getValue()))) return false; - if (IsExtractVecSrc) - if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR)) + } + if (IsExtractVecSrc) { + // Peek through bitcasts. + SDValue Val = Other->getValue(); + if (!Other->isTruncatingStore()) + while (Val.getOpcode() == ISD::BITCAST) + Val = Val.getOperand(0); + if (!MemVT.bitsEq(Val.getValueType()) || + !(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) return false; + } Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; @@ -12789,14 +12820,18 @@ bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); - unsigned StoreValOpcode = St->getValue().getOpcode(); + SDValue StVal = St->getValue(); + // Peek through bitcasts. + while (StVal.getOpcode() == ISD::BITCAST) + StVal = StVal.getOperand(0); + // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a // vector. It should be possible to handle mixed sources, but load // sources need more careful handling (see the block of code below that // handles consecutive loads). - if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && - StoreValOpcode != ISD::EXTRACT_SUBVECTOR) + if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT && + StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR) return RV; // Find a legal type for the vector store. Index: test/CodeGen/X86/MergeConsecutiveStores.ll =================================================================== --- test/CodeGen/X86/MergeConsecutiveStores.ll +++ test/CodeGen/X86/MergeConsecutiveStores.ll @@ -622,9 +622,6 @@ ret void ; CHECK-LABEL: merge_bitcast -; CHECK: vmovd %xmm0, (%rdi) -; CHECK-NEXT: vpextrd $1, %xmm0, 4(%rdi) -; CHECK-NEXT: vpextrd $2, %xmm0, 8(%rdi) -; CHECK-NEXT: vpextrd $3, %xmm0, 12(%rdi) +; CHECK: vmovups %xmm0, (%rdi) ; CHECK-NEXT: retq }