Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12319,8 +12319,21 @@ StoreSDNode *St = cast(StoreNodes[i].MemNode); SDValue Val = St->getValue(); // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type. + while (Val.getValueType() != MemVT && Val->getOpcode() == ISD::BITCAST) + Val = Val.getOperand(0); + // Extract from the bitcast's source vector if its elements are MemVT. + // NOTE(review): index is reused as-is; confirm element counts match. + if (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Val.getOperand(0).getOpcode() == ISD::BITCAST) { + SDValue Vec = Val.getOperand(0).getOperand(0); + if (Vec.getValueType().getVectorElementType() == MemVT) + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Val), MemVT, Vec, + Val.getOperand(1)); + } + // TODO: add this check for EXTRACT_SUBVECTOR as well. if (Val.getValueType() != MemVT) return false; + Ops.push_back(Val); } @@ -12395,8 +12408,13 @@ // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + EVT MemVT = St->getMemoryVT(); + SDValue Val = St->getValue(); + while (Val.getValueType() != MemVT && Val->getOpcode() == ISD::BITCAST) + Val = Val.getOperand(0); + // We must have a base and an offset. if (!BasePtr.getBase().getNode()) return; @@ -12407,9 +12425,9 @@ bool IsConstantSrc = isa(St->getValue()) || isa(St->getValue()); - bool IsExtractVecSrc = - (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + + bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR); bool IsLoadSrc = isa(St->getValue()); BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. 
@@ -12419,15 +12437,14 @@ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { + bool EquivType = (Other->getMemoryVT() == MemVT) || + (MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT())); if (Other->isVolatile() || Other->isIndexed()) return false; - // We can merge constant floats to equivalent integers - if (Other->getMemoryVT() != MemVT) - if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) && - isa(Other->getValue()))) - return false; if (IsLoadSrc) { // The Load's Base Ptr must also match + if (!EquivType) + return false; if (LoadSDNode *OtherLd = dyn_cast(Other->getValue())) { auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) @@ -12435,14 +12452,26 @@ } else return false; } - if (IsConstantSrc) - if (!(isa(Other->getValue()) || - isa(Other->getValue()))) + if (IsConstantSrc) { + // May truncate things converted to integers + if (!EquivType || !(isa(Other->getValue()) || + isa(Other->getValue()))) return false; - if (IsExtractVecSrc) - if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR)) + } + if (IsExtractVecSrc) { + SDValue Val = Other->getValue(); + // Peel off bitcasts. NOTE(review): MatchVT is set below but never read. 
+ bool MatchVT = false; + while (Val.getOpcode() == ISD::BITCAST) { + if (Val.getValueType() == MemVT) + MatchVT = true; + Val = Val.getOperand(0); + } + if (!(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) return false; + } + Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; @@ -12714,7 +12743,13 @@ bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); - unsigned StoreValOpcode = St->getValue().getOpcode(); + SDValue StVal = St->getValue(); + // Peek through bitcasts so the opcode checked is the underlying value's. + while (StVal->getOpcode() == ISD::BITCAST) + StVal = StVal.getOperand(0); + + unsigned StoreValOpcode = StVal.getOpcode(); + // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a // vector. It should be possible to handle mixed sources, but load Index: test/CodeGen/X86/MergeConsecutiveStores.ll =================================================================== --- test/CodeGen/X86/MergeConsecutiveStores.ll +++ test/CodeGen/X86/MergeConsecutiveStores.ll @@ -622,9 +622,6 @@ ret void ; CHECK-LABEL: merge_bitcast -; CHECK: vmovd %xmm0, (%rdi) -; CHECK-NEXT: vpextrd $1, %xmm0, 4(%rdi) -; CHECK-NEXT: vpextrd $2, %xmm0, 8(%rdi) -; CHECK-NEXT: vpextrd $3, %xmm0, 12(%rdi) +; CHECK: vmovups %xmm0, (%rdi) ; CHECK-NEXT: retq }