Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12407,7 +12407,43 @@ for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); SDValue Val = St->getValue(); - // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type. + // Peek through bitcasts. + if (!St->isTruncatingStore()) + while (Val.getOpcode() == ISD::BITCAST) + Val = Val.getOperand(0); + // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of type MemVT. + // If the underlying value is not the correct type, but it is an + // extraction of an appropriate vector we can recast Val to be of the + // correct type. + if (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Val.getOperand(0).getOpcode() == ISD::BITCAST) { + SDValue Vec = Val.getOperand(0).getOperand(0); + // We may need to add a bitcast here to get types to line up. + if (MemVT != Val.getValueType()) { + EVT NewVecScalarTy = MemVT.getScalarType(); + unsigned Elts = Vec.getValueType().getSizeInBits() / NewVecScalarTy.getSizeInBits(); + EVT NewVecTy = EVT::getVectorVT(*DAG.getContext(), NewVecScalarTy, Elts); + Vec = DAG.getBitcast(NewVecTy, Vec); + } + if (Vec.getValueType().getVectorElementType() == MemVT) + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Val), MemVT, Vec, + Val.getOperand(1)); + } + else if (Val.getOpcode() == ISD::EXTRACT_SUBVECTOR && + Val.getOperand(0).getOpcode() == ISD::BITCAST) { + SDValue Vec = Val.getOperand(0).getOperand(0); + // We may need to add a bitcast here to get types to line up. + if (MemVT != Val.getValueType()){ + EVT NewVecScalarTy = MemVT.getScalarType(); + unsigned Elts = Vec.getValueType().getSizeInBits() / NewVecScalarTy.getSizeInBits(); + EVT NewVecTy = EVT::getVectorVT(*DAG.getContext(), NewVecScalarTy, Elts); + Vec = DAG.getBitcast(NewVecTy, Vec); + } + if (Vec.getValueType().getVectorElementType() == MemVT.getVectorElementType()) + Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Val), MemVT, Vec, + Val.getOperand(1)); + } + // TODO: add this check for EXTRACT_SUBVECTOR as well. if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); @@ -12486,6 +12522,12 @@ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); EVT MemVT = St->getMemoryVT(); + SDValue Val = St->getValue(); + // Only peek through bitcasts of non-truncstores + if (!St->isTruncatingStore()) + while (Val.getValueType() != MemVT && Val->getOpcode() == ISD::BITCAST) + Val = Val.getOperand(0); + // We must have a base and an offset. if (!BasePtr.getBase().getNode()) return; @@ -12496,9 +12538,8 @@ bool IsConstantSrc = isa(St->getValue()) || isa(St->getValue()); - bool IsExtractVecSrc = - (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR); bool IsLoadSrc = isa(St->getValue()); BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. @@ -12510,12 +12551,11 @@ int64_t &Offset) -> bool { if (Other->isVolatile() || Other->isIndexed()) return false; - // We can merge constant floats to equivalent integers - if (Other->getMemoryVT() != MemVT) - if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) && - isa(Other->getValue()))) - return false; if (IsLoadSrc) { + // Allow loads of different types to merge as integers. + if (MemVT.isInteger() ? !MemVT.bitsEq(Other->getMemoryVT()) + : Other->getMemoryVT() != MemVT) + return false; // The Load's Base Ptr must also match if (LoadSDNode *OtherLd = dyn_cast(Other->getValue())) { auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); @@ -12524,14 +12564,26 @@ } else return false; } - if (IsConstantSrc) + if (IsConstantSrc) { + // Allow merging constants of different types as integers. + if (MemVT.isInteger() ? !MemVT.bitsEq(Other->getMemoryVT()) + : Other->getMemoryVT() != MemVT) + return false; if (!(isa(Other->getValue()) || isa(Other->getValue()))) return false; - if (IsExtractVecSrc) - if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR)) + } + if (IsExtractVecSrc) { + // Peek through bitcasts. + SDValue Val = Other->getValue(); + if (!Other->isTruncatingStore()) + while (Val.getOpcode() == ISD::BITCAST) + Val = Val.getOperand(0); + if (!MemVT.bitsEq(Val.getValueType()) || + !(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) return false; + } Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; @@ -12623,6 +12675,11 @@ // Perform an early exit check. Do not bother looking at stored values that // are not constants, loads, or extracted vector elements. SDValue StoredVal = St->getValue(); + + if (!St->isTruncatingStore()) + while (StoredVal->getOpcode() == ISD::BITCAST) + StoredVal = StoredVal.getOperand(0); + bool IsLoadSrc = isa(StoredVal); bool IsConstantSrc = isa(StoredVal) || isa(StoredVal); @@ -12809,14 +12866,18 @@ bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); - unsigned StoreValOpcode = St->getValue().getOpcode(); + SDValue StVal = St->getValue(); + // Peek through bitcasts + while (StVal->getOpcode() == ISD::BITCAST) + StVal = StVal.getOperand(0); + // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a // vector. It should be possible to handle mixed sources, but load // sources need more careful handling (see the block of code below that // handles consecutive loads). - if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && - StoreValOpcode != ISD::EXTRACT_SUBVECTOR) + if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT && + StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR) return RV; // Find a legal type for the vector store. Index: test/CodeGen/X86/MergeConsecutiveStores.ll =================================================================== --- test/CodeGen/X86/MergeConsecutiveStores.ll +++ test/CodeGen/X86/MergeConsecutiveStores.ll @@ -622,9 +622,6 @@ ret void ; CHECK-LABEL: merge_bitcast -; CHECK: vmovd %xmm0, (%rdi) -; CHECK-NEXT: vpextrd $1, %xmm0, 4(%rdi) -; CHECK-NEXT: vpextrd $2, %xmm0, 8(%rdi) -; CHECK-NEXT: vpextrd $3, %xmm0, 12(%rdi) +; CHECK: vmovups %xmm0, (%rdi) ; CHECK-NEXT: retq }