Rename the existing per-element vector load/store lowering and add
half-splitting variants.

The existing SplitVectorLoad/SplitVectorStore, which emit one scalar memory
access per vector element, are renamed to ScalarizeVectorLoad and
ScalarizeVectorStore, and every caller touched by this patch (AMDGPU, R600
and SI lowering) is switched to the Scalarize* names.  New SplitVectorLoad
and SplitVectorStore split an access into two accesses of half the vector
each; for 2-element vectors they defer to the Scalarize* functions.

Each new memory access carries a MachinePointerInfo offset matching its
pointer offset.  The truncating stores pass Store->isVolatile() before
Store->isNonTemporal(), the same order used by the getTruncStore call that
this patch removes.

NOTE(review): template arguments (cast<...>, dyn_cast<...>,
SmallVector<SDValue, 8>) and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch -- confirm them against the target
tree before applying (patch -l tolerates whitespace differences).
NOTE(review): both halves in SplitVectorLoad/SplitVectorStore reuse the
original access alignment; confirm that is valid for the high half, which
sits at a non-zero offset.
NOTE(review): Lo/Hi computed by DAG.SplitVector(Op, ...) in SplitVectorLoad
are not used afterwards -- confirm whether that call is needed.

Index: lib/Target/R600/AMDGPUISelLowering.h
===================================================================
--- lib/Target/R600/AMDGPUISelLowering.h
+++ lib/Target/R600/AMDGPUISelLowering.h
@@ -73,9 +73,19 @@
 
   virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                      SDValue Op, SelectionDAG &DAG) const;
-  /// \brief Split a vector load into multiple scalar loads.
-  SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
+
+  /// \brief Split a vector load into a scalar load of each component.
+  SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
+  /// \brief Split a vector load into 2 loads of half the vector.
+  SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
+  /// \brief Split a vector store into a scalar store of each component.
+  SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
+  /// \brief Split a vector store into 2 stores of half the vector.
   SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/R600/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/R600/AMDGPUISelLowering.cpp
+++ lib/Target/R600/AMDGPUISelLowering.cpp
@@ -1011,12 +1011,14 @@
   return SDValue();
 }
 
-SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
-                                              SelectionDAG &DAG) const {
-  LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
-  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  LoadSDNode *Load = cast<LoadSDNode>(Op);
+  EVT MemVT = Load->getMemoryVT();
+  EVT MemEltVT = MemVT.getVectorElementType();
 
+  EVT LoadVT = Op.getValueType();
-  EVT EltVT = Op.getValueType().getVectorElementType();
+  EVT EltVT = LoadVT.getVectorElementType();
   EVT PtrVT = Load->getBasePtr().getValueType();
 
   unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
@@ -1024,15 +1026,17 @@
   SmallVector<SDValue, 8> Chains;
 
   SDLoc SL(Op);
+  unsigned MemEltSize = MemEltVT.getStoreSize();
+  MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
 
-  for (unsigned i = 0, e = NumElts; i != e; ++i) {
+  for (unsigned i = 0; i < NumElts; ++i) {
     SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
-                    DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
+                              DAG.getConstant(i * MemEltSize, PtrVT));
 
     SDValue NewLoad
       = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
                        Load->getChain(), Ptr,
-                       MachinePointerInfo(Load->getMemOperand()->getValue()),
+                       SrcValue.getWithOffset(i * MemEltSize),
                        MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
                        Load->getAlignment());
     Loads.push_back(NewLoad.getValue(0));
@@ -1047,6 +1051,55 @@
   return DAG.getMergeValues(Ops, SL);
 }
 
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
+                                              SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+
+  // If this is a 2 element vector, we really want to scalarize and not create
+  // weird 1 element vectors.
+  if (VT.getVectorNumElements() == 2)
+    return ScalarizeVectorLoad(Op, DAG);
+
+  LoadSDNode *Load = cast<LoadSDNode>(Op);
+  SDValue BasePtr = Load->getBasePtr();
+  EVT PtrVT = BasePtr.getValueType();
+  EVT MemVT = Load->getMemoryVT();
+  SDLoc SL(Op);
+  MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
+
+  EVT LoVT, HiVT;
+  EVT LoMemVT, HiMemVT;
+  SDValue Lo, Hi;
+
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
+  std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
+  SDValue LoLoad
+    = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
+                     Load->getChain(), BasePtr,
+                     SrcValue,
+                     LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
+                     Load->getAlignment());
+
+  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+                              DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+
+  SDValue HiLoad
+    = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
+                     Load->getChain(), HiPtr,
+                     SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+                     HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
+                     Load->getAlignment());
+
+  SDValue Ops[] = {
+    DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
+    DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
+                LoLoad.getValue(1), HiLoad.getValue(1))
+  };
+
+  return DAG.getMergeValues(Ops, SL);
+}
+
 SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
                                                SelectionDAG &DAG) const {
   StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -1105,8 +1158,8 @@
                       Store->getAlignment());
 }
 
-SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
-                                               SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
+                                                   SelectionDAG &DAG) const {
   StoreSDNode *Store = cast<StoreSDNode>(Op);
   EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
   EVT EltVT = Store->getValue().getValueType().getVectorElementType();
@@ -1116,21 +1169,77 @@
 
   SmallVector<SDValue, 8> Chains;
 
+  unsigned EltSize = MemEltVT.getStoreSize();
+  MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+
   for (unsigned i = 0, e = NumElts; i != e; ++i) {
     SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
-                              Store->getValue(), DAG.getConstant(i, MVT::i32));
-    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
-                              Store->getBasePtr(),
-                            DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
-                                            PtrVT));
-    Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
-                         MachinePointerInfo(Store->getMemOperand()->getValue()),
-                         MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
-                         Store->getAlignment()));
+                              Store->getValue(),
+                              DAG.getConstant(i, MVT::i32));
+
+    SDValue Offset = DAG.getConstant(i * EltSize, PtrVT);
+    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
+    SDValue NewStore =
+      DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
+                        SrcValue.getWithOffset(i * EltSize),
+                        MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
+                        Store->getAlignment());
+    Chains.push_back(NewStore);
   }
+
   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
 }
 
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  StoreSDNode *Store = cast<StoreSDNode>(Op);
+  SDValue Val = Store->getValue();
+  EVT VT = Val.getValueType();
+
+  // If this is a 2 element vector, we really want to scalarize and not create
+  // weird 1 element vectors.
+  if (VT.getVectorNumElements() == 2)
+    return ScalarizeVectorStore(Op, DAG);
+
+  EVT MemVT = Store->getMemoryVT();
+  SDValue Chain = Store->getChain();
+  SDValue BasePtr = Store->getBasePtr();
+  SDLoc SL(Op);
+
+  EVT LoVT, HiVT;
+  EVT LoMemVT, HiMemVT;
+  SDValue Lo, Hi;
+
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
+  std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
+
+  EVT PtrVT = BasePtr.getValueType();
+  SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+                              DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+
+  MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+  SDValue LoStore
+    = DAG.getTruncStore(Chain, SL, Lo,
+                        BasePtr,
+                        SrcValue,
+                        LoMemVT,
+                        Store->isVolatile(),
+                        Store->isNonTemporal(),
+                        Store->getAlignment());
+  SDValue HiStore
+    = DAG.getTruncStore(Chain, SL, Hi,
+                        HiPtr,
+                        SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+                        HiMemVT,
+                        Store->isVolatile(),
+                        Store->isNonTemporal(),
+                        Store->getAlignment());
+
+  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
+}
+
+
 SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -1227,7 +1336,7 @@
   if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
        Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
       Store->getValue().getValueType().isVector()) {
-    return SplitVectorStore(Op, DAG);
+    return ScalarizeVectorStore(Op, DAG);
   }
 
   EVT MemVT = Store->getMemoryVT();
Index: lib/Target/R600/R600ISelLowering.cpp
===================================================================
--- lib/Target/R600/R600ISelLowering.cpp
+++ lib/Target/R600/R600ISelLowering.cpp
@@ -1543,7 +1543,7 @@
 
   if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
     SDValue MergedValues[2] = {
-      SplitVectorLoad(Op, DAG),
+      ScalarizeVectorLoad(Op, DAG),
       Chain
     };
     return DAG.getMergeValues(MergedValues, DL);
Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -923,7 +923,7 @@
         break;
       // fall-through
     case AMDGPUAS::LOCAL_ADDRESS:
-      return SplitVectorLoad(Op, DAG);
+      return ScalarizeVectorLoad(Op, DAG);
     }
   }
 
@@ -1073,7 +1073,7 @@
 
   if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
     if (VT.isVector() && VT.getVectorNumElements() > 4)
-      return SplitVectorStore(Op, DAG);
+      return ScalarizeVectorStore(Op, DAG);
     return SDValue();
   }
 
@@ -1082,7 +1082,7 @@
     return Ret;
 
   if (VT.isVector() && VT.getVectorNumElements() >= 8)
-    return SplitVectorStore(Op, DAG);
+    return ScalarizeVectorStore(Op, DAG);
 
   if (VT == MVT::i1)
     return DAG.getTruncStore(Store->getChain(), DL,