Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -101,6 +101,8 @@
   setOperationAction(ISD::LOAD, MVT::i64, Promote);
   AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
 
+  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+
   setOperationAction(ISD::STORE, MVT::v8i32, Custom);
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
 
@@ -113,6 +115,8 @@
   setOperationAction(ISD::STORE, MVT::i64, Promote);
   AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
 
+  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+
   setOperationAction(ISD::SELECT, MVT::i64, Custom);
   setOperationAction(ISD::SELECT, MVT::f64, Promote);
   AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
@@ -1802,7 +1806,6 @@
   assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
          "Custom lowering for non-i32 vectors hasn't been implemented.");
   unsigned NumElements = MemVT.getVectorNumElements();
-  assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
 
   switch (Load->getAddressSpace()) {
   case AMDGPUAS::CONSTANT_ADDRESS:
@@ -1839,9 +1842,22 @@
       llvm_unreachable("unsupported private_element_size");
     }
   }
-  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::LOCAL_ADDRESS: {
+    if (NumElements > 2)
+      return SplitVectorLoad(Op, DAG);
+
+    if (Load->getAlignment() < 4) {
+      SDValue Ops[2];
+      std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
+      return DAG.getMergeValues(Ops, DL);
+    }
+
+    if (NumElements == 2)
+      return SDValue();
+
     // If properly aligned, if we split we might be able to use ds_read_b64.
     return SplitVectorLoad(Op, DAG);
+  }
   default:
     return SDValue();
   }
@@ -2071,9 +2087,19 @@
       llvm_unreachable("unsupported private_element_size");
     }
   }
-  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::LOCAL_ADDRESS: {
+    if (NumElements > 2)
+      return SplitVectorStore(Op, DAG);
+
+    if (Store->getAlignment() < 4)
+      return expandUnalignedStore(Store, DAG);
+
+    if (NumElements == 2)
+      return Op;
+
     // If properly aligned, if we split we might be able to use ds_write_b64.
     return SplitVectorStore(Op, DAG);
+  }
   default:
     llvm_unreachable("unhandled address space");
   }
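
For context, the new LOCAL_ADDRESS paths are what a lit test accompanying this
patch would exercise. Below is a minimal sketch of such a test; it is not part
of the patch, and the function names and RUN line are illustrative assumptions,
not taken from the LLVM test suite.

; Sketch of a lit test for the new LDS v2i32 paths (illustrative only).
; RUN: llc -march=amdgcn < %s | FileCheck %s   ; assumed run line

; align 1 is below the 4-byte threshold, so this load should take the
; expandUnalignedLoad() path added above rather than hitting the old assert.
define void @local_load_v2i32_align1(<2 x i32> addrspace(3)* %in,
                                     <2 x i32> addrspace(1)* %out) {
  %v = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 1
  store <2 x i32> %v, <2 x i32> addrspace(1)* %out
  ret void
}

; A sufficiently aligned v2i32 load now returns SDValue() from the custom
; hook, i.e. it is left to the default handling (e.g. a ds_read_b64 when the
; access is 8-byte aligned).
define void @local_load_v2i32_align8(<2 x i32> addrspace(3)* %in,
                                     <2 x i32> addrspace(1)* %out) {
  %v = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 8
  store <2 x i32> %v, <2 x i32> addrspace(1)* %out
  ret void
}

Note the ordering of the checks in the patch: vectors wider than two elements
are still routed through SplitVectorLoad/SplitVectorStore, so that properly
aligned halves can be selected as ds_read_b64/ds_write_b64, and only the
remaining under-aligned accesses pay for the full unaligned expansion.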