Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4153,7 +4153,8 @@
   if (VT.isFloatingPoint() || VT.isVector()) {
     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
     if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
-      if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
+      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
+          LoadedVT.isVector()) {
         // Scalarize the load and let the individual components be handled.
         SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
         if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
@@ -4303,13 +4304,14 @@
   EVT VT = Val.getValueType();
   int Alignment = ST->getAlignment();
   auto &MF = DAG.getMachineFunction();
+  EVT MemVT = ST->getMemoryVT();
 
   SDLoc dl(ST);
-  if (ST->getMemoryVT().isFloatingPoint() ||
-      ST->getMemoryVT().isVector()) {
+  if (MemVT.isFloatingPoint() || MemVT.isVector()) {
     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
     if (isTypeLegal(intVT)) {
-      if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
+      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
+          MemVT.isVector()) {
         // Scalarize the store and let the individual components be handled.
         SDValue Result = scalarizeVectorStore(ST, DAG);
 
Index: test/CodeGen/AMDGPU/unaligned-load-store.ll
===================================================================
--- test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -601,4 +601,68 @@
   ret void
 }
 
+; SI-LABEL: {{^}}private_load_align1_f64:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+define double @private_load_align1_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 1
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align1_f64:
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @private_store_align1_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 1
+  ret void
+}
+
+; SI-LABEL: {{^}}private_load_align4_f64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+define double @private_load_align4_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 4
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align4_f64:
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+define void @private_store_align4_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}private_load_align2_f64:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
define double @private_load_align2_f64(double addrspace(5)* %in) {
+  %x = load double, double addrspace(5)* %in, align 2
+  ret double %x
+}
+
+; SI-LABEL: {{^}}private_store_align2_f64:
+; SI: buffer_store_short
+; SI: buffer_store_short
+; SI: buffer_store_short
+; SI: buffer_store_short
+define void @private_store_align2_f64(double addrspace(5)* %out, double %x) #0 {
+  store double %x, double addrspace(5)* %out, align 2
+  ret void
+}
+
 attributes #0 = { nounwind }