Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1886,12 +1886,22 @@
       llvm_unreachable("unsupported private_element_size");
     }
   }
-  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::LOCAL_ADDRESS: {
+    if (NumElements > 2)
+      return SplitVectorLoad(Op, DAG);
+
+    if (Load->getAlignment() < 4) {
+      SDValue Ops[2];
+      std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
+      return DAG.getMergeValues(Ops, DL);
+    }
+
     if (NumElements == 2)
-      return Op;
+      return SDValue();
 
     // If properly aligned, if we split we might be able to use ds_read_b64.
     return SplitVectorLoad(Op, DAG);
+  }
   default:
     return SDValue();
   }
@@ -2121,12 +2131,19 @@
       llvm_unreachable("unsupported private_element_size");
     }
   }
-  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::LOCAL_ADDRESS: {
+    if (NumElements > 2)
+      return SplitVectorStore(Op, DAG);
+
+    if (Store->getAlignment() < 4)
+      return expandUnalignedStore(Store, DAG);
+
     if (NumElements == 2)
       return Op;
 
     // If properly aligned, if we split we might be able to use ds_write_b64.
     return SplitVectorStore(Op, DAG);
+  }
   default:
     llvm_unreachable("unhandled address space");
   }
Index: test/CodeGen/AMDGPU/unaligned-load-store.ll
===================================================================
--- test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -65,13 +65,36 @@
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
+
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
 ; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
@@ -80,6 +103,53 @@
   ret void
 }
 
+; SI-LABEL: {{^}}unaligned_load_store_v2i32_local:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI: s_endpgm
+define void @unaligned_load_store_v2i32_local(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
+  %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
+  store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
+  ret void
+}
+
 ; SI-LABEL: {{^}}unaligned_load_store_i64_global:
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte