Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -101,6 +101,8 @@
   setOperationAction(ISD::LOAD, MVT::i64, Promote);
   AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
 
+  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+
   setOperationAction(ISD::STORE, MVT::v8i32, Custom);
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
 
@@ -113,6 +115,8 @@
   setOperationAction(ISD::STORE, MVT::i64, Promote);
   AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
 
+  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+
   setOperationAction(ISD::SELECT, MVT::i64, Custom);
   setOperationAction(ISD::SELECT, MVT::f64, Promote);
   AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
@@ -1802,7 +1806,6 @@
   assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
          "Custom lowering for non-i32 vectors hasn't been implemented.");
   unsigned NumElements = MemVT.getVectorNumElements();
-  assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
 
   switch (Load->getAddressSpace()) {
   case AMDGPUAS::CONSTANT_ADDRESS:
@@ -1839,9 +1842,22 @@
       llvm_unreachable("unsupported private_element_size");
     }
   }
-  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::LOCAL_ADDRESS: {
+    if (NumElements > 2)
+      return SplitVectorLoad(Op, DAG);
+
+    if (Load->getAlignment() < 4) {
+      SDValue Ops[2];
+      std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
+      return DAG.getMergeValues(Ops, DL);
+    }
+
+    if (NumElements == 2)
+      return SDValue();
+
     // If properly aligned, if we split we might be able to use ds_read_b64.
     return SplitVectorLoad(Op, DAG);
+  }
   default:
     return SDValue();
   }
@@ -2071,9 +2087,19 @@
       llvm_unreachable("unsupported private_element_size");
     }
   }
-  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::LOCAL_ADDRESS: {
+    if (NumElements > 2)
+      return SplitVectorStore(Op, DAG);
+
+    if (Store->getAlignment() < 4)
+      return expandUnalignedStore(Store, DAG);
+
+    if (NumElements == 2)
+      return Op;
+
     // If properly aligned, if we split we might be able to use ds_write_b64.
     return SplitVectorStore(Op, DAG);
+  }
   default:
     llvm_unreachable("unhandled address space");
   }
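
For context, the new LOCAL_ADDRESS paths are what a lit test accompanying this
patch would exercise. Below is a minimal sketch of such a test; it is not part
of the patch, and the function names and RUN line are illustrative assumptions,
not taken from the LLVM test suite.

; Sketch of a lit test for the new LDS v2i32 paths (illustrative only).
; RUN: llc -march=amdgcn < %s | FileCheck %s   ; assumed run line

; align 1 is below the 4-byte threshold, so this load should take the
; expandUnalignedLoad() path added above rather than hitting the old assert.
define void @local_load_v2i32_align1(<2 x i32> addrspace(3)* %in,
                                     <2 x i32> addrspace(1)* %out) {
  %v = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 1
  store <2 x i32> %v, <2 x i32> addrspace(1)* %out
  ret void
}

; A sufficiently aligned v2i32 load now returns SDValue() from the custom
; hook, i.e. it is left to the default handling (e.g. a ds_read_b64 when the
; access is 8-byte aligned).
define void @local_load_v2i32_align8(<2 x i32> addrspace(3)* %in,
                                     <2 x i32> addrspace(1)* %out) {
  %v = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 8
  store <2 x i32> %v, <2 x i32> addrspace(1)* %out
  ret void
}

Note the ordering of the checks in the patch: vectors wider than two elements
are still routed through SplitVectorLoad/SplitVectorStore, so that properly
aligned halves can be selected as ds_read_b64/ds_write_b64, and only the
remaining under-aligned accesses pay for the full unaligned expansion.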