diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2465,6 +2465,10 @@ VT, *Store->getMemOperand())) return expandUnalignedStore(Store, DAG); + // v2f16 and v2bf16 are stored as one 32-bit value; no custom lowering needed. + if (VT == MVT::v2f16 || VT == MVT::v2bf16) + return SDValue(); + if (VT.isVector()) return LowerSTOREVector(Op, DAG); diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -276,8 +276,7 @@ ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0]; ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1]; ; CHECK-DAG: ld.b32 [[E:%r[0-9]+]], [%[[A]]] -; CHECK: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[E]]; -; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]}; +; CHECK-DAG: st.b32 [%[[B]]], [[E]]; ; CHECK: ret; define void @test_ldst_v2f16(ptr %a, ptr %b) { %t1 = load <2 x half>, ptr %a