Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1579,7 +1579,8 @@ // entire split argument. if (Arg->Flags.isSplit()) { while (!Arg->Flags.isSplitEnd()) { - assert(!Arg->VT.isVector() && + assert((!Arg->VT.isVector() || + Arg->VT.getScalarSizeInBits() == 16) && "unexpected vector split in ps argument type"); if (!SkipArg) Splits.push_back(*Arg); Index: test/CodeGen/AMDGPU/calling-conventions.ll =================================================================== --- test/CodeGen/AMDGPU/calling-conventions.ll +++ test/CodeGen/AMDGPU/calling-conventions.ll @@ -199,6 +199,37 @@ ret void } +; FIXME: Different ABI for VI+ +; GCN-LABEL: {{^}}ps_mesa_v4f16: +; SI: v_cvt_f16_f32_e32 v3, v3 +; SI: v_cvt_f16_f32_e32 v2, v2 +; SI: v_cvt_f16_f32_e32 v1, v1 +; SI: v_cvt_f16_f32_e32 v0, v0 + +; VI: v_add_f16_e32 v2, 1.0, v1 +; VI: v_add_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI: v_add_f16_e32 v4, 1.0, v0 +; VI: v_add_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +define amdgpu_ps <4 x half> @ps_mesa_v4f16(<4 x half> %arg0) { + %add = fadd <4 x half> %arg0, + ret <4 x half> %add +} + +; GCN-LABEL: {{^}}ps_mesa_inreg_v4f16: +; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, s3 +; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, s2 +; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, s1 +; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, s0 + +; VI: v_add_f16_e64 +; VI: v_add_f16_sdwa +; VI: v_add_f16_e64 +; VI: v_add_f16_sdwa +define amdgpu_ps <4 x half> @ps_mesa_inreg_v4f16(<4 x half> inreg %arg0) { + %add = fadd <4 x half> %arg0, + ret <4 x half> %add +} + ; GCN-LABEL: {{^}}ps_mesa_inreg_v3i32: ; GCN-DAG: s_add_i32 s0, s0, 1 ; GCN-DAG: s_add_i32 s{{[0-9]*}}, s1, 2