Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -4287,21 +4287,30 @@ } assert(VT == MVT::v2f16 || VT == MVT::v2i16); + assert(!Subtarget->hasVOP3PInsts() && "this should be legal"); SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); - Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo); - Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi); + // Avoid adding defined bits with the zero_extend. + if (Hi.isUndef()) { + Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo); + SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo); + return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo); + } - Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo); + Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi); Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi); SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi, DAG.getConstant(16, SL, MVT::i32)); + if (Lo.isUndef()) + return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi); - SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi); + Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo); + Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo); + SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi); return DAG.getNode(ISD::BITCAST, SL, VT, Or); } Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -1465,9 +1465,28 @@ // from S_LSHL_B32's multiple outputs from implicit scc def. 
def : GCNPat < (v2i16 (build_vector (i16 0), i16:$src1)), - (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0)) + (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16)))) >; +def : GCNPat < + (v2i16 (build_vector i16:$src0, (i16 undef))), + (v2i16 (COPY $src0)) +>; + +def : GCNPat < + (v2f16 (build_vector f16:$src0, (f16 undef))), + (v2f16 (COPY $src0)) +>; + +def : GCNPat < + (v2i16 (build_vector (i16 undef), i16:$src1)), + (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16)))) +>; + +def : GCNPat < + (v2f16 (build_vector (f16 undef), f16:$src1)), + (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16)))) +>; let SubtargetPredicate = HasVOP3PInsts in { def : GCNPat < Index: test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll @@ -0,0 +1,380 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s + +define void @undef_lo_v2i16(i16 %arg0) { +; GFX9-LABEL: undef_lo_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v0 +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_lo_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1 + call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.lo); + ret void +} + +define void @undef_lo_v2f16(half %arg0) { +; GFX9-LABEL: 
undef_lo_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v0 +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_lo_v2f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.lo = insertelement <2 x half> undef, half %arg0, i32 1 + call void asm sideeffect "; use $0", "v"(<2 x half> %undef.lo); + ret void +} + +define void @undef_lo_op_v2f16(half %arg0) { +; GFX9-LABEL: undef_lo_op_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0] +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v0 +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_lo_op_v2f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00 +; GFX8-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, 0x7e00, v0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.lo = insertelement <2 x half> undef, half %arg0, i32 1 + %op = fadd <2 x half> %undef.lo, <half 1.0, half 1.0> + call void asm sideeffect "; use $0", "v"(<2 x half> %op); + ret void +} + +define void @undef_lo_op_v2i16(i16 %arg0) { +; GFX9-LABEL: undef_lo_op_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_movk_i32 s6, 0x63 +; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0] +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v0 +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: 
undef_lo_op_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 0x63 +; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.lo = insertelement <2 x i16> undef, i16 %arg0, i32 1 + %op = add <2 x i16> %undef.lo, <i16 99, i16 99> + call void asm sideeffect "; use $0", "v"(<2 x i16> %op); + ret void +} + +define void @undef_lo3_v4i16(i16 %arg0) { +; GFX9-LABEL: undef_lo3_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v[0:1] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_lo3_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.lo = insertelement <4 x i16> undef, i16 %arg0, i32 1 + call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo); + ret void +} + +define void @undef_lo3_v4f16(half %arg0) { +; GFX9-LABEL: undef_lo3_v4f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v[0:1] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_lo3_v4f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.lo = insertelement <4 x half> undef, half %arg0, i32 1 + call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo); + ret void +} + +define void @undef_lo2_v4i16(<2 x i16> %arg0) { +; GFX9-LABEL: 
undef_lo2_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff0000 +; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1 +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v[0:1] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_lo2_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3> + call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo); + ret void +} + +define void @undef_lo2_v4f16(<2 x half> %arg0) { +; GFX9-LABEL: undef_lo2_v4f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v[0:1] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_lo2_v4f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3> + call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo); + ret void +} + +define void @undef_hi_v2i16(i16 %arg0) { +; GFX9-LABEL: undef_hi_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v0 +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: 
undef_hi_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0 + call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.hi); + ret void +} + +define void @undef_hi_v2f16(half %arg0) { +; GFX9-LABEL: undef_hi_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v0 +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_hi_v2f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.hi = insertelement <2 x half> undef, half %arg0, i32 0 + call void asm sideeffect "; use $0", "v"(<2 x half> %undef.hi); + ret void +} + +define void @undef_hi_op_v2f16(half %arg0) { +; GFX9-LABEL: undef_hi_op_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0] +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v0 +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_hi_op_v2f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0 +; GFX8-NEXT: v_or_b32_e32 v0, 0x7e000000, v0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.hi = insertelement <2 x half> undef, half %arg0, i32 0 + %op = fadd <2 x half> %undef.hi, <half 1.0, half 1.0> + call void asm sideeffect "; use $0", "v"(<2 x half> %op); + ret void +} + +define void @undef_hi_op_v2i16(i16 %arg0) { +; GFX9-LABEL: undef_hi_op_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_movk_i32 s6, 0x63 +; GFX9-NEXT: v_pk_add_u16 v0, v0, s6 op_sel_hi:[1,0] +; GFX9-NEXT: 
;;#ASMSTART +; GFX9-NEXT: ; use v0 +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_hi_op_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u16_e32 v0, 0x63, v0 +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v0 +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.hi = insertelement <2 x i16> undef, i16 %arg0, i32 0 + %op = add <2 x i16> %undef.hi, <i16 99, i16 99> + call void asm sideeffect "; use $0", "v"(<2 x i16> %op); + ret void +} + +define void @undef_hi3_v4i16(i16 %arg0) { +; GFX9-LABEL: undef_hi3_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v[0:1] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_hi3_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.hi = insertelement <4 x i16> undef, i16 %arg0, i32 0 + call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi); + ret void +} + +define void @undef_hi3_v4f16(half %arg0) { +; GFX9-LABEL: undef_hi3_v4f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v[0:1] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_hi3_v4f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.hi = insertelement <4 x half> undef, half %arg0, i32 0 + call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi); + ret void +} + +define void @undef_hi2_v4i16(<2 x i16> %arg0) { +; GFX9-LABEL: undef_hi2_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v[0:1] +; GFX9-NEXT: ;;#ASMEND 
+; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_hi2_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi); + ret void +} + +define void @undef_hi2_v4f16(<2 x half> %arg0) { +; GFX9-LABEL: undef_hi2_v4f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use v[0:1] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: undef_hi2_v4f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] + %undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi); + ret void +} + Index: test/CodeGen/AMDGPU/call-argument-types.ll =================================================================== --- test/CodeGen/AMDGPU/call-argument-types.ll +++ test/CodeGen/AMDGPU/call-argument-types.ll @@ -402,9 +402,9 @@ ; FIXME: materialize constant directly in VGPR ; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm: ; GFX9-DAG: s_mov_b32 [[K01:s[0-9]+]], 0x20001 -; GFX9-DAG: s_pack_ll_b32_b16 [[K23:s[0-9]+]], 3, s{{[0-9]+}} +; GFX9-DAG: s_mov_b32 [[K2:s[0-9]+]], 3 ; GFX9: v_mov_b32_e32 v0, [[K01]] -; GFX9: v_mov_b32_e32 v1, [[K23]] +; GFX9: v_mov_b32_e32 v1, [[K2]] ; GFX9: s_swappc_b64 define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>) Index: test/CodeGen/AMDGPU/mad-mix-hi.ll =================================================================== --- test/CodeGen/AMDGPU/mad-mix-hi.ll +++ test/CodeGen/AMDGPU/mad-mix-hi.ll @@ -83,8 +83,10 @@ } ; GCN-LABEL: 
{{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: -; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}} -; GFX9: v_cvt_f16_f32_e32 v0, v0 +; GCN: s_waitcnt +; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}} +; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX9-NEXT: s_setpc_b64 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 { %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float