Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -7941,7 +7941,8 @@ // elements. This exposes more load reduction opportunities by replacing // multiple small extract_vector_elements with a single 32-bit extract. auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (EltSize <= 16 && + if (isa<MemSDNode>(Vec) && + EltSize <= 16 && EltVT.isByteSized() && VecSize > 32 && VecSize % 32 == 0 && Index: test/CodeGen/AMDGPU/fexp.ll =================================================================== --- test/CodeGen/AMDGPU/fexp.ll +++ test/CodeGen/AMDGPU/fexp.ll @@ -224,37 +224,23 @@ ; SI-LABEL: v_exp_v4f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-NEXT: v_mov_b32_e32 v4, 0x3fb8aa3b -; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-NEXT: v_mul_f32_e32 v3, v3, v4 -; SI-NEXT: v_mul_f32_e32 v2, v2, v4 -; SI-NEXT: v_mul_f32_e32 v1, v1, v4 -; SI-NEXT: v_mul_f32_e32 v0, v0, v4 -; SI-NEXT: v_exp_f32_e32 v3, v3 -; SI-NEXT: v_exp_f32_e32 v2, v2 -; SI-NEXT: v_exp_f32_e32 v1, v1 -; SI-NEXT: v_exp_f32_e32 v0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NEXT: v_or_b32_e32 v3, v2, v3 -; SI-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_mov_b32_e32 v4, 0x3fb8aa3b ; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NEXT: 
v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v0, v0, v4 +; SI-NEXT: v_mul_f32_e32 v1, v1, v4 +; SI-NEXT: v_mul_f32_e32 v2, v2, v4 +; SI-NEXT: v_mul_f32_e32 v3, v3, v4 +; SI-NEXT: v_exp_f32_e32 v0, v0 +; SI-NEXT: v_exp_f32_e32 v1, v1 +; SI-NEXT: v_exp_f32_e32 v2, v2 +; SI-NEXT: v_exp_f32_e32 v3, v3 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_exp_v4f16: Index: test/CodeGen/AMDGPU/fmax_legacy.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fmax_legacy.f16.ll +++ test/CodeGen/AMDGPU/fmax_legacy.f16.ll @@ -290,79 +290,51 @@ ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16: ; SI-SAFE: ; %bb.0: ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v3, v7, v3 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v6, v2 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v5, v1 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v4, v0 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; 
SI-SAFE-NEXT: v_or_b32_e32 v3, v2, v3 -; SI-SAFE-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v3, v7, v3 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31] ; ; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16: ; SI-NNAN: ; %bb.0: ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-NNAN-NEXT: v_max_f32_e32 v3, v3, v7 -; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v6 -; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v5 -; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v4 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2 +; 
SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NNAN-NEXT: v_or_b32_e32 v3, v2, v3 -; SI-NNAN-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v4 +; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v5 +; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v6 +; SI-NNAN-NEXT: v_max_f32_e32 v3, v3, v7 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ugt <4 x half> %a, %b %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b @@ -474,147 +446,91 @@ ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16: ; SI-SAFE: ; %bb.0: ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7 ; SI-SAFE-NEXT: 
v_cvt_f16_f32_e32 v15, v15 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v7, v15, v7 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v6, v14, v6 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v5, v13, v5 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v4, v12, v4 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v3, v11, v3 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v10, v2 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v9, v1 -; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v8, v0 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-SAFE-NEXT: v_or_b32_e32 v7, v6, v7 -; SI-SAFE-NEXT: v_or_b32_e32 v5, v4, v5 -; SI-SAFE-NEXT: v_or_b32_e32 v3, 
v2, v3 -; SI-SAFE-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v5 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v7 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v7 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v8, v0 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v9, v1 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v10, v2 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v3, v11, v3 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v4, v12, v4 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v5, v13, v5 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v6, v14, v6 +; SI-SAFE-NEXT: v_max_legacy_f32_e32 v7, v15, v7 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31] ; ; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16: ; SI-NNAN: ; %bb.0: ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NNAN-NEXT: 
v_cvt_f16_f32_e32 v10, v10 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v15, v15 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7 -; SI-NNAN-NEXT: v_max_f32_e32 v7, v7, v15 -; SI-NNAN-NEXT: v_max_f32_e32 v6, v6, v14 -; SI-NNAN-NEXT: v_max_f32_e32 v5, v5, v13 -; SI-NNAN-NEXT: v_max_f32_e32 v4, v4, v12 -; SI-NNAN-NEXT: v_max_f32_e32 v3, v3, v11 -; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v10 -; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v9 -; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v8 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2 +; 
SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NNAN-NEXT: v_or_b32_e32 v7, v6, v7 -; SI-NNAN-NEXT: v_or_b32_e32 v5, v4, v5 -; SI-NNAN-NEXT: v_or_b32_e32 v3, v2, v3 -; SI-NNAN-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v5 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v7 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v7, 16, v7 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v8 +; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v9 +; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v10 +; SI-NNAN-NEXT: v_max_f32_e32 v3, v3, v11 +; SI-NNAN-NEXT: v_max_f32_e32 v4, v4, v12 +; SI-NNAN-NEXT: v_max_f32_e32 v5, v5, v13 +; SI-NNAN-NEXT: v_max_f32_e32 v6, v6, v14 +; SI-NNAN-NEXT: 
v_max_f32_e32 v7, v7, v15 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ugt <8 x half> %a, %b %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b Index: test/CodeGen/AMDGPU/fmin_legacy.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fmin_legacy.f16.ll +++ test/CodeGen/AMDGPU/fmin_legacy.f16.ll @@ -291,79 +291,51 @@ ; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16: ; SI-SAFE: ; %bb.0: ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v7, v3 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v6, v2 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v5, v1 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v4, v0 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-SAFE-NEXT: v_or_b32_e32 v3, v2, v3 -; SI-SAFE-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 
v1, v1 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v7, v3 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31] ; ; SI-NNAN-LABEL: test_fmin_legacy_ule_v4f16: ; SI-NNAN: ; %bb.0: ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v7 -; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v6 -; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v5 -; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v4 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NNAN-NEXT: v_or_b32_e32 v3, v2, 
v3 -; SI-NNAN-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v4 +; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v5 +; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v6 +; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v7 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ule <4 x half> %a, %b %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b @@ -475,147 +447,91 @@ ; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16: ; SI-SAFE: ; %bb.0: ; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v15, v15 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10 -; SI-SAFE-NEXT: 
v_cvt_f32_f16_e32 v3, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v7, v15, v7 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v6, v14, v6 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v5, v13, v5 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v4, v12, v4 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v11, v3 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v10, v2 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v9, v1 -; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v8, v0 -; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9 ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; SI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-SAFE-NEXT: v_or_b32_e32 v7, v6, v7 -; SI-SAFE-NEXT: v_or_b32_e32 v5, v4, v5 -; SI-SAFE-NEXT: v_or_b32_e32 v3, v2, v3 -; SI-SAFE-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v5 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v7 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 
v3, 16, v3 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; SI-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v7 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8 ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, v8, v0 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v1, v9, v1 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v2, v10, v2 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v3, v11, v3 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v4, v12, v4 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v5, v13, v5 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v6, v14, v6 +; SI-SAFE-NEXT: v_min_legacy_f32_e32 v7, v15, v7 ; SI-SAFE-NEXT: s_setpc_b64 s[30:31] ; ; SI-NNAN-LABEL: test_fmin_legacy_ule_v8f16: ; SI-NNAN: ; %bb.0: ; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13 -; SI-NNAN-NEXT: 
v_cvt_f16_f32_e32 v5, v5 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v15, v15 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7 -; SI-NNAN-NEXT: v_min_f32_e32 v7, v7, v15 -; SI-NNAN-NEXT: v_min_f32_e32 v6, v6, v14 -; SI-NNAN-NEXT: v_min_f32_e32 v5, v5, v13 -; SI-NNAN-NEXT: v_min_f32_e32 v4, v4, v12 -; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v11 -; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v10 -; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v9 -; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v8 -; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8 ; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; SI-NNAN-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; 
SI-NNAN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NNAN-NEXT: v_or_b32_e32 v7, v6, v7 -; SI-NNAN-NEXT: v_or_b32_e32 v5, v4, v5 -; SI-NNAN-NEXT: v_or_b32_e32 v3, v2, v3 -; SI-NNAN-NEXT: v_or_b32_e32 v1, v0, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v5 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v7 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; SI-NNAN-NEXT: v_lshrrev_b32_e32 v7, 16, v7 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 -; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15 ; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NNAN-NEXT: v_min_f32_e32 v0, v0, v8 +; SI-NNAN-NEXT: v_min_f32_e32 v1, v1, v9 +; SI-NNAN-NEXT: v_min_f32_e32 v2, v2, v10 +; SI-NNAN-NEXT: v_min_f32_e32 v3, v3, v11 +; SI-NNAN-NEXT: v_min_f32_e32 v4, v4, v12 +; SI-NNAN-NEXT: v_min_f32_e32 v5, v5, v13 +; SI-NNAN-NEXT: v_min_f32_e32 v6, v6, v14 +; SI-NNAN-NEXT: v_min_f32_e32 v7, v7, v15 ; SI-NNAN-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ule <8 x half> %a, %b %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b Index: test/CodeGen/AMDGPU/reduction.ll =================================================================== --- test/CodeGen/AMDGPU/reduction.ll +++ 
test/CodeGen/AMDGPU/reduction.ll @@ -47,8 +47,8 @@ ; VI: s_waitcnt ; VI-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-NEXT: v_sub_f16_e32 v0, v0, v1 -; VI-NEXT: v_sub_f16_e32 v0, v2, v0 +; VI-NEXT: v_sub_f16_e32 v0, v1, v0 +; VI-NEXT: v_add_f16_e32 v0, v2, v0 ; VI-NEXT: s_setpc_b64 define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) { entry: