Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4101,7 +4101,7 @@ if (Depth >= MaxRecursionDepth) return false; // Limit search depth. - // TODO: Handle vectors. + // Vector constants are handled by checking operands of BUILD_VECTOR below. // If the value is a constant, we can obviously see if it is a NaN or not. if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { return !C->getValueAPF().isNaN() || @@ -4199,6 +4199,12 @@ case ISD::EXTRACT_VECTOR_ELT: { return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); } + case ISD::BUILD_VECTOR: { + for (const SDValue &Opnd : Op->ops()) + if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1)) + return false; + return true; + } default: if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || Index: llvm/test/CodeGen/AMDGPU/clamp-modifier.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/clamp-modifier.ll +++ llvm/test/CodeGen/AMDGPU/clamp-modifier.ll @@ -236,9 +236,8 @@ ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg_lo: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] -; GFX9: v_pk_add_f16 [[A]], [[A]], 1.0 op_sel_hi:[1,0]{{$}} -; GFX9: v_pk_max_f16 v1, v1, 0 neg_lo:[1,0] -; GFX9: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0] +; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}} +; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_lo:[1,1] clamp{{$}} define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid @@ -257,8 +256,7 @@ ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm_neg_hi: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], 
[[A]], 1.0 op_sel_hi:[1,0]{{$}} -; GFX9: v_pk_max_f16 v1, v1, 0 neg_hi:[1,0] -; GFX9: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0] +; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_hi:[1,1] clamp{{$}} define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid Index: llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -146,15 +146,12 @@ ; FIXME: Should be packed into 2 registers per argument? ; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt: ; GCN: s_waitcnt -; GFX9-DAG: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX9-DAG: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX9-DAG: v_and_b32_e32 v1, 0xffff, v1 -; GFX9-DAG: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX9-DAG: v_pk_max_f16 v1, v1, 0 -; GFX9-DAG: v_pk_max_f16 v0, v6, 0 -; GFX9-DAG: v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0] -; GFX9-DAG: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0] -; GFX9: s_setpc_b64 +; GFX9-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GFX9-DAG: v_mad_mixhi_f16 v{{[0-9]+}}, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX9-DAG: v_mad_mixlo_f16 v{{[0-9]+}}, v1, v3, v5 op_sel_hi:[1,1,1] +; GFX9-DAG: v_pk_max_f16 v1, v1, v1 clamp +; GFX9: v_mov_b32_e32 v0, v{{[0-9]+}} +; GFX9-NEXT: s_setpc_b64 define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 { %src0.ext = fpext <3 x half> %src0 to <3 x float> %src1.ext = fpext <3 x half> %src1 to <3 x float> @@ -168,14 +165,12 @@ ; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_postcvt: ; GCN: s_waitcnt -; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 
op_sel_hi:[1,1,1] -; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX9-NEXT: v_pk_max_f16 v1, v7, 0 -; GFX9-NEXT: v_pk_max_f16 v0, v6, 0 -; GFX9-NEXT: v_pk_min_f16 v0, v0, 1.0 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_min_f16 v1, v1, 1.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX9-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GFX9-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX9-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-NEXT: s_setpc_b64 define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 { %src0.ext = fpext <4 x half> %src0 to <4 x float>