Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -493,6 +493,8 @@
   case ISD::FMUL:
   case ISD::FMA:
   case ISD::FMAD:
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
   case ISD::FSIN:
   case ISD::FTRUNC:
   case ISD::FRINT:
@@ -2984,6 +2986,25 @@
       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
     return Res;
   }
+  case ISD::FMAXNUM:
+  case ISD::FMINNUM: {
+    // Negation reverses ordering, so the fneg is pushed into both operands and
+    // the opposite min/max opcode is used:
+    // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
+    // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
+    SDValue LHS = N0.getOperand(0);
+    SDValue RHS = N0.getOperand(1);
+
+    SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
+    SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+    unsigned Opposite = (Opc == ISD::FMAXNUM) ? ISD::FMINNUM : ISD::FMAXNUM;
+
+    SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
+    // Any other user of the original min/max now reads it as fneg of Res.
+    if (!N0.hasOneUse())
+      DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
+    return Res;
+  }
   case ISD::FP_EXTEND:
   case ISD::FTRUNC:
   case ISD::FRINT:
Index: test/CodeGen/AMDGPU/fneg-combines.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg-combines.ll
+++ test/CodeGen/AMDGPU/fneg-combines.ll
@@ -353,6 +353,194 @@
 }
 
 ; --------------------------------------------------------------------------------
+; fminnum tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_minnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
+; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %b = load volatile float, float addrspace(1)* %b.gep
+  %min = call float @llvm.minnum.f32(float %a, float %b)
+  %fneg = fsub float -0.000000e+00, %min
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_self_minnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_self_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %min = call float @llvm.minnum.f32(float %a, float %a)
+  %min.fneg = fsub float -0.0, %min
+  store float %min.fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_posk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %min = call float @llvm.minnum.f32(float 4.0, float %a)
+  %fneg = fsub float -0.000000e+00, %min
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_negk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %min = call float @llvm.minnum.f32(float -4.0, float %a)
+  %fneg = fsub float -0.000000e+00, %min
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
+; GCN: v_max_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
+; GCN-NEXT: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0
+; GCN-NEXT: buffer_store_dword [[MAX0]]
+; GCN-NEXT: buffer_store_dword [[MUL1]]
+define void @v_fneg_minnum_multi_use_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %b = load volatile float, float addrspace(1)* %b.gep
+  %min = call float @llvm.minnum.f32(float %a, float %b)
+  %fneg = fsub float -0.000000e+00, %min
+  %use1 = fmul float %min, 4.0
+  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %use1, float addrspace(1)* %out
+  ret void
+}
+
+; --------------------------------------------------------------------------------
+; fmaxnum tests
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}v_fneg_maxnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
+; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %b = load volatile float, float addrspace(1)* %b.gep
+  %max = call float @llvm.maxnum.f32(float %a, float %b)
+  %fneg = fsub float -0.000000e+00, %max
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_self_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %max = call float @llvm.maxnum.f32(float %a, float %a)
+  %max.fneg = fsub float -0.0, %max
+  store float %max.fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_posk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
+  %fneg = fsub float -0.000000e+00, %max
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
+; GCN: buffer_store_dword [[RESULT]]
+define void @v_fneg_negk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
+  %fneg = fsub float -0.000000e+00, %max
+  store float %fneg, float addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32:
+; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
+; GCN: v_min_f32_e64 [[MIN0:v[0-9]+]], -[[A]], -[[B]]
+; GCN-NEXT: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MIN0]], 4.0
+; GCN-NEXT: buffer_store_dword [[MIN0]]
+; GCN-NEXT: buffer_store_dword [[MUL1]]
+define void @v_fneg_maxnum_multi_use_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %tid.ext = sext i32 %tid to i64
+  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
+  %a = load volatile float, float addrspace(1)* %a.gep
+  %b = load volatile float, float addrspace(1)* %b.gep
+  %max = call float @llvm.maxnum.f32(float %a, float %b)
+  %fneg = fsub float -0.000000e+00, %max
+  %use1 = fmul float %max, 4.0
+  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %use1, float addrspace(1)* %out
+  ret void
+}
+
+; --------------------------------------------------------------------------------
 ; fma tests
 ; --------------------------------------------------------------------------------
 
@@ -1736,6 +1924,8 @@
 declare float @llvm.round.f32(float) #1
 declare float @llvm.rint.f32(float) #1
 declare float @llvm.nearbyint.f32(float) #1
+declare float @llvm.minnum.f32(float, float) #1
+declare float @llvm.maxnum.f32(float, float) #1
 
 declare double @llvm.fma.f64(double, double, double) #1
 