Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -560,6 +560,18 @@ getOperationAction(Op, VT) == Custom); } + /// Return true if the specified operation is legal on this target or legal on + /// the scalar type for vectors. + bool isOperationLegalOrCustomScalar(unsigned Op, EVT VT) const { + if (isOperationLegalOrCustom(Op, VT)) + return true; + + if (!VT.isVector()) + return false; + + return isOperationLegalOrCustom(Op, VT.getScalarType()); + } + /// Return true if the specified operation is legal on this target or can be /// made legal using promotion. This is used to help guide high-level lowering /// decisions. Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1984,6 +1984,10 @@ case ISD::FMAXNUM: case ISD::FMINNAN: case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: Res = WidenVecRes_Binary(N); break; Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2471,7 +2471,7 @@ case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; case SPNB_RETURNS_ANY: - Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ? ISD::FMINNUM + Opc = TLI.isOperationLegalOrCustomScalar(ISD::FMINNUM, VT) ? ISD::FMINNUM : ISD::FMINNAN; break; } @@ -2482,7 +2482,7 @@ case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; case SPNB_RETURNS_ANY: - Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ? 
ISD::FMAXNUM
+      Opc = TLI.isOperationLegalOrCustomScalar(ISD::FMAXNUM, VT) ? ISD::FMAXNUM
                                                                  : ISD::FMAXNAN;
       break;
     }
@@ -2490,10 +2490,11 @@
     default: break;
     }

-    if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) &&
-        // If the underlying comparison instruction is used by any other instruction,
-        // the consumed instructions won't be destroyed, so it is not profitable
-        // to convert to a min/max.
+    if (Opc != ISD::DELETED_NODE &&
+        TLI.isOperationLegalOrCustomScalar(Opc, VT) &&
+        // If the underlying comparison instruction is used by any other
+        // instruction, the consumed instructions won't be destroyed, so it is
+        // not profitable to convert to a min/max.
        cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
      OpCode = Opc;
      LHSVal = getValue(LHS);
Index: test/CodeGen/AMDGPU/fmin-fmax-unsafe.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/fmin-fmax-unsafe.ll
@@ -0,0 +1,196 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}unsafe_fmin_f32:
+; GCN: v_min_f32_e32
+define void @unsafe_fmin_f32(float addrspace(1)* %out, float %a, float %b) {
+  %cmp = fcmp fast olt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  store float %cond, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}unsafe_fmax_f32:
+; GCN: v_max_f32_e32
+define void @unsafe_fmax_f32(float addrspace(1)* %out, float %a, float %b) {
+  %cmp = fcmp fast ogt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  store float %cond, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}unsafe_fmin_v1f32:
+; GCN: v_min_f32_e32
+define void @unsafe_fmin_v1f32(<1 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) {
+  %cmp = fcmp fast olt <1 x float> %a, %b
+  %cond = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
+  store <1 x float>
%cond, <1 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmax_v1f32: +;; GCN: v_max_f32 +define void @unsafe_fmax_v1f32(<1 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) { + %cmp = fcmp fast ogt <1 x float> %a, %b + %cond = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b + store <1 x float> %cond, <1 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmin_v2f32: +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +define void @unsafe_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { + %cmp = fcmp fast olt <2 x float> %a, %b + %cond = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + store <2 x float> %cond, <2 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmax_v2f32: +; GCN: v_max_f32 +; GCN: v_max_f32 +define void @unsafe_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { + %cmp = fcmp fast ogt <2 x float> %a, %b + %cond = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + store <2 x float> %cond, <2 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmin_v3f32: +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN-NOT: v_min_f32 +; GCN: s_endpgm +define void @unsafe_fmin_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, <3 x float> %b) { + %cmp = fcmp fast olt <3 x float> %a, %b + %cond = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b + store <3 x float> %cond, <3 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmax_v3f32: +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN-NOT: v_max_f32 +; GCN: s_endpgm +define void @unsafe_fmax_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, <3 x float> %b) { + %cmp = fcmp fast ogt <3 x float> %a, %b + %cond = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b + store <3 x float> %cond, <3 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmin_v4f32: +; GCN: v_min_f32_e32 +; GCN: 
v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +define void @unsafe_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) { + %cmp = fcmp fast olt <4 x float> %a, %b + %cond = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b + store <4 x float> %cond, <4 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmax_v4f32: +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +define void @unsafe_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) { + %cmp = fcmp fast ogt <4 x float> %a, %b + %cond = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b + store <4 x float> %cond, <4 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmin_v8f32: +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +; GCN: v_min_f32_e32 +define void @unsafe_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) { + %cmp = fcmp fast olt <8 x float> %a, %b + %cond = select <8 x i1> %cmp, <8 x float> %a, <8 x float> %b + store <8 x float> %cond, <8 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmax_v8f32: +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +; GCN: v_max_f32 +define void @unsafe_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) { + %cmp = fcmp fast ogt <8 x float> %a, %b + %cond = select <8 x i1> %cmp, <8 x float> %a, <8 x float> %b + store <8 x float> %cond, <8 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmin_f64: +; GCN: v_min_f64 +define void @unsafe_fmin_f64(double addrspace(1)* %out, double %a, double %b) { + %cmp = fcmp fast olt double %a, %b + %cond = select i1 %cmp, double %a, double %b + store double %cond, double addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmax_f64: +; GCN: 
v_max_f64 +define void @unsafe_fmax_f64(double addrspace(1)* %out, double %a, double %b) { + %cmp = fcmp fast ogt double %a, %b + %cond = select i1 %cmp, double %a, double %b + store double %cond, double addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmin_v1f64: +; GCN: v_min_f64 +define void @unsafe_fmin_v1f64(<1 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) { + %cmp = fcmp fast olt <1 x double> %a, %b + %cond = select <1 x i1> %cmp, <1 x double> %a, <1 x double> %b + store <1 x double> %cond, <1 x double> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmax_v1f64: +;; GCN: v_max_f64 +define void @unsafe_fmax_v1f64(<1 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) { + %cmp = fcmp fast ogt <1 x double> %a, %b + %cond = select <1 x i1> %cmp, <1 x double> %a, <1 x double> %b + store <1 x double> %cond, <1 x double> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmin_v2f64: +; GCN: v_min_f64 +; GCN: v_min_f64 +define void @unsafe_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) { + %cmp = fcmp fast olt <2 x double> %a, %b + %cond = select <2 x i1> %cmp, <2 x double> %a, <2 x double> %b + store <2 x double> %cond, <2 x double> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}unsafe_fmax_v2f64: +; GCN: v_max_f64 +; GCN: v_max_f64 +define void @unsafe_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) { + %cmp = fcmp fast ogt <2 x double> %a, %b + %cond = select <2 x i1> %cmp, <2 x double> %a, <2 x double> %b + store <2 x double> %cond, <2 x double> addrspace(1)* %out + ret void +} \ No newline at end of file Index: test/CodeGen/AMDGPU/max.ll =================================================================== --- test/CodeGen/AMDGPU/max.ll +++ test/CodeGen/AMDGPU/max.ll @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone -; FUNC-LABEL: @v_test_imax_sge_i32 +; FUNC-LABEL: {{^}}v_test_imax_sge_i32: ; SI: v_max_i32_e32 
 define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -17,6 +17,24 @@
   ret void
 }

+; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32:
+; SI: v_max_i32_e32
+; SI: v_max_i32_e32
+; SI: v_max_i32_e32
+; SI: v_max_i32_e32
+define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %gep0, align 4
+  %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep1, align 4
+  %cmp = icmp sge <4 x i32> %a, %b
+  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+  store <4 x i32> %val, <4 x i32> addrspace(1)* %outgep, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @s_test_imax_sge_i32
 ; SI: s_max_i32
 define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
@@ -44,6 +62,15 @@
   ret void
 }

+; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i32:
+; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
+; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
+define void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
+  %cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9>
+  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9>
+  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
 ; FUNC-LABEL: @v_test_imax_sgt_i32
 ; SI: v_max_i32_e32
 define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@@ -92,6 +119,19 @@
   ret void
 }

+; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32:
+; SI: s_max_u32
+; SI: s_max_u32
+; SI: s_max_u32
+; SI-NOT: s_max_u32
+; SI: s_endpgm
+define void @s_test_umax_uge_v3i32(<3 x
i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
+  %cmp = icmp uge <3 x i32> %a, %b
+  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
+  store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @v_test_umax_ugt_i32
 ; SI: v_max_u32_e32
 define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@@ -107,7 +147,7 @@
   ret void
 }

-; FUNC-LABEL: @s_test_umax_ugt_i32
+; FUNC-LABEL: {{^}}s_test_umax_ugt_i32:
 ; SI: s_max_u32
 define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %cmp = icmp ugt i32 %a, %b
@@ -116,6 +156,16 @@
   ret void
 }

+; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32:
+; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
+; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23
+define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
+  %cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
+  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
+  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
 ; Make sure redundant and removed
 ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
Index: test/CodeGen/AMDGPU/min.ll
===================================================================
--- test/CodeGen/AMDGPU/min.ll
+++ test/CodeGen/AMDGPU/min.ll
@@ -2,7 +2,7 @@
 declare i32 @llvm.r600.read.tidig.x() nounwind readnone

-; FUNC-LABEL: @v_test_imin_sle_i32
+; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
 ; SI: v_min_i32_e32
 define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -17,7 +17,7 @@
   ret void
 }

-; FUNC-LABEL: @s_test_imin_sle_i32
+; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
 ; SI: s_min_i32
 define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %cmp = icmp sle i32 %a, %b
@@ -26,6 +26,18 @@
ret void } +; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32: +; SI: s_min_i32 +; SI: s_min_i32 +; SI: s_min_i32 +; SI: s_min_i32 +define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind { + %cmp = icmp sle <4 x i32> %a, %b + %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %val, <4 x i32> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: @v_test_imin_slt_i32 ; SI: v_min_i32_e32 define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -50,6 +62,16 @@ ret void } +; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32: +; SI: s_min_i32 +; SI: s_min_i32 +define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { + %cmp = icmp slt <2 x i32> %a, %b + %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b + store <2 x i32> %val, <2 x i32> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32: ; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8 define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -83,6 +105,24 @@ ret void } +; FUNC-LABEL: @v_test_umin_ule_v3i32 +; SI: v_min_u32_e32 +; SI: v_min_u32_e32 +; SI: v_min_u32_e32 +; SI-NOT: v_min_u32_e32 +; SI: s_endpgm +define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %aptr, <3 x i32> addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid + %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep0 + %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep1 + %cmp = icmp ule <3 x i32> %a, %b + %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b + store <3 x i32> %val, <3 x i32> addrspace(1)* %outgep + ret void +} ; FUNC-LABEL: 
@s_test_umin_ule_i32 ; SI: s_min_u32 define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {