AMDGPU: wrap the S_MIN/S_MAX selection patterns in UniformBinFrag<op>.

The scalar min/max patterns previously matched smin/umin/smax/umax directly;
they now go through UniformBinFrag (presumably restricting the match to
uniform nodes -- confirm against its definition in the AMDGPU .td files).
The new test stops after amdgpu-isel and checks that the amdgpu_kernel
functions (uniform_*) select S_MIN/S_MAX while the plain functions
(divergent_*) select the V_MIN/V_MAX *_e64 forms.

diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -485,19 +485,18 @@
   [(set i32:$sdst, (UniformBinFrag<sube> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
 } // End Uses = [SCC]
 
-
 let isCommutable = 1 in {
 def S_MIN_I32 : SOP2_32 <"s_min_i32",
-  [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
+  [(set i32:$sdst, (UniformBinFrag<smin> i32:$src0, i32:$src1))]
 >;
 def S_MIN_U32 : SOP2_32 <"s_min_u32",
-  [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
+  [(set i32:$sdst, (UniformBinFrag<umin> i32:$src0, i32:$src1))]
 >;
 def S_MAX_I32 : SOP2_32 <"s_max_i32",
-  [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
+  [(set i32:$sdst, (UniformBinFrag<smax> i32:$src0, i32:$src1))]
 >;
 def S_MAX_U32 : SOP2_32 <"s_max_u32",
-  [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
+  [(set i32:$sdst, (UniformBinFrag<umax> i32:$src0, i32:$src1))]
 >;
 } // End isCommutable = 1
 } // End Defs = [SCC]
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-min-max.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-min-max.ll
new file mode 100755
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-min-max.ll
@@ -0,0 +1,73 @@
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: name: uniform_imin
+; GCN: S_MIN_I32
+define amdgpu_kernel void @uniform_imin(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+  %cmp = icmp sle i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: name: divergent_imin
+; GCN: V_MIN_I32_e64
+define void @divergent_imin(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+  %cmp = icmp sle i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: name: uniform_umin
+; GCN: S_MIN_U32
+define amdgpu_kernel void @uniform_umin(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+  %tmp = icmp ule i32 %a, %b
+  %val = select i1 %tmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 8
+  ret void
+}
+
+; GCN-LABEL: name: divergent_umin
+; GCN: V_MIN_U32_e64
+define void @divergent_umin(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+  %tmp = icmp ule i32 %a, %b
+  %val = select i1 %tmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 8
+  ret void
+}
+
+; GCN-LABEL: name: uniform_imax
+; GCN: S_MAX_I32
+define amdgpu_kernel void @uniform_imax(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp sge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: name: divergent_imax
+; GCN: V_MAX_I32_e64
+define void @divergent_imax(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp sge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: name: uniform_umax
+; GCN: S_MAX_U32
+define amdgpu_kernel void @uniform_umax(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp uge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: name: divergent_umax
+; GCN: V_MAX_U32_e64
+define void @divergent_umax(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp uge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}