Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -238,6 +238,14 @@
   setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
 
   setSchedulingPreference(Sched::RegPressure);
+
+  // FIXME: This is only partially true. If we have to do vector compares, any
+  // SGPR pair can be a condition register. If we have a uniform condition, we
+  // are better off doing SALU operations, where there is only one SCC. For now,
+  // we don't have a way of knowing during instruction selection if a condition
+  // will be uniform and we always use vector compares. Assume we are using
+  // vector compares until that is fixed.
+  setHasMultipleConditionRegisters(true);
 }
 
 //===----------------------------------------------------------------------===//
Index: test/CodeGen/R600/select-opt.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/select-opt.ll
@@ -0,0 +1,131 @@
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI %s
+
+; Make sure to test with f32 and i32 compares. If we have to use float
+; compares, we always have multiple condition registers. If we can do
+; scalar compares, we don't want to use multiple condition registers.
+
+; SI-LABEL: {{^}}opt_select_i32_and_cmp_i32:
+; SI-DAG: v_cmp_ne_i32_e32 vcc,
+; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[AND]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @opt_select_i32_and_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %icmp1 = icmp ne i32 %a, %c
+  %and = and i1 %icmp0, %icmp1
+  %select = select i1 %and, i32 %x, i32 %y
+  store i32 %select, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}opt_select_i32_and_cmp_f32:
+; SI-DAG: v_cmp_lg_f32_e32 vcc
+; SI-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[AND]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+define void @opt_select_i32_and_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) nounwind {
+  %fcmp0 = fcmp one float %a, %b
+  %fcmp1 = fcmp one float %a, %c
+  %and = and i1 %fcmp0, %fcmp1
+  %select = select i1 %and, i32 %x, i32 %y
+  store i32 %select, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}opt_select_i64_and_cmp_i32:
+; SI-DAG: v_cmp_ne_i32_e32 vcc,
+; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
+; SI: v_cndmask_b32_e64 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[AND]]
+; SI-NEXT: v_cndmask_b32_e64 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[AND]]
+; SI-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
+define void @opt_select_i64_and_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %icmp1 = icmp ne i32 %a, %c
+  %and = and i1 %icmp0, %icmp1
+  %select = select i1 %and, i64 %x, i64 %y
+  store i64 %select, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}opt_select_i64_and_cmp_f32:
+; SI-DAG: v_cmp_lg_f32_e32 vcc,
+; SI-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
+; SI: v_cndmask_b32_e64 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[AND]]
+; SI-NEXT: v_cndmask_b32_e64 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[AND]]
+; SI-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
+define void @opt_select_i64_and_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) nounwind {
+  %fcmp0 = fcmp one float %a, %b
+  %fcmp1 = fcmp one float %a, %c
+  %and = and i1 %fcmp0, %fcmp1
+  %select = select i1 %and, i64 %x, i64 %y
+  store i64 %select, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}opt_select_i32_or_cmp_i32:
+; SI-DAG: v_cmp_ne_i32_e32 vcc,
+; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[OR]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @opt_select_i32_or_cmp_i32(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %icmp1 = icmp ne i32 %a, %c
+  %or = or i1 %icmp0, %icmp1
+  %select = select i1 %or, i32 %x, i32 %y
+  store i32 %select, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}opt_select_i32_or_cmp_f32:
+; SI-DAG: v_cmp_lg_f32_e32 vcc
+; SI-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[OR]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+define void @opt_select_i32_or_cmp_f32(i32 addrspace(1)* %out, float %a, float %b, float %c, i32 %x, i32 %y) nounwind {
+  %fcmp0 = fcmp one float %a, %b
+  %fcmp1 = fcmp one float %a, %c
+  %or = or i1 %fcmp0, %fcmp1
+  %select = select i1 %or, i32 %x, i32 %y
+  store i32 %select, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}opt_select_i64_or_cmp_i32:
+; SI-DAG: v_cmp_ne_i32_e32 vcc,
+; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
+; SI: v_cndmask_b32_e64 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[OR]]
+; SI-NEXT: v_cndmask_b32_e64 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[OR]]
+; SI-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
+define void @opt_select_i64_or_cmp_i32(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %icmp1 = icmp ne i32 %a, %c
+  %or = or i1 %icmp0, %icmp1
+  %select = select i1 %or, i64 %x, i64 %y
+  store i64 %select, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}opt_select_i64_or_cmp_f32:
+; SI-DAG: v_cmp_lg_f32_e32 vcc,
+; SI-DAG: v_cmp_lg_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP1]]
+; SI: v_cndmask_b32_e64 v[[RESULT1:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[OR]]
+; SI-NEXT: v_cndmask_b32_e64 v[[RESULT0:[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[OR]]
+; SI-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT0]]:[[RESULT1]]{{\]}}
+define void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, float %a, float %b, float %c, i64 %x, i64 %y) nounwind {
+  %fcmp0 = fcmp one float %a, %b
+  %fcmp1 = fcmp one float %a, %c
+  %or = or i1 %fcmp0, %fcmp1
+  %select = select i1 %or, i64 %x, i64 %y
+  store i64 %select, i64 addrspace(1)* %out
+  ret void
+}