diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9622,16 +9622,7 @@
 
   // sub x, zext (setcc) => subcarry x, 0, setcc
   // sub x, sext (setcc) => addcarry x, 0, setcc
-
-  bool Commuted = false;
-  unsigned Opc = LHS.getOpcode();
-  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
-      Opc == ISD::ANY_EXTEND) {
-    std::swap(RHS, LHS);
-    Commuted = true;
-  }
-
-  Opc = RHS.getOpcode();
+  unsigned Opc = RHS.getOpcode();
   switch (Opc) {
   default: break;
   case ISD::ZERO_EXTEND:
@@ -9643,22 +9634,8 @@
     if (!isBoolSGPR(Cond))
       break;
     SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
-    SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
-    SDValue Args[3];
-    Args[2] = Cond;
-
-    if (Commuted) {
-      // sub zext (setcc), x => addcarry 0, x, setcc
-      // sub sext (setcc), x => subcarry 0, x, setcc
-      Args[0] = Zero;
-      Args[1] = LHS;
-      Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY;
-    } else {
-      Args[0] = LHS;
-      Args[1] = Zero;
-      Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
-    }
-
+    SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
+    Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
     return DAG.getNode(Opc, SL, VTList, Args);
   }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -130,14 +130,18 @@
 ; GCN-LABEL: {{^}}sub_sube_commuted:
 ; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
-; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
-; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADDC]]
+; GCN: v_cndmask_b32_e64 [[CCZEXT:v[0-9]+]], 0, 1, [[CC]]
+; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, [[CCZEXT]], v4
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, {{.*}}, [[SUB]]
+; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADD]]
 
 ; GFX9-LABEL: {{^}}sub_sube_commuted:
 ; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
-; GFX9: v_addc_co_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
-; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[ADDC]]
+; GFX9-DAG: v_cndmask_b32_e64 [[CCZEXT:v[0-9]+]], 0, 1, [[CC]]
+; GFX9: v_sub_u32_e32 {{.*}}, [[CCZEXT]]
+; GFX9: v_add_u32_e32
+; GFX9: v_add_u32_e32 {{.*}}, 0x64,
 define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
 bb:
   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -312,9 +316,10 @@
 ; Check case where sub is commuted with zext
 ; GCN-LABEL: {{^}}sub_zext_setcc_commute:
 ; GCN: v_cmp_gt_u32_e32 vcc, v
-; GCN-NOT: vcc
-; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
-; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADDC]]
+; GCN: v_cndmask
+; GCN: v_sub_i32_e32
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc,
+; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]]
 define amdgpu_kernel void @sub_zext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) {
 bb:
   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -333,9 +338,9 @@
 ; Check case where sub is commuted with sext
 ; GCN-LABEL: {{^}}sub_sext_setcc_commute:
 ; GCN: v_cmp_gt_u32_e32 vcc, v
-; GCN-NOT: vcc
-; GCN: v_subb_u32_e32 [[SUBB:v[0-9]+]], vcc, 0, v{{[0-9]+}}, vcc
-; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
+; GCN: v_cndmask
+; GCN: v_sub_i32_e32
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc,
 ; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]]
 define amdgpu_kernel void @sub_sext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) {
 bb:
diff --git a/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll b/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sub-zext-cc-zext-cc.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs <%s | FileCheck -check-prefixes=GCN %s
+;
+; This test checks that we have the correct fold for zext(cc1) - zext(cc2).
+;
+; GCN-LABEL: sub_zext_zext:
+; GCN: ds_read_b32 [[VAL:v[0-9]+]],
+; GCN-DAG: v_cmp_lt_f32{{.*}} [[CC1:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; GCN-DAG: v_cmp_gt_f32{{.*}} vcc, 0, [[VAL]]
+; GCN: v_cndmask_{{.*}} [[ZEXTCC1:v[0-9]+]], 0, 1, [[CC1]]
+; GCN: v_subbrev{{.*}} {{v[0-9]+}}, vcc, 0, [[ZEXTCC1]], vcc
+;
+; Before the reversion that this test is attached to, the compiler commuted
+; the operands to the sub and used different logic to select the addc/subc
+; instruction:
+;   sub zext (setcc), x => addcarry 0, x, setcc
+;   sub sext (setcc), x => subcarry 0, x, setcc
+;
+; ... but that is bogus. I believe it is not possible to fold those commuted
+; patterns into any form of addcarry or subcarry.
+
+define amdgpu_cs float @sub_zext_zext() {
+.entry:
+
+  %t519 = load float, float addrspace(3)* null
+
+  %t524 = fcmp ogt float %t519, 0.000000e+00
+  %t525 = fcmp olt float %t519, 0.000000e+00
+  %t526 = zext i1 %t524 to i32
+  %t527 = zext i1 %t525 to i32
+  %t528 = sub nsw i32 %t526, %t527
+  %t529 = sitofp i32 %t528 to float
+  ret float %t529
+}
+
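
The arithmetic behind the test comment above can be checked with concrete values. What follows is a minimal standalone C++ sketch, not part of the patch, that models the zext-commuted rewrite the reverted code performed (sub (zext cc), x => addcarry 0, x, cc, keeping only the i32 result of ADDCARRY, which is a + b + carry-in); the helper names subOfZextSetcc and addcarryLowered are made up for this illustration.

// Illustration only (not part of the patch): plug in cc = 1, x = 5 and the
// original expression and its claimed rewrite produce different i32 values.
#include <cstdint>
#include <cstdio>

// Models "sub (zext cc), x", i.e. cc - x for cc in {0, 1}.
static uint32_t subOfZextSetcc(bool cc, uint32_t x) {
  return static_cast<uint32_t>(cc) - x;
}

// Models the i32 result of "addcarry 0, x, cc", i.e. 0 + x + cc.
static uint32_t addcarryLowered(bool cc, uint32_t x) {
  return 0u + x + static_cast<uint32_t>(cc);
}

int main() {
  // cc = 1, x = 5: the original expression yields 0xFFFFFFFC (-4),
  // the rewritten one yields 6, so the rewrite is not value-preserving.
  bool cc = true;
  uint32_t x = 5;
  std::printf("sub(zext cc, x) = 0x%08x, addcarry(0, x, cc) = 0x%08x\n",
              subOfZextSetcc(cc, x), addcarryLowered(cc, x));
  return 0;
}

This only exercises the zext pattern quoted in the test comment; the patch itself simply removes the commuted handling and keeps the non-commuted fold.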