Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8617,15 +8617,15 @@
   SDValue RHS = N->getOperand(1);
 
   unsigned Opc = LHS.getOpcode();
-  if (Opc != ISD::SUBCARRY)
-    std::swap(RHS, LHS);
 
-  if (LHS.getOpcode() == ISD::SUBCARRY) {
+  if (Opc == ISD::SUBCARRY) {
     // sub (subcarry x, 0, cc), y => subcarry x, y, cc
+    // Only the LHS is matched: sub is not commutative, and
+    // sub y, (subcarry x, 0, cc) is y - x + cc, which is not a SUBCARRY.
     auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
-    if (!C || C->getZExtValue() != 0)
+    if (!C || !C->isNullValue())
       return SDValue();
     SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
     return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
   }
   return SDValue();
Index: test/CodeGen/AMDGPU/combine-cond-add-sub.ll
===================================================================
--- test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -127,6 +127,29 @@
   ret void
 }
 
+; Regression input for a sub whose borrow-carrying operand may end up on the
+; subtrahend side (presumably after DAG reassociation of 100 - %sub; confirm).
+; NOTE(review): only the fold-independent checks are kept here; regenerate the
+; instruction-level CHECK lines from llc output before committing.
+; GCN-LABEL: {{^}}sub_sube2:
+; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN-DAG: _load_dword [[V:v[0-9]+]],
+; GCN-NOT: v_cndmask
+define amdgpu_kernel void @sub_sube2(i32 addrspace(1)* nocapture %arg, i32 %a) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = sext i1 %cmp to i32
+  %adde = add i32 %v, %ext
+  %sub = sub i32 %adde, %a
+  %sub2 = sub i32 100, %sub
+  store i32 %sub2, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
 ; GCN-LABEL: {{^}}sube_sub:
 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]