Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -320,6 +320,8 @@ DOT4, CARRY, BORROW, + ADDE, + SUBE, BFE_U32, // Extract range of bits with zero extension to 32-bits. BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3609,6 +3609,8 @@ NODE_NAME_CASE(DOT4) NODE_NAME_CASE(CARRY) NODE_NAME_CASE(BORROW) + NODE_NAME_CASE(ADDE) + NODE_NAME_CASE(SUBE) NODE_NAME_CASE(BFE_U32) NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -70,6 +70,10 @@ [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>] >; +def AMDGPUAddeSubeOp : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>] +>; + //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -179,6 +183,10 @@ // out = (src1 > src0) ? 1 : 0 def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>; +def AMDGPUadde : SDNode<"AMDGPUISD::ADDE", AMDGPUAddeSubeOp, []>; + +def AMDGPUsube : SDNode<"AMDGPUISD::SUBE", AMDGPUAddeSubeOp, []>; + def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -108,6 +108,7 @@ unsigned getFusedOpcode(const SelectionDAG &DAG, const SDNode *N0, const SDNode *N1) const; + SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -471,6 +471,7 @@ setOperationAction(ISD::SELECT, VT, Custom); } + setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FMINNUM); @@ -4839,6 +4840,40 @@ return 0; } +SDValue SITargetLowering::performAddCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if (VT != MVT::i32) + return SDValue(); + + SDLoc SL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // add x, zext (setcc) => addc x, 0, setcc + // add x, sext (setcc) => subb x, 0, setcc + unsigned Opc = LHS.getOpcode(); + if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND || + Opc == ISD::ANY_EXTEND) + std::swap(RHS, LHS); + + Opc = RHS.getOpcode(); + if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND || + Opc == ISD::ANY_EXTEND) { + auto Cond = RHS.getOperand(0); + if (Cond.getOpcode() == ISD::SETCC) { + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue); + SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond }; + Opc = (Opc == ISD::SIGN_EXTEND) ? AMDGPUISD::SUBE : AMDGPUISD::ADDE; + return DAG.getNode(Opc, SL, VTList, Args); + } + } + return SDValue(); +} + SDValue SITargetLowering::performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) @@ -5009,6 +5044,8 @@ switch (N->getOpcode()) { default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); + case ISD::ADD: + return performAddCombine(N, DCI); case ISD::FADD: return performFAddCombine(N, DCI); case ISD::FSUB: Index: lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP2Instructions.td +++ lib/Target/AMDGPU/VOP2Instructions.td @@ -411,6 +411,15 @@ } // End SubtargetPredicate = isGCN +def : Pat< + (AMDGPUadde i32:$src0, i32:$src1, i1:$src2), + (V_ADDC_U32_e64 $src0, $src1, $src2) +>; + +def : Pat< + (AMDGPUsube i32:$src0, i32:$src1, i1:$src2), + (V_SUBB_U32_e64 $src0, $src1, $src2) +>; // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { Index: test/CodeGen/AMDGPU/combine-cond-add-sub.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}add1: +; GCN: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_addc_u32_e32 v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, vcc +; GCN_NOT: v_cndmask + +define amdgpu_kernel void @add1(i32 addrspace(1)* nocapture %arg) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = zext i1 %cmp to i32 + %add = add i32 %v, %ext + store i32 %add, i32 addrspace(1)* %gep, align 4 + ret void +} + +; GCN-LABEL: {{^}}sub1: +; GCN: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_subb_u32_e64 v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, 0, vcc +; GCN_NOT: v_cndmask + +define amdgpu_kernel void @sub1(i32 addrspace(1)* nocapture %arg) { +bb: + %x = tail call i32 @llvm.amdgcn.workitem.id.x() + %y = tail call i32 @llvm.amdgcn.workitem.id.y() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x + %v = load i32, i32 addrspace(1)* %gep, align 4 + %cmp = icmp ugt i32 %x, %y + %ext = sext i1 %cmp to i32 + %add = add i32 %v, %ext + store i32 %add, i32 addrspace(1)* %gep, align 4 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +declare i32 @llvm.amdgcn.workitem.id.y() #0 + +attributes #0 = { nounwind readnone speculatable }