Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3871,30 +3871,32 @@ } // Hoist one-use addition by non-opaque constant: - // (x + C) - y -> (x - y) + C - if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && - isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { - SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); - return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); - } - // y - (x + C) -> (y - x) - C - if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() && - isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) { - SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); - return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); - } - // (x - C) - y -> (x - y) - C - // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. - if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && - isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { - SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); - return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1)); - } - // (C - x) - y -> C - (x + y) - if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && - isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { - SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1); - return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add); + if (N0->isDivergent() == N1->isDivergent()) { + // (x + C) - y -> (x - y) + C + if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && + isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); + } + // y - (x + C) -> (y - x) - C + if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() && + isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); + } + // (x - C) - y -> (x - y) - C + // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && + isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1)); + } + // (C - x) - y -> C - (x + y) + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && + isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add); + } } // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1' Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -13,12 +13,12 @@ ; VARIANT0-NEXT: s_mov_b32 s6, 0 ; VARIANT0-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; VARIANT0-NEXT: v_mov_b32_e32 v2, 0 -; VARIANT0-NEXT: v_not_b32_e32 v3, v0 ; VARIANT0-NEXT: s_waitcnt lgkmcnt(0) ; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 ; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; VARIANT0-NEXT: s_barrier -; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s0, v3 +; VARIANT0-NEXT: s_add_i32 s0, s0, -1 +; VARIANT0-NEXT: v_sub_i32_e32 v3, vcc, s0, v0 ; VARIANT0-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; VARIANT0-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 ; VARIANT0-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64 @@ -34,11 +34,11 @@ ; VARIANT1-NEXT: s_mov_b32 s6, 0 ; VARIANT1-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; VARIANT1-NEXT: v_mov_b32_e32 v2, 0 -; VARIANT1-NEXT: v_not_b32_e32 v3, v0 ; VARIANT1-NEXT: s_waitcnt lgkmcnt(0) ; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 ; VARIANT1-NEXT: s_barrier -; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, s0, v3 +; VARIANT1-NEXT: s_add_i32 s0, s0, -1 +; VARIANT1-NEXT: v_sub_i32_e32 v3, vcc, s0, v0 ; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; VARIANT1-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 ; VARIANT1-NEXT: s_waitcnt expcnt(0) @@ -54,7 +54,8 @@ ; VARIANT2-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; VARIANT2-NEXT: s_waitcnt lgkmcnt(0) ; VARIANT2-NEXT: global_store_dword v2, v0, s[2:3] -; VARIANT2-NEXT: v_xad_u32 v0, v0, -1, s4 +; VARIANT2-NEXT: s_add_i32 s4, s4, -1 +; VARIANT2-NEXT: v_sub_u32_e32 v0, s4, v0 ; VARIANT2-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; VARIANT2-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] ; VARIANT2-NEXT: v_mov_b32_e32 v3, s3 @@ -74,7 +75,8 @@ ; VARIANT3-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; VARIANT3-NEXT: s_waitcnt lgkmcnt(0) ; VARIANT3-NEXT: global_store_dword v2, v0, s[2:3] -; VARIANT3-NEXT: v_xad_u32 v0, v0, -1, s4 +; VARIANT3-NEXT: s_add_i32 s4, s4, -1 +; VARIANT3-NEXT: v_sub_u32_e32 v0, s4, v0 ; VARIANT3-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; VARIANT3-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] ; VARIANT3-NEXT: v_mov_b32_e32 v3, s3