diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3202,6 +3202,56 @@
   if (VT != MVT::i64)
     return SDValue();
 
+  // fold (i64 (shr (add a, b), 32))
+  //   -> (i64 (zext (uaddo (i32 (trunc a)), (i32 (trunc b))).overflow))
+  // iff a/b have >= 32 leading zeroes
+  // (usually a/b are i32->i64 zexts)
+  //
+  // Each addend fits in 32 bits, so the i64 sum fits in 33 bits and its
+  // bit 32 (the only bit the shift keeps) is the carry-out of the i32 add.
+  if (ShiftAmt == 32 && LHS.getOpcode() == ISD::ADD) {
+    SDValue AddLHS = LHS->getOperand(0);
+    SDValue AddRHS = LHS->getOperand(1);
+
+    KnownBits AddLHSKnownBits = DAG.computeKnownBits(AddLHS);
+    KnownBits AddRHSKnownBits = DAG.computeKnownBits(AddRHS);
+    if (AddLHSKnownBits.countMinLeadingZeros() >= 32 &&
+        AddRHSKnownBits.countMinLeadingZeros() >= 32) {
+
+      // All users of the add must either be this shr, or truncs to i32.
+      // If there are other users, don't do the transform.
+      SmallVector<SDValue, 2> TruncsToReplace;
+      bool CanCombine = true;
+      for (SDNode *User : LHS->uses()) {
+        if (User == N)
+          continue;
+        if (User->getOpcode() != ISD::TRUNCATE ||
+            User->getValueType(0) != MVT::i32) {
+          CanCombine = false;
+          break;
+        }
+
+        TruncsToReplace.push_back(SDValue(User, 0));
+      }
+
+      if (CanCombine) {
+        // (i32 (uaddo a, b))
+        SDValue A = DAG.getNode(ISD::TRUNCATE, SL, {MVT::i32}, {AddLHS});
+        SDValue B = DAG.getNode(ISD::TRUNCATE, SL, {MVT::i32}, {AddRHS});
+        SDValue UADDO =
+            DAG.getNode(ISD::UADDO, SL, {MVT::i32, MVT::i1}, {A, B});
+
+        // The low 32 bits of the i64 add equal the i32 sum, so every i32
+        // trunc of the add can use the UADDO sum result (value 0) instead.
+        for (SDValue V : TruncsToReplace)
+          DAG.ReplaceAllUsesOfValueWith(V, UADDO);
+
+        // Replace this shift with (i64 (zext uaddo.overflow))
+        return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, {UADDO.getValue(1)});
+      }
+    }
+  }
+
+
   if (ShiftAmt < 32)
     return SDValue();
diff --git a/llvm/test/CodeGen/AMDGPU/add_shr_carry.ll b/llvm/test/CodeGen/AMDGPU/add_shr_carry.ll
--- a/llvm/test/CodeGen/AMDGPU/add_shr_carry.ll
+++ b/llvm/test/CodeGen/AMDGPU/add_shr_carry.ll
@@ -10,39 +10,73 @@
 ; RUN: llc -global-isel < %s 
-mtriple=amdgcn-amd-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11,GISEL-GFX11 %s define i64 @basic_zext(i32 %a, i32 %b, i64 %c) { -; VI-LABEL: basic_zext: -; VI: ; %bb.0: ; %entry -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 -; VI-NEXT: v_addc_u32_e64 v0, s[4:5], 0, 0, vcc -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: basic_zext: +; SDAG-VI: ; %bb.0: ; %entry +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; SDAG-VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX9-LABEL: basic_zext: +; SDAG-GFX9: ; %bb.0: ; %entry +; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +; SDAG-GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX10-LABEL: basic_zext: +; SDAG-GFX10: ; %bb.0: ; %entry +; SDAG-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG-GFX10-NEXT: v_add_co_u32 v0, s4, v0, v1 +; SDAG-GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-GFX10-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX11-LABEL: basic_zext: +; SDAG-GFX11: ; %bb.0: ; %entry +; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG-GFX11-NEXT: v_add_co_u32 v0, s0, v0, v1 +; SDAG-GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; SDAG-GFX11-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: basic_zext: +; GISEL-VI: ; %bb.0: ; %entry +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GISEL-VI-NEXT: v_addc_u32_e64 v0, s[4:5], 0, 0, vcc +; GISEL-VI-NEXT: 
v_mov_b32_e32 v1, 0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: basic_zext: -; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 -; GFX9-NEXT: v_addc_co_u32_e64 v0, s[4:5], 0, 0, vcc -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX9-LABEL: basic_zext: +; GISEL-GFX9: ; %bb.0: ; %entry +; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v1 +; GISEL-GFX9-NEXT: v_addc_co_u32_e64 v0, s[4:5], 0, 0, vcc +; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: basic_zext: -; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32 v0, s4, v0, v1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s4, 0, 0, s4 -; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX10-LABEL: basic_zext: +; GISEL-GFX10: ; %bb.0: ; %entry +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GISEL-GFX10-NEXT: v_add_co_u32 v0, s4, v0, v1 +; GISEL-GFX10-NEXT: v_add_co_ci_u32_e64 v0, s4, 0, 0, s4 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: basic_zext: -; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_add_co_u32 v0, s0, v0, v1 -; GFX11-NEXT: v_add_co_ci_u32_e64 v0, null, 0, 0, s0 -; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX11-LABEL: basic_zext: +; GISEL-GFX11: ; %bb.0: ; %entry +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GISEL-GFX11-NEXT: v_add_co_u32 v0, s0, v0, v1 +; GISEL-GFX11-NEXT: v_add_co_ci_u32_e64 v0, null, 0, 0, s0 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 +; 
GISEL-GFX11-NEXT: s_setpc_b64 s[30:31] entry: %a.zext = zext i32 %a to i64 %b.zext = zext i32 %b to i64 @@ -52,28 +86,28 @@ } define i64 @basic_cst_32leadingzeroes(i32 %b, i64 %c) { -; VI-LABEL: basic_cst_32leadingzeroes: -; VI: ; %bb.0: ; %entry -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v0, vcc, -1, v0 -; VI-NEXT: v_addc_u32_e64 v0, s[4:5], 0, 0, vcc -; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: basic_cst_32leadingzeroes: +; SDAG-VI: ; %bb.0: ; %entry +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_add_u32_e32 v0, vcc, -1, v0 +; SDAG-VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: basic_cst_32leadingzeroes: -; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v0, s[4:5], 0, 0, vcc -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX9-LABEL: basic_cst_32leadingzeroes: +; SDAG-GFX9: ; %bb.0: ; %entry +; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0 +; SDAG-GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SDAG-GFX9-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX10-LABEL: basic_cst_32leadingzeroes: ; SDAG-GFX10: ; %bb.0: ; %entry ; SDAG-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX10-NEXT: v_add_co_u32 v0, s4, v0, -1 -; SDAG-GFX10-NEXT: v_add_co_ci_u32_e64 v0, s4, 0, 0, s4 +; SDAG-GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, 0 ; SDAG-GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -82,10 +116,26 @@ ; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; SDAG-GFX11-NEXT: v_add_co_u32 v0, s0, v0, -1 -; SDAG-GFX11-NEXT: 
v_add_co_ci_u32_e64 v0, null, 0, 0, s0 +; SDAG-GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; SDAG-GFX11-NEXT: v_mov_b32_e32 v1, 0 ; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31] ; +; GISEL-VI-LABEL: basic_cst_32leadingzeroes: +; GISEL-VI: ; %bb.0: ; %entry +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_add_u32_e32 v0, vcc, -1, v0 +; GISEL-VI-NEXT: v_addc_u32_e64 v0, s[4:5], 0, 0, vcc +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX9-LABEL: basic_cst_32leadingzeroes: +; GISEL-GFX9: ; %bb.0: ; %entry +; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0 +; GISEL-GFX9-NEXT: v_addc_co_u32_e64 v0, s[4:5], 0, 0, vcc +; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GISEL-GFX10-LABEL: basic_cst_32leadingzeroes: ; GISEL-GFX10: ; %bb.0: ; %entry ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -186,56 +236,107 @@ } define <3 x i32> @add3_i96(<3 x i32> %0, <3 x i32> %1) { -; VI-LABEL: add3_i96: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v0, vcc, v3, v0 -; VI-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc -; VI-NEXT: v_add_u32_e32 v1, vcc, v4, v1 -; VI-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc -; VI-NEXT: v_add_u32_e32 v1, vcc, v1, v3 -; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc -; VI-NEXT: v_add_u32_e32 v2, vcc, v5, v2 -; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v3 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: add3_i96: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_add_u32_e32 v1, vcc, v4, v1 +; SDAG-VI-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; SDAG-VI-NEXT: v_add_u32_e32 v0, vcc, v3, v0 +; SDAG-VI-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; SDAG-VI-NEXT: v_add_u32_e32 v1, vcc, v1, v3 +; SDAG-VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc +; SDAG-VI-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; SDAG-VI-NEXT: 
v_add_u32_e32 v2, vcc, v2, v3 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: add3_i96: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v4, v1 -; GFX9-NEXT: v_addc_co_u32_e64 v4, s[4:5], 0, 0, vcc -; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc -; GFX9-NEXT: v_add3_u32 v2, v5, v2, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX9-LABEL: add3_i96: +; SDAG-GFX9: ; %bb.0: +; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v4, v1 +; SDAG-GFX9-NEXT: v_addc_co_u32_e64 v4, s[4:5], 0, 0, vcc +; SDAG-GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 +; SDAG-GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; SDAG-GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v3 +; SDAG-GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; SDAG-GFX9-NEXT: v_add3_u32 v2, v5, v2, v3 +; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX10-LABEL: add3_i96: +; SDAG-GFX10: ; %bb.0: +; SDAG-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG-GFX10-NEXT: v_add_co_u32 v0, s4, v3, v0 +; SDAG-GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 +; SDAG-GFX10-NEXT: v_add_co_u32 v1, s4, v4, v1 +; SDAG-GFX10-NEXT: v_add_co_ci_u32_e64 v4, s4, 0, 0, s4 +; SDAG-GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 +; SDAG-GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo +; SDAG-GFX10-NEXT: v_add3_u32 v2, v5, v2, v3 +; SDAG-GFX10-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX11-LABEL: add3_i96: +; SDAG-GFX11: ; %bb.0: +; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; SDAG-GFX11-NEXT: v_add_co_u32 v0, s0, v3, v0 +; SDAG-GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 +; SDAG-GFX11-NEXT: v_add_co_u32 v1, s0, v4, v1 +; SDAG-GFX11-NEXT: v_add_co_ci_u32_e64 v4, null, 0, 
0, s0 +; SDAG-GFX11-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 +; SDAG-GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo +; SDAG-GFX11-NEXT: v_add3_u32 v2, v5, v2, v3 +; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: add3_i96: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_add_u32_e32 v0, vcc, v3, v0 +; GISEL-VI-NEXT: v_addc_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-VI-NEXT: v_add_u32_e32 v1, vcc, v4, v1 +; GISEL-VI-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; GISEL-VI-NEXT: v_add_u32_e32 v1, vcc, v1, v3 +; GISEL-VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc +; GISEL-VI-NEXT: v_add_u32_e32 v2, vcc, v5, v2 +; GISEL-VI-NEXT: v_add_u32_e32 v2, vcc, v2, v3 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: add3_i96: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32 v0, s4, v3, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s4, 0, 0, s4 -; GFX10-NEXT: v_add_co_u32 v1, s4, v4, v1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s4, 0, 0, s4 -; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo -; GFX10-NEXT: v_add3_u32 v2, v5, v2, v3 -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX9-LABEL: add3_i96: +; GISEL-GFX9: ; %bb.0: +; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v3, v0 +; GISEL-GFX9-NEXT: v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v4, v1 +; GISEL-GFX9-NEXT: v_addc_co_u32_e64 v4, s[4:5], 0, 0, vcc +; GISEL-GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v3 +; GISEL-GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; GISEL-GFX9-NEXT: v_add3_u32 v2, v5, v2, v3 +; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: add3_i96: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_add_co_u32 v0, 
s0, v3, v0 -; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s0 -; GFX11-NEXT: v_add_co_u32 v1, s0, v4, v1 -; GFX11-NEXT: v_add_co_ci_u32_e64 v4, null, 0, 0, s0 -; GFX11-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo -; GFX11-NEXT: v_add3_u32 v2, v5, v2, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX10-LABEL: add3_i96: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GISEL-GFX10-NEXT: v_add_co_u32 v0, s4, v3, v0 +; GISEL-GFX10-NEXT: v_add_co_ci_u32_e64 v3, s4, 0, 0, s4 +; GISEL-GFX10-NEXT: v_add_co_u32 v1, s4, v4, v1 +; GISEL-GFX10-NEXT: v_add_co_ci_u32_e64 v4, s4, 0, 0, s4 +; GISEL-GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 +; GISEL-GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo +; GISEL-GFX10-NEXT: v_add3_u32 v2, v5, v2, v3 +; GISEL-GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX11-LABEL: add3_i96: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GISEL-GFX11-NEXT: v_add_co_u32 v0, s0, v3, v0 +; GISEL-GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, 0, s0 +; GISEL-GFX11-NEXT: v_add_co_u32 v1, s0, v4, v1 +; GISEL-GFX11-NEXT: v_add_co_ci_u32_e64 v4, null, 0, 0, s0 +; GISEL-GFX11-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 +; GISEL-GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo +; GISEL-GFX11-NEXT: v_add3_u32 v2, v5, v2, v3 +; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31] %3 = extractelement <3 x i32> %0, i64 0 %4 = zext i32 %3 to i64 %5 = extractelement <3 x i32> %1, i64 0 @@ -261,3 +362,8 @@ %25 = insertelement <3 x i32> %24, i32 %20, i32 2 ret <3 x i32> %25 } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX10: {{.*}} +; GFX11: {{.*}} +; GFX9: {{.*}} +; VI: {{.*}}