diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -104,12 +104,19 @@
   [{ return Helper.matchFoldableFneg(*${ffn}, ${matchinfo}); }]),
   (apply [{ Helper.applyFoldableFneg(*${ffn}, ${matchinfo}); }])>;
 
+def shr_add_to_uaddo_matchdata : GIDefMatchData<"MachineInstr *">;
+def shr_add_to_uaddo : GICombineRule<
+  (defs root:$op, shr_add_to_uaddo_matchdata:$matchinfo),
+  (match (wip_match_opcode G_LSHR, G_UNMERGE_VALUES):$op,
+         [{ return Helper.matchShrAddToUAddo(*${op}, ${matchinfo}); }]),
+  (apply [{ Helper.applyShrAddToUAddo(*${op}, ${matchinfo}); }])>;
+
 // Combines which should only apply on SI/VI
 def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
 
 def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
   "AMDGPUGenPreLegalizerCombinerHelper",
-  [all_combines, clamp_i64_to_i16, foldable_fneg]> {
+  [all_combines, clamp_i64_to_i16, foldable_fneg, shr_add_to_uaddo]> {
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
   let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
   let AdditionalArguments = [];
@@ -119,7 +126,7 @@
   "AMDGPUGenPostLegalizerCombinerHelper",
   [all_combines, gfx6gfx7_combines, uchar_to_float,
    cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
-   rcp_sqrt_to_rsq]> {
+   rcp_sqrt_to_rsq, shr_add_to_uaddo]> {
   let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
   let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
   let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -23,4 +23,7 @@
 
   bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
   void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
+
+  bool matchShrAddToUAddo(MachineInstr &MI, MachineInstr *&MatchInfo);
+  void applyShrAddToUAddo(MachineInstr &MI, MachineInstr *&MatchInfo);
 };
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -9,6 +9,7 @@
 #include "AMDGPUCombinerHelper.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/Target/TargetMachine.h"
@@ -380,3 +381,75 @@
 
   MI.eraseFromParent();
 }
+
+bool AMDGPUCombinerHelper::matchShrAddToUAddo(MachineInstr &MI,
+                                              MachineInstr *&MatchInfo) {
+  // fold (i64 (shr (add a, b), 32))
+  //   -> (i64 (zext (uaddo (i32 (trunc a)), (i32 (trunc b))).overflow))
+  // iff a/b have >= 32 leading zeroes
+  // (usually a/b are i32->i64 zexts)
+  //
+  // NOTE: After legalization, we may not have a G_LSHR anymore but instead a
+  // G_UNMERGE_VALUES. This combine could be adapted to support that variant
+  // as well if needed, but the cases we're interested in should already be
+  // present pre-legalization (coming from user code), so that variant is
+  // currently not implemented.
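+  //
+  // Example (illustrative pseudo-MIR; this is the pattern exercised by
+  // good_zext_src in combine-lshr-add-to-uaddo.mir and by basic_zext in
+  // add_shr_carry.ll below):
+  //   %a64:_(s64) = G_ZEXT %a32(s32)
+  //   %b64:_(s64) = G_ZEXT %b32(s32)
+  //   %add:_(s64) = G_ADD %a64, %b64
+  //   %hi:_(s64)  = G_LSHR %add, 32
+  // Here the add becomes a 32-bit G_UADDO of %a32 and %b32, %hi becomes the
+  // zero-extended carry-out, and any i32 G_TRUNC of %add can use the G_UADDO
+  // result directly.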
+  Register Dst = MI.getOperand(0).getReg();
+  LLT S64 = LLT::scalar(64);
+  LLT S32 = LLT::scalar(32);
+  if (MRI.getType(Dst) != S64)
+    return false;
+
+  Register A, B;
+  Optional<ValueAndVReg> ShiftAmt;
+  if (!mi_match(MI, MRI,
+                m_GLShr(m_GAdd(m_Reg(A), m_Reg(B)), m_GCst(ShiftAmt))) ||
+      ShiftAmt->Value != 32)
+    return false;
+
+  GISelKnownBits *KB = getKnownBits();
+  if (KB->getKnownBits(A).countMinLeadingZeros() < 32 ||
+      KB->getKnownBits(B).countMinLeadingZeros() < 32)
+    return false;
+
+  // Also check the users of the add. We only perform the combine if all users
+  // are either the LSHR, or truncs to i32.
+  for (MachineInstr &User :
+       MRI.use_nodbg_instructions(MI.getOperand(1).getReg())) {
+    if (&User != &MI && (User.getOpcode() != AMDGPU::G_TRUNC ||
+                         MRI.getType(User.getOperand(0).getReg()) != S32))
+      return false;
+  }
+
+  MatchInfo = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+  return true;
+}
+
+void AMDGPUCombinerHelper::applyShrAddToUAddo(MachineInstr &MI,
+                                              MachineInstr *&MatchInfo) {
+  // add  -> (i64 (zext (uaddo (i32 (trunc a)), (i32 (trunc b)))))
+  // lshr -> (i64 (zext uaddo.overflow))
+  //
+  // Note that since we checked in the "match" function that the add's users
+  // are all truncs to i32, the zext of the uaddo will be folded out.
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register AddDst = MatchInfo->getOperand(0).getReg();
+  Register LHS = MatchInfo->getOperand(1).getReg();
+  Register RHS = MatchInfo->getOperand(2).getReg();
+
+  LLT S32 = LLT::scalar(32);
+  LLT S1 = LLT::scalar(1);
+
+  Builder.setInstrAndDebugLoc(MI);
+
+  LHS = Builder.buildTrunc(S32, LHS).getReg(0);
+  RHS = Builder.buildTrunc(S32, RHS).getReg(0);
+  MachineInstr *UAddo = Builder.buildUAddo(S32, S1, LHS, RHS);
+  Builder.buildZExt(AddDst, UAddo->getOperand(0).getReg());
+  Builder.buildZExt(Dst, UAddo->getOperand(1).getReg());
+
+  MI.eraseFromParent();
+  MatchInfo->eraseFromParent();
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-add-to-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-add-to-uaddo.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-lshr-add-to-uaddo.mir
@@ -0,0 +1,264 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,PRE
+# RUN: llc -march=amdgcn -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,POST
+
+---
+name: good_zext_src
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: good_zext_src
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a32:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b32:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO %a32, %b32
+    ; CHECK-NEXT: %truclshr:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UADDO]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY %truclshr(s32)
+    %a32:_(s32) = COPY $vgpr0
+    %b32:_(s32) = COPY $vgpr1
+    %a64:_(s64) = G_ZEXT %a32
+    %b64:_(s64) = G_ZEXT %b32
+    %add:_(s64) = G_ADD %a64, %b64
+    %c32:_(s64) = G_CONSTANT i64 32
+    %lshr:_(s64) = G_LSHR %add, %c32
+    %truncadd:_(s32) = G_TRUNC %add
+    %truclshr:_(s32) = G_TRUNC %lshr
+    $vgpr0 = COPY %truncadd
+    $vgpr1 = COPY %truclshr
+...
+
+---
+name: good_constant_src
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: good_constant_src
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a32:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b64:_(s64) = G_CONSTANT i64 4294967290
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %b64(s64)
+    ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO %a32, [[TRUNC]]
+    ; CHECK-NEXT: %truclshr:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UADDO]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY %truclshr(s32)
+    %a32:_(s32) = COPY $vgpr0
+    %a64:_(s64) = G_ZEXT %a32
+    %b64:_(s64) = G_CONSTANT i64 4294967290 ; 0xFFFFFFFA
+    %add:_(s64) = G_ADD %a64, %b64
+    %c32:_(s64) = G_CONSTANT i64 32
+    %lshr:_(s64) = G_LSHR %add, %c32
+    %truncadd:_(s32) = G_TRUNC %add
+    %truclshr:_(s32) = G_TRUNC %lshr
+    $vgpr0 = COPY %truncadd
+    $vgpr1 = COPY %truclshr
+...
+
+---
+name: good_multi_users
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: good_multi_users
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a32:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b32:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO %a32, %b32
+    ; CHECK-NEXT: %truclshr:_(s32) = G_ZEXT [[UADDO1]](s1)
+    ; CHECK-NEXT: $vgpr0 = COPY [[UADDO]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY %truclshr(s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[UADDO]](s32)
+    %a32:_(s32) = COPY $vgpr0
+    %b32:_(s32) = COPY $vgpr1
+    %a64:_(s64) = G_ZEXT %a32
+    %b64:_(s64) = G_ZEXT %b32
+    %add:_(s64) = G_ADD %a64, %b64
+    %c32:_(s64) = G_CONSTANT i64 32
+    %lshr:_(s64) = G_LSHR %add, %c32
+    %truncadd:_(s32) = G_TRUNC %add
+    %truncadd2:_(s32) = G_TRUNC %add
+    %truclshr:_(s32) = G_TRUNC %lshr
+    $vgpr0 = COPY %truncadd
+    $vgpr1 = COPY %truclshr
+    $vgpr2 = COPY %truncadd2
+...
+
+---
+name: bad_src
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; PRE-LABEL: name: bad_src
+    ; PRE: liveins: $vgpr0_vgpr1, $vgpr2
+    ; PRE-NEXT: {{  $}}
+    ; PRE-NEXT: %a64:_(s64) = COPY $vgpr0_vgpr1
+    ; PRE-NEXT: %b32:_(s32) = COPY $vgpr2
+    ; PRE-NEXT: %b64:_(s64) = G_ZEXT %b32(s32)
+    ; PRE-NEXT: %add:_(s64) = G_ADD %a64, %b64
+    ; PRE-NEXT: %c32:_(s64) = G_CONSTANT i64 33
+    ; PRE-NEXT: %lshr:_(s64) = G_LSHR %add, %c32(s64)
+    ; PRE-NEXT: %truncadd:_(s32) = G_TRUNC %add(s64)
+    ; PRE-NEXT: %truncadd2:_(s32) = G_TRUNC %add(s64)
+    ; PRE-NEXT: %truclshr:_(s32) = G_TRUNC %lshr(s64)
+    ; PRE-NEXT: $vgpr0 = COPY %truncadd(s32)
+    ; PRE-NEXT: $vgpr1 = COPY %truclshr(s32)
+    ; PRE-NEXT: $vgpr2 = COPY %truncadd2(s32)
+    ; POST-LABEL: name: bad_src
+    ; POST: liveins: $vgpr0_vgpr1, $vgpr2
+    ; POST-NEXT: {{  $}}
+    ; POST-NEXT: %a64:_(s64) = COPY $vgpr0_vgpr1
+    ; POST-NEXT: %b32:_(s32) = COPY $vgpr2
+    ; POST-NEXT: %b64:_(s64) = G_ZEXT %b32(s32)
+    ; POST-NEXT: %add:_(s64) = G_ADD %a64, %b64
+    ; POST-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %add(s64)
+    ; POST-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; POST-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; POST-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; POST-NEXT: %lshr:_(s64) = G_MERGE_VALUES [[LSHR]](s32), [[C1]](s32)
+    ; POST-NEXT: %truncadd:_(s32) = G_TRUNC %add(s64)
+    ; POST-NEXT: %truncadd2:_(s32) = G_TRUNC %add(s64)
+    ; POST-NEXT: %truclshr:_(s32) = G_TRUNC %lshr(s64)
+    ; POST-NEXT: $vgpr0 = COPY %truncadd(s32)
+    ; POST-NEXT: $vgpr1 = COPY %truclshr(s32)
+    ; POST-NEXT: $vgpr2 = COPY %truncadd2(s32)
+    %a64:_(s64) = COPY $vgpr0_vgpr1
+    %b32:_(s32) = COPY $vgpr2
+    %b64:_(s64) = G_ZEXT %b32
+    %add:_(s64) = G_ADD %a64, %b64
+    %c32:_(s64) = G_CONSTANT i64 33
+    %lshr:_(s64) = G_LSHR %add, %c32
+    %truncadd:_(s32) = G_TRUNC %add
+    %truncadd2:_(s32) = G_TRUNC %add
+    %truclshr:_(s32) = G_TRUNC %lshr
+    $vgpr0 = COPY %truncadd
+    $vgpr1 = COPY %truclshr
+    $vgpr2 = COPY %truncadd2
+...
+
+---
+name: bad_constant_src
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; PRE-LABEL: name: bad_constant_src
+    ; PRE: liveins: $vgpr0
+    ; PRE-NEXT: {{  $}}
+    ; PRE-NEXT: %a64:_(s64) = G_CONSTANT i64 8589934591
+    ; PRE-NEXT: %b32:_(s32) = COPY $vgpr0
+    ; PRE-NEXT: %b64:_(s64) = G_ZEXT %b32(s32)
+    ; PRE-NEXT: %add:_(s64) = G_ADD %a64, %b64
+    ; PRE-NEXT: %c32:_(s64) = G_CONSTANT i64 32
+    ; PRE-NEXT: %lshr:_(s64) = G_LSHR %add, %c32(s64)
+    ; PRE-NEXT: %truncadd:_(s32) = G_TRUNC %add(s64)
+    ; PRE-NEXT: %truclshr:_(s32) = G_TRUNC %lshr(s64)
+    ; PRE-NEXT: $vgpr0 = COPY %truncadd(s32)
+    ; PRE-NEXT: $vgpr1 = COPY %truclshr(s32)
+    ; POST-LABEL: name: bad_constant_src
+    ; POST: liveins: $vgpr0
+    ; POST-NEXT: {{  $}}
+    ; POST-NEXT: %a64:_(s64) = G_CONSTANT i64 8589934591
+    ; POST-NEXT: %b32:_(s32) = COPY $vgpr0
+    ; POST-NEXT: %b64:_(s64) = G_ZEXT %b32(s32)
+    ; POST-NEXT: %add:_(s64) = G_ADD %a64, %b64
+    ; POST-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %add(s64)
+    ; POST-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; POST-NEXT: %lshr:_(s64) = G_MERGE_VALUES [[UV1]](s32), [[C]](s32)
+    ; POST-NEXT: %truncadd:_(s32) = G_TRUNC %add(s64)
+    ; POST-NEXT: %truclshr:_(s32) = G_TRUNC %lshr(s64)
+    ; POST-NEXT: $vgpr0 = COPY %truncadd(s32)
+    ; POST-NEXT: $vgpr1 = COPY %truclshr(s32)
+    %a64:_(s64) = G_CONSTANT i64 8589934591 ; 0x1FFFFFFFF
+    %b32:_(s32) = COPY $vgpr0
+    %b64:_(s64) = G_ZEXT %b32
+    %add:_(s64) = G_ADD %a64, %b64
+    %c32:_(s64) = G_CONSTANT i64 32
+    %lshr:_(s64) = G_LSHR %add, %c32
+    %truncadd:_(s32) = G_TRUNC %add
+    %truclshr:_(s32) = G_TRUNC %lshr
+    $vgpr0 = COPY %truncadd
+    $vgpr1 = COPY %truclshr
+...
+
+---
+name: bad_shift_amt_33
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; PRE-LABEL: name: bad_shift_amt_33
+    ; PRE: liveins: $vgpr0, $vgpr1
+    ; PRE-NEXT: {{  $}}
+    ; PRE-NEXT: %a32:_(s32) = COPY $vgpr0
+    ; PRE-NEXT: %b32:_(s32) = COPY $vgpr1
+    ; PRE-NEXT: %a64:_(s64) = G_ZEXT %a32(s32)
+    ; PRE-NEXT: %b64:_(s64) = G_ZEXT %b32(s32)
+    ; PRE-NEXT: %add:_(s64) = G_ADD %a64, %b64
+    ; PRE-NEXT: %c32:_(s64) = G_CONSTANT i64 33
+    ; PRE-NEXT: %lshr:_(s64) = G_LSHR %add, %c32(s64)
+    ; PRE-NEXT: %truncadd:_(s32) = G_TRUNC %add(s64)
+    ; PRE-NEXT: %truclshr:_(s32) = G_TRUNC %lshr(s64)
+    ; PRE-NEXT: $vgpr0 = COPY %truncadd(s32)
+    ; PRE-NEXT: $vgpr1 = COPY %truclshr(s32)
+    ; POST-LABEL: name: bad_shift_amt_33
+    ; POST: liveins: $vgpr0, $vgpr1
+    ; POST-NEXT: {{  $}}
+    ; POST-NEXT: %a32:_(s32) = COPY $vgpr0
+    ; POST-NEXT: %b32:_(s32) = COPY $vgpr1
+    ; POST-NEXT: %a64:_(s64) = G_ZEXT %a32(s32)
+    ; POST-NEXT: %b64:_(s64) = G_ZEXT %b32(s32)
+    ; POST-NEXT: %add:_(s64) = G_ADD %a64, %b64
+    ; POST-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %add(s64)
+    ; POST-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; POST-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; POST-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; POST-NEXT: %lshr:_(s64) = G_MERGE_VALUES [[LSHR]](s32), [[C1]](s32)
+    ; POST-NEXT: %truncadd:_(s32) = G_TRUNC %add(s64)
+    ; POST-NEXT: %truclshr:_(s32) = G_TRUNC %lshr(s64)
+    ; POST-NEXT: $vgpr0 = COPY %truncadd(s32)
+    ; POST-NEXT: $vgpr1 = COPY %truclshr(s32)
+    %a32:_(s32) = COPY $vgpr0
+    %b32:_(s32) = COPY $vgpr1
+    %a64:_(s64) = G_ZEXT %a32
+    %b64:_(s64) = G_ZEXT %b32
+    %add:_(s64) = G_ADD %a64, %b64
+    %c32:_(s64) = G_CONSTANT i64 33
+    %lshr:_(s64) = G_LSHR %add, %c32
+    %truncadd:_(s32) = G_TRUNC %add
+    %truclshr:_(s32) = G_TRUNC %lshr
+    $vgpr0 = COPY %truncadd
+    $vgpr1 = COPY %truclshr
+...
+
+---
+name: bad_shift_amt_31
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: bad_shift_amt_31
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a32:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b32:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %a64:_(s64) = G_ZEXT %a32(s32)
+    ; CHECK-NEXT: %b64:_(s64) = G_ZEXT %b32(s32)
+    ; CHECK-NEXT: %add:_(s64) = G_ADD %a64, %b64
+    ; CHECK-NEXT: %c32:_(s64) = G_CONSTANT i64 31
+    ; CHECK-NEXT: %lshr:_(s64) = G_LSHR %add, %c32(s64)
+    ; CHECK-NEXT: %truncadd:_(s32) = G_TRUNC %add(s64)
+    ; CHECK-NEXT: %truclshr:_(s32) = G_TRUNC %lshr(s64)
+    ; CHECK-NEXT: $vgpr0 = COPY %truncadd(s32)
+    ; CHECK-NEXT: $vgpr1 = COPY %truclshr(s32)
+    %a32:_(s32) = COPY $vgpr0
+    %b32:_(s32) = COPY $vgpr1
+    %a64:_(s64) = G_ZEXT %a32
+    %b64:_(s64) = G_ZEXT %b32
+    %add:_(s64) = G_ADD %a64, %b64
+    %c32:_(s64) = G_CONSTANT i64 31
+    %lshr:_(s64) = G_LSHR %add, %c32
+    %truncadd:_(s32) = G_TRUNC %add
+    %truclshr:_(s32) = G_TRUNC %lshr
+    $vgpr0 = COPY %truncadd
+    $vgpr1 = COPY %truclshr
+...
diff --git a/llvm/test/CodeGen/AMDGPU/add_shr_carry.ll b/llvm/test/CodeGen/AMDGPU/add_shr_carry.ll
--- a/llvm/test/CodeGen/AMDGPU/add_shr_carry.ll
+++ b/llvm/test/CodeGen/AMDGPU/add_shr_carry.ll
@@ -10,73 +10,39 @@
 ; RUN: llc -global-isel < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -check-prefixes=GFX11,GISEL-GFX11 %s
 
 define i64 @basic_zext(i32 %a, i32 %b, i64 %c) {
-; SDAG-VI-LABEL: basic_zext:
-; SDAG-VI:       ; %bb.0: ; %entry
-; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
-; SDAG-VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
-; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG-GFX9-LABEL: basic_zext:
-; SDAG-GFX9:       ; %bb.0: ; %entry
-; SDAG-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v1
-; SDAG-GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
-; SDAG-GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG-GFX10-LABEL: basic_zext:
-; SDAG-GFX10:       ; %bb.0: ; %entry
-; SDAG-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; SDAG-GFX10-NEXT:    v_add_co_u32 v0, s4, v0, v1
-; SDAG-GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
-; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; SDAG-GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG-GFX11-LABEL: basic_zext:
-; SDAG-GFX11:       ; %bb.0: ; %entry
-; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; SDAG-GFX11-NEXT:    v_add_co_u32 v0, s0, v0, v1
-; SDAG-GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; SDAG-GFX11-NEXT:    v_mov_b32_e32 v1, 0
-; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: basic_zext:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
+; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-VI-LABEL: basic_zext:
-; GISEL-VI:       ; %bb.0: ; %entry
-; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
-; GISEL-VI-NEXT:    v_addc_u32_e64 v0, s[4:5], 0, 0, vcc
-; GISEL-VI-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: basic_zext:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v1
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX9-LABEL: basic_zext:
-; GISEL-GFX9:       ; %bb.0: ; %entry
-; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v1
-; GISEL-GFX9-NEXT:    v_addc_co_u32_e64 v0, s[4:5], 0, 0, vcc
-; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL-GFX10-LABEL: basic_zext:
-; GISEL-GFX10:       ; %bb.0: ; %entry
-; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GISEL-GFX10-NEXT:    v_add_co_u32 v0, s4, v0, v1
-; GISEL-GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s4, 0, 0, s4
-; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: basic_zext:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_co_u32 v0, s4, v0, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX11-LABEL: basic_zext:
-; GISEL-GFX11:       ; %bb.0: ; %entry
-; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GISEL-GFX11-NEXT:    v_add_co_u32 v0, s0, v0, v1
-; GISEL-GFX11-NEXT:    v_add_co_ci_u32_e64 v0, null, 0, 0, s0
-; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: basic_zext:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_add_co_u32 v0, s0, v0, v1
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %a.zext = zext i32 %a to i64
   %b.zext = zext i32 %b to i64
@@ -86,21 +52,21 @@
 }
 
 define i64 @basic_cst_32leadingzeroes(i32 %b, i64 %c) {
-; SDAG-VI-LABEL: basic_cst_32leadingzeroes:
-; SDAG-VI:       ; %bb.0: ; %entry
-; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT:    v_add_u32_e32 v0, vcc, -1, v0
-; SDAG-VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0
-; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: basic_cst_32leadingzeroes:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v0, vcc, -1, v0
+; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX9-LABEL: basic_cst_32leadingzeroes:
-; SDAG-GFX9:       ; %bb.0: ; %entry
-; SDAG-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
-; SDAG-GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; SDAG-GFX9-NEXT:    v_mov_b32_e32 v1, 0
-; SDAG-GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: basic_cst_32leadingzeroes:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-GFX10-LABEL: basic_cst_32leadingzeroes:
 ; SDAG-GFX10:       ; %bb.0: ; %entry
@@ -120,28 +86,12 @@
 ; SDAG-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-VI-LABEL: basic_cst_32leadingzeroes:
-; GISEL-VI:       ; %bb.0: ; %entry
-; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT:    v_add_u32_e32 v0, vcc, -1, v0
-; GISEL-VI-NEXT:    v_addc_u32_e64 v0, s[4:5], 0, 0, vcc
-; GISEL-VI-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL-GFX9-LABEL: basic_cst_32leadingzeroes:
-; GISEL-GFX9:       ; %bb.0: ; %entry
-; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
-; GISEL-GFX9-NEXT:    v_addc_co_u32_e64 v0, s[4:5], 0, 0, vcc
-; GISEL-GFX9-NEXT:    v_mov_b32_e32 v1, 0
-; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GISEL-GFX10-LABEL: basic_cst_32leadingzeroes:
 ; GISEL-GFX10:       ; %bb.0: ; %entry
 ; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GISEL-GFX10-NEXT:    v_add_co_u32 v0, s4, -1, v0
-; GISEL-GFX10-NEXT:    v_add_co_ci_u32_e64 v0, s4, 0, 0, s4
+; GISEL-GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -150,7 +100,7 @@
 ; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GISEL-GFX11-NEXT:    v_add_co_u32 v0, s0, -1, v0
-; GISEL-GFX11-NEXT:    v_add_co_ci_u32_e64 v0, null, 0, 0, s0
+; GISEL-GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
 ; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
@@ -236,107 +186,56 @@
 }
 
 define <3 x i32> @add3_i96(<3 x i32> %0, <3 x i32> %1) {
-; SDAG-VI-LABEL: add3_i96:
-; SDAG-VI:       ; %bb.0:
-; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT:    v_add_u32_e32 v1, vcc, v4, v1
-; SDAG-VI-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, vcc
-; SDAG-VI-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
-; SDAG-VI-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; SDAG-VI-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
-; SDAG-VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; SDAG-VI-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
-; SDAG-VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
-; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: add3_i96:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v1, vcc, v4, v1
+; VI-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, vcc
+; VI-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
+; VI-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; VI-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
+; VI-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
+; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX9-LABEL: add3_i96:
-; SDAG-GFX9:       ; %bb.0:
-; SDAG-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v4, v1
-; SDAG-GFX9-NEXT:    v_addc_co_u32_e64 v4, s[4:5], 0, 0, vcc
-; SDAG-GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v3, v0
-; SDAG-GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; SDAG-GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v1, v3
-; SDAG-GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
-; SDAG-GFX9-NEXT:    v_add3_u32 v2, v5, v2, v3
-; SDAG-GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG-GFX10-LABEL: add3_i96:
-; SDAG-GFX10:       ; %bb.0:
-; SDAG-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; SDAG-GFX10-NEXT:    v_add_co_u32 v0, s4, v3, v0
-; SDAG-GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s4
-; SDAG-GFX10-NEXT:    v_add_co_u32 v1, s4, v4, v1
-; SDAG-GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s4, 0, 0, s4
-; SDAG-GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v3
-; SDAG-GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
-; SDAG-GFX10-NEXT:    v_add3_u32 v2, v5, v2, v3
-; SDAG-GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; SDAG-GFX11-LABEL: add3_i96:
-; SDAG-GFX11:       ; %bb.0:
-; SDAG-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; SDAG-GFX11-NEXT:    v_add_co_u32 v0, s0, v3, v0
-; SDAG-GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
-; SDAG-GFX11-NEXT:    v_add_co_u32 v1, s0, v4, v1
-; SDAG-GFX11-NEXT:    v_add_co_ci_u32_e64 v4, null, 0, 0, s0
-; SDAG-GFX11-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v3
-; SDAG-GFX11-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
-; SDAG-GFX11-NEXT:    v_add3_u32 v2, v5, v2, v3
-; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL-VI-LABEL: add3_i96:
-; GISEL-VI:       ; %bb.0:
-; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
-; GISEL-VI-NEXT:    v_addc_u32_e64 v3, s[4:5], 0, 0, vcc
-; GISEL-VI-NEXT:    v_add_u32_e32 v1, vcc, v4, v1
-; GISEL-VI-NEXT:    v_addc_u32_e64 v4, s[4:5], 0, 0, vcc
-; GISEL-VI-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
-; GISEL-VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GISEL-VI-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
-; GISEL-VI-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
-; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-LABEL: add3_i96:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v4, v1
+; GFX9-NEXT:    v_addc_co_u32_e64 v4, s[4:5], 0, 0, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v3, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v1, v3
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
+; GFX9-NEXT:    v_add3_u32 v2, v5, v2, v3
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX9-LABEL: add3_i96:
-; GISEL-GFX9:       ; %bb.0:
-; GISEL-GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v3, v0
-; GISEL-GFX9-NEXT:    v_addc_co_u32_e64 v3, s[4:5], 0, 0, vcc
-; GISEL-GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v4, v1
-; GISEL-GFX9-NEXT:    v_addc_co_u32_e64 v4, s[4:5], 0, 0, vcc
-; GISEL-GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v1, v3
-; GISEL-GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
-; GISEL-GFX9-NEXT:    v_add3_u32 v2, v5, v2, v3
-; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: add3_i96:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_co_u32 v0, s4, v3, v0
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s4
+; GFX10-NEXT:    v_add_co_u32 v1, s4, v4, v1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s4, 0, 0, s4
+; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v3
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
+; GFX10-NEXT:    v_add3_u32 v2, v5, v2, v3
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX10-LABEL: add3_i96:
-; GISEL-GFX10:       ; %bb.0:
-; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GISEL-GFX10-NEXT:    v_add_co_u32 v0, s4, v3, v0
-; GISEL-GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s4, 0, 0, s4
-; GISEL-GFX10-NEXT:    v_add_co_u32 v1, s4, v4, v1
-; GISEL-GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s4, 0, 0, s4
-; GISEL-GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v3
-; GISEL-GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
-; GISEL-GFX10-NEXT:    v_add3_u32 v2, v5, v2, v3
-; GISEL-GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL-GFX11-LABEL: add3_i96:
-; GISEL-GFX11:       ; %bb.0:
-; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GISEL-GFX11-NEXT:    v_add_co_u32 v0, s0, v3, v0
-; GISEL-GFX11-NEXT:    v_add_co_ci_u32_e64 v3, null, 0, 0, s0
-; GISEL-GFX11-NEXT:    v_add_co_u32 v1, s0, v4, v1
-; GISEL-GFX11-NEXT:    v_add_co_ci_u32_e64 v4, null, 0, 0, s0
-; GISEL-GFX11-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v3
-; GISEL-GFX11-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
-; GISEL-GFX11-NEXT:    v_add3_u32 v2, v5, v2, v3
-; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: add3_i96:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_add_co_u32 v0, s0, v3, v0
+; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX11-NEXT:    v_add_co_u32 v1, s0, v4, v1
+; GFX11-NEXT:    v_add_co_ci_u32_e64 v4, null, 0, 0, s0
+; GFX11-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v3
+; GFX11-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo
+; GFX11-NEXT:    v_add3_u32 v2, v5, v2, v3
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %3 = extractelement <3 x i32> %0, i64 0
   %4 = zext i32 %3 to i64
   %5 = extractelement <3 x i32> %1, i64 0
@@ -362,8 +261,3 @@
   %25 = insertelement <3 x i32> %24, i32 %20, i32 2
   ret <3 x i32> %25
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX10: {{.*}}
-; GFX11: {{.*}}
-; GFX9: {{.*}}
-; VI: {{.*}}