Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -638,7 +638,10 @@ bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo); /// Do constant folding when opportunities are exposed after MIR building. - bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo); + bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo); + + /// Do constant folding when opportunities are exposed after MIR building. + bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo); /// \returns true if it is possible to narrow the width of a scalar binop /// feeding a G_AND instruction \p MI. Index: llvm/include/llvm/CodeGen/GlobalISel/Utils.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -273,6 +273,10 @@ const Register Op2, const MachineRegisterInfo &MRI); +std::optional<APInt> ConstantFoldCastOp(unsigned Opcode, LLT DstTy, + const Register Op0, + const MachineRegisterInfo &MRI); + std::optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI); Index: llvm/include/llvm/Target/GlobalISel/Combine.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/Combine.td +++ llvm/include/llvm/Target/GlobalISel/Combine.td @@ -895,10 +895,16 @@ def reassocs : GICombineGroup<[reassoc_ptradd, reassoc_comm_binops]>; // Constant fold operations. 
-def constant_fold : GICombineRule< +def constant_fold_binop : GICombineRule< (defs root:$d, apint_matchinfo:$matchinfo), (match (wip_match_opcode G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL, G_LSHR, G_ASHR):$d, - [{ return Helper.matchConstantFold(*${d}, ${matchinfo}); }]), + [{ return Helper.matchConstantFoldBinOp(*${d}, ${matchinfo}); }]), + (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>; + +def constant_fold_cast_op : GICombineRule< + (defs root:$d, apint_matchinfo:$matchinfo), + (match (wip_match_opcode G_ZEXT, G_SEXT, G_ANYEXT):$d, + [{ return Helper.matchConstantFoldCastOp(*${d}, ${matchinfo}); }]), (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>; def mulo_by_2: GICombineRule< @@ -1156,7 +1162,7 @@ const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, div_rem_to_divrem, funnel_shift_combines, commute_shift, - form_bitfield_extract, constant_fold, fabs_fneg_fold, + form_bitfield_extract, constant_fold_binop, constant_fold_cast_op, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, and_or_disjoint_mask, fma_combines, fold_binop_into_select, sub_add_reg, select_to_minmax, redundant_binop_in_equality, Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4519,7 +4519,19 @@ return false; } -bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) { +bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) { + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register SrcOp = MI.getOperand(1).getReg(); + + if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) { + MatchInfo = *MaybeCst; + return true; + } + + return false; +} + +bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, 
APInt &MatchInfo) { Register Op1 = MI.getOperand(1).getReg(); Register Op2 = MI.getOperand(2).getReg(); auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI); Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -773,6 +773,29 @@ return std::nullopt; } +std::optional<APInt> llvm::ConstantFoldCastOp(unsigned Opcode, LLT DstTy, + const Register Op0, + const MachineRegisterInfo &MRI) { + std::optional<APInt> Val = getIConstantVRegVal(Op0, MRI); + if (!Val) + return Val; + + const unsigned DstSize = DstTy.getScalarSizeInBits(); + + switch (Opcode) { + case TargetOpcode::G_SEXT: + return Val->sext(DstSize); + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + // TODO: DAG considers target preference when constant folding any_extend. + return Val->zext(DstSize); + default: + break; + } + + llvm_unreachable("unexpected cast opcode to constant fold"); +} + std::optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI) { Index: llvm/lib/Target/AArch64/AArch64Combine.td =================================================================== --- llvm/lib/Target/AArch64/AArch64Combine.td +++ llvm/lib/Target/AArch64/AArch64Combine.td @@ -227,7 +227,7 @@ form_bitfield_extract, rotate_out_of_range, icmp_to_true_false_known_bits, merge_unmerge, select_combines, fold_merge_to_zext, - constant_fold, identity_combines, + constant_fold_binop, identity_combines, ptr_add_immed_chain, overlapping_and, split_store_zero_128, undef_combines, select_to_minmax]> { Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -502,7 +502,7 @@ define i32 @fetch_and_or(ptr %p) #0 { ; CHECK-NOLSE-O1-LABEL: 
fetch_and_or: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: mov w9, #5 +; CHECK-NOLSE-O1-NEXT: mov w9, #5 ; =0x5 ; CHECK-NOLSE-O1-NEXT: LBB8_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] @@ -525,7 +525,7 @@ ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB8_2 Depth 2 ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload ; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload -; CHECK-NOLSE-O0-NEXT: mov w9, #5 +; CHECK-NOLSE-O0-NEXT: mov w9, #5 ; =0x5 ; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9 ; CHECK-NOLSE-O0-NEXT: LBB8_2: ; %atomicrmw.start ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB8_1 Depth=1 @@ -552,13 +552,13 @@ ; ; CHECK-LSE-O1-LABEL: fetch_and_or: ; CHECK-LSE-O1: ; %bb.0: -; CHECK-LSE-O1-NEXT: mov w8, #5 +; CHECK-LSE-O1-NEXT: mov w8, #5 ; =0x5 ; CHECK-LSE-O1-NEXT: ldsetal w8, w0, [x0] ; CHECK-LSE-O1-NEXT: ret ; ; CHECK-LSE-O0-LABEL: fetch_and_or: ; CHECK-LSE-O0: ; %bb.0: -; CHECK-LSE-O0-NEXT: mov w8, #5 +; CHECK-LSE-O0-NEXT: mov w8, #5 ; =0x5 ; CHECK-LSE-O0-NEXT: ldsetal w8, w0, [x0] ; CHECK-LSE-O0-NEXT: ret %val = atomicrmw or ptr %p, i32 5 seq_cst @@ -616,13 +616,13 @@ ; ; CHECK-LSE-O1-LABEL: fetch_and_or_64: ; CHECK-LSE-O1: ; %bb.0: -; CHECK-LSE-O1-NEXT: mov w8, #7 +; CHECK-LSE-O1-NEXT: mov w8, #7 ; =0x7 ; CHECK-LSE-O1-NEXT: ldset x8, x0, [x0] ; CHECK-LSE-O1-NEXT: ret ; ; CHECK-LSE-O0-LABEL: fetch_and_or_64: ; CHECK-LSE-O0: ; %bb.0: -; CHECK-LSE-O0-NEXT: mov w8, #7 +; CHECK-LSE-O0-NEXT: mov w8, #7 ; =0x7 ; CHECK-LSE-O0-NEXT: ; kill: def $x8 killed $w8 ; CHECK-LSE-O0-NEXT: ldset x8, x0, [x0] ; CHECK-LSE-O0-NEXT: ret @@ -982,19 +982,19 @@ define void @atomc_store(ptr %p) #0 { ; CHECK-NOLSE-LABEL: atomc_store: ; CHECK-NOLSE: ; %bb.0: -; CHECK-NOLSE-NEXT: mov w8, #4 +; CHECK-NOLSE-NEXT: mov w8, #4 ; =0x4 ; CHECK-NOLSE-NEXT: stlr w8, [x0] ; CHECK-NOLSE-NEXT: ret ; ; CHECK-LSE-O1-LABEL: atomc_store: ; CHECK-LSE-O1: ; %bb.0: -; CHECK-LSE-O1-NEXT: mov w8, #4 +; CHECK-LSE-O1-NEXT: mov 
w8, #4 ; =0x4 ; CHECK-LSE-O1-NEXT: stlr w8, [x0] ; CHECK-LSE-O1-NEXT: ret ; ; CHECK-LSE-O0-LABEL: atomc_store: ; CHECK-LSE-O0: ; %bb.0: -; CHECK-LSE-O0-NEXT: mov w8, #4 +; CHECK-LSE-O0-NEXT: mov w8, #4 ; =0x4 ; CHECK-LSE-O0-NEXT: stlr w8, [x0] ; CHECK-LSE-O0-NEXT: ret store atomic i32 4, ptr %p seq_cst, align 4 @@ -2729,24 +2729,23 @@ ; CHECK-NOLSE-O1-LABEL: cmpxchg_i8: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: mov x8, x0 -; CHECK-NOLSE-O1-NEXT: mov w9, w1 -; CHECK-NOLSE-O1-NEXT: mov w1, wzr ; CHECK-NOLSE-O1-NEXT: ; kill: def $w2 killed $w2 def $x2 ; CHECK-NOLSE-O1-NEXT: LBB47_1: ; %cmpxchg.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldxrb w0, [x8] -; CHECK-NOLSE-O1-NEXT: and w10, w0, #0xff -; CHECK-NOLSE-O1-NEXT: cmp w10, w9, uxtb +; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xff +; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxtb ; CHECK-NOLSE-O1-NEXT: b.ne LBB47_4 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB47_1 Depth=1 -; CHECK-NOLSE-O1-NEXT: stxrb w10, w2, [x8] -; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB47_1 +; CHECK-NOLSE-O1-NEXT: stxrb w9, w2, [x8] +; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB47_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.3: -; CHECK-NOLSE-O1-NEXT: mov w1, #1 +; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret ; CHECK-NOLSE-O1-NEXT: LBB47_4: ; %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: mov w1, wzr ; CHECK-NOLSE-O1-NEXT: clrex ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret @@ -2796,24 +2795,23 @@ ; CHECK-NOLSE-O1-LABEL: cmpxchg_i16: ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: mov x8, x0 -; CHECK-NOLSE-O1-NEXT: mov w9, w1 -; CHECK-NOLSE-O1-NEXT: mov w1, wzr ; CHECK-NOLSE-O1-NEXT: ; kill: def $w2 killed $w2 def $x2 ; CHECK-NOLSE-O1-NEXT: LBB48_1: ; %cmpxchg.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldxrh w0, [x8] -; 
CHECK-NOLSE-O1-NEXT: and w10, w0, #0xffff -; CHECK-NOLSE-O1-NEXT: cmp w10, w9, uxth +; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xffff +; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxth ; CHECK-NOLSE-O1-NEXT: b.ne LBB48_4 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB48_1 Depth=1 -; CHECK-NOLSE-O1-NEXT: stxrh w10, w2, [x8] -; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB48_1 +; CHECK-NOLSE-O1-NEXT: stxrh w9, w2, [x8] +; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB48_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.3: -; CHECK-NOLSE-O1-NEXT: mov w1, #1 +; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret ; CHECK-NOLSE-O1-NEXT: LBB48_4: ; %cmpxchg.nostore +; CHECK-NOLSE-O1-NEXT: mov w1, wzr ; CHECK-NOLSE-O1-NEXT: clrex ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NOLSE-O1-NEXT: ret Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -633,6 +633,7 @@ ; CHECK-NOLSE-NEXT: renamable $w9 = LDRHHui killed renamable $x1, 0, pcsections !0 :: (load unordered (s16) from %ir.p16) ; CHECK-NOLSE-NEXT: renamable $w0 = ADDWrx killed renamable $w9, killed renamable $w8, 0, pcsections !0 ; CHECK-NOLSE-NEXT: RET undef $lr, implicit $w0 + ; ; CHECK-LDAPR-LABEL: name: load_zext ; CHECK-LDAPR: bb.0 (%ir-block.0): ; CHECK-LDAPR-NEXT: liveins: $x0, $x1 @@ -659,6 +660,7 @@ ; CHECK-NOLSE-NEXT: renamable $w0 = LDARW killed renamable $x0, pcsections !0 :: (load seq_cst (s32) from %ir.p32) ; CHECK-NOLSE-NEXT: renamable $x1 = LDARX killed renamable $x1, pcsections !0 :: (load acquire (s64) from %ir.p64) ; CHECK-NOLSE-NEXT: RET undef $lr, implicit $w0, implicit $x1 + ; ; CHECK-LDAPR-LABEL: name: load_acq ; CHECK-LDAPR: bb.0 (%ir-block.0): ; CHECK-LDAPR-NEXT: liveins: $x0, $x1 @@ -685,6 +687,7 @@ ; 
CHECK-NOLSE-NEXT: renamable $w9 = SBFMWri killed renamable $w9, 0, 15 ; CHECK-NOLSE-NEXT: renamable $w0 = ADDWrx killed renamable $w9, killed renamable $w8, 32, pcsections !0 ; CHECK-NOLSE-NEXT: RET undef $lr, implicit $w0 + ; ; CHECK-LDAPR-LABEL: name: load_sext ; CHECK-LDAPR: bb.0 (%ir-block.0): ; CHECK-LDAPR-NEXT: liveins: $x0, $x1 @@ -1248,25 +1251,23 @@ ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0 - ; CHECK-NEXT: $w9 = ORRWrs $wzr, $w1, 0 ; CHECK-NEXT: renamable $w2 = KILL $w2, implicit-def $x2 - ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) - ; CHECK-NEXT: liveins: $w1, $w9, $x2, $x8 + ; CHECK-NEXT: liveins: $w1, $x2, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w0, 7, pcsections !0 - ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0 + ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0 + ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000) - ; CHECK-NEXT: liveins: $w1, $w9, $x0, $x2, $x8 + ; CHECK-NEXT: liveins: $w1, $x0, $x2, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr) - ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1 + ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr) + ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1 ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: bb.3: ; CHECK-NEXT: liveins: $x0 @@ -1276,8 +1277,9 @@ ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.cmpxchg.nostore: - ; CHECK-NEXT: liveins: $w1, $x0 + ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 ; CHECK-NEXT: CLREX 15, pcsections !0 ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0 ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 @@ -1292,25 +1294,23 @@ ; CHECK-NEXT: liveins: $w1, $w2, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0 - ; CHECK-NEXT: $w9 = ORRWrs $wzr, $w1, 0 ; CHECK-NEXT: renamable $w2 = KILL $w2, implicit-def $x2 - ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.cmpxchg.start: ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) - ; CHECK-NEXT: liveins: $w1, $w9, $x2, $x8 + ; CHECK-NEXT: liveins: $w1, $x2, $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr) - ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w0, 15, pcsections !0 - ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w10, renamable $w9, 8, implicit-def $nzcv, pcsections !0 + ; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0 + ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.cmpxchg.trystore: ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000) - ; CHECK-NEXT: liveins: $w1, $w9, $x0, $x2, $x8 + ; CHECK-NEXT: liveins: $w1, $x0, $x2, $x8 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr) - ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1 + ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w2, renamable 
$x8, pcsections !0 :: (volatile store (s16) into %ir.ptr) + ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: liveins: $x0 @@ -1320,8 +1320,9 @@ ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.cmpxchg.nostore: - ; CHECK-NEXT: liveins: $w1, $x0 + ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 ; CHECK-NEXT: CLREX 15, pcsections !0 ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0 ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1 Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir @@ -10,9 +10,9 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_merge ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $w0 = COPY [[DEF]](s32) - ; CHECK: $w1 = COPY [[DEF1]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_IMPLICIT_DEF %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) @@ -30,11 +30,11 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_merge_3ops ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $w0 = COPY [[DEF]](s32) - ; CHECK: $w1 = COPY [[DEF1]](s32) - ; CHECK: $w2 = COPY [[DEF2]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $w2 = COPY [[DEF2]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_IMPLICIT_DEF %5:_(s32) = G_IMPLICIT_DEF @@ -54,9 +54,9 @@ 
bb.1: ; CHECK-LABEL: name: test_combine_unmerge_build_vector ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $w0 = COPY [[DEF]](s32) - ; CHECK: $w1 = COPY [[DEF1]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_IMPLICIT_DEF %2:_(<2 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32) @@ -74,11 +74,11 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_buildvector_3ops ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $w0 = COPY [[DEF]](s32) - ; CHECK: $w1 = COPY [[DEF1]](s32) - ; CHECK: $w2 = COPY [[DEF2]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $w2 = COPY [[DEF2]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_IMPLICIT_DEF %5:_(s32) = G_IMPLICIT_DEF @@ -98,9 +98,9 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_concat_vectors ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1 - ; CHECK: $w0 = COPY [[COPY]](<2 x s16>) - ; CHECK: $w1 = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1 + ; CHECK-NEXT: $w0 = COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: $w1 = COPY [[COPY1]](<2 x s16>) %0:_(<2 x s16>) = COPY $w0 %1:_(<2 x s16>) = COPY $w1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>) @@ -118,9 +118,9 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_bitcast_merge ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $w0 = COPY [[DEF]](s32) - ; CHECK: $w1 = COPY [[DEF1]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; 
CHECK-NEXT: $w0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_IMPLICIT_DEF %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) @@ -139,13 +139,13 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK: $h0 = COPY [[UV]](s16) - ; CHECK: $h1 = COPY [[UV1]](s16) - ; CHECK: $h2 = COPY [[UV2]](s16) - ; CHECK: $h3 = COPY [[UV3]](s16) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[MV]](s64) + ; CHECK-NEXT: $h0 = COPY [[UV]](s16) + ; CHECK-NEXT: $h1 = COPY [[UV1]](s16) + ; CHECK-NEXT: $h2 = COPY [[UV2]](s16) + ; CHECK-NEXT: $h3 = COPY [[UV3]](s16) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_IMPLICIT_DEF %2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32) @@ -167,11 +167,11 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types_but_same_size ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: $w0 = COPY [[BITCAST]](s32) - ; CHECK: $w1 = COPY [[BITCAST1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $w1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: $w0 = COPY [[BITCAST]](s32) + ; CHECK-NEXT: $w1 = COPY [[BITCAST1]](s32) 
%0:_(<2 x s16>) = COPY $w0 %1:_(<2 x s16>) = COPY $w1 %2:_(<4 x s16>) = G_CONCAT_VECTORS %0(<2 x s16>), %1(<2 x s16>) @@ -194,37 +194,37 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_cst ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 16 - ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 15 - ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 14 - ; CHECK: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 13 - ; CHECK: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 12 - ; CHECK: [[C5:%[0-9]+]]:_(s8) = G_CONSTANT i8 11 - ; CHECK: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 10 - ; CHECK: [[C7:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 - ; CHECK: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 8 - ; CHECK: [[C9:%[0-9]+]]:_(s8) = G_CONSTANT i8 7 - ; CHECK: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6 - ; CHECK: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 - ; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 - ; CHECK: [[C13:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 - ; CHECK: [[C14:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 - ; CHECK: [[C15:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK: $b0 = COPY [[C]](s8) - ; CHECK: $b1 = COPY [[C1]](s8) - ; CHECK: $b2 = COPY [[C2]](s8) - ; CHECK: $b3 = COPY [[C3]](s8) - ; CHECK: $b4 = COPY [[C4]](s8) - ; CHECK: $b5 = COPY [[C5]](s8) - ; CHECK: $b6 = COPY [[C6]](s8) - ; CHECK: $b7 = COPY [[C7]](s8) - ; CHECK: $b8 = COPY [[C8]](s8) - ; CHECK: $b9 = COPY [[C9]](s8) - ; CHECK: $b10 = COPY [[C10]](s8) - ; CHECK: $b11 = COPY [[C11]](s8) - ; CHECK: $b12 = COPY [[C12]](s8) - ; CHECK: $b13 = COPY [[C13]](s8) - ; CHECK: $b14 = COPY [[C14]](s8) - ; CHECK: $b15 = COPY [[C15]](s8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 15 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 14 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 13 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s8) = G_CONSTANT i8 12 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s8) = G_CONSTANT i8 11 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s8) = G_CONSTANT i8 10 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s8) = G_CONSTANT i8 8 + ; 
CHECK-NEXT: [[C9:%[0-9]+]]:_(s8) = G_CONSTANT i8 7 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 6 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK-NEXT: $b0 = COPY [[C]](s8) + ; CHECK-NEXT: $b1 = COPY [[C1]](s8) + ; CHECK-NEXT: $b2 = COPY [[C2]](s8) + ; CHECK-NEXT: $b3 = COPY [[C3]](s8) + ; CHECK-NEXT: $b4 = COPY [[C4]](s8) + ; CHECK-NEXT: $b5 = COPY [[C5]](s8) + ; CHECK-NEXT: $b6 = COPY [[C6]](s8) + ; CHECK-NEXT: $b7 = COPY [[C7]](s8) + ; CHECK-NEXT: $b8 = COPY [[C8]](s8) + ; CHECK-NEXT: $b9 = COPY [[C9]](s8) + ; CHECK-NEXT: $b10 = COPY [[C10]](s8) + ; CHECK-NEXT: $b11 = COPY [[C11]](s8) + ; CHECK-NEXT: $b12 = COPY [[C12]](s8) + ; CHECK-NEXT: $b13 = COPY [[C13]](s8) + ; CHECK-NEXT: $b14 = COPY [[C14]](s8) + ; CHECK-NEXT: $b15 = COPY [[C15]](s8) %0:_(s128) = G_CONSTANT i128 1339673755198158349044581307228491536 %1:_(s8),%2:_(s8),%3:_(s8),%4:_(s8),%5:_(s8),%6:_(s8),%7:_(s8),%8:_(s8),%9:_(s8),%10:_(s8),%11:_(s8),%12:_(s8),%13:_(s8),%14:_(s8),%15:_(s8),%16:_(s8) = G_UNMERGE_VALUES %0(s128) $b0 = COPY %1(s8) @@ -252,15 +252,12 @@ body: | bb.1: ; CHECK-LABEL: name: test_combine_unmerge_cst_36bit - ; CHECK: [[C:%[0-9]+]]:_(s13) = G_CONSTANT i13 1 - ; CHECK: [[C1:%[0-9]+]]:_(s13) = G_CONSTANT i13 2 - ; CHECK: [[C2:%[0-9]+]]:_(s13) = G_CONSTANT i13 3 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[C]](s13) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s16) = G_ZEXT [[C1]](s13) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s16) = G_ZEXT [[C2]](s13) - ; CHECK: $h0 = COPY [[ZEXT]](s16) - ; CHECK: $h1 = COPY [[ZEXT1]](s16) - ; CHECK: $h2 = COPY [[ZEXT2]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 + ; CHECK-NEXT: $h0 = COPY [[C]](s16) + ; 
CHECK-NEXT: $h1 = COPY [[C1]](s16) + ; CHECK-NEXT: $h2 = COPY [[C2]](s16) %0:_(s39) = G_CONSTANT i39 201342977 %1:_(s13),%2:_(s13),%3:_(s13) = G_UNMERGE_VALUES %0(s39) %4:_(s16) = G_ZEXT %1(s13) @@ -278,13 +275,13 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_fpcst ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 - ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 - ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK: $h0 = COPY [[C]](s16) - ; CHECK: $h1 = COPY [[C1]](s16) - ; CHECK: $h2 = COPY [[C2]](s16) - ; CHECK: $h3 = COPY [[C3]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: $h0 = COPY [[C]](s16) + ; CHECK-NEXT: $h1 = COPY [[C1]](s16) + ; CHECK-NEXT: $h2 = COPY [[C2]](s16) + ; CHECK-NEXT: $h3 = COPY [[C3]](s16) %0:_(s64) = G_FCONSTANT double 0x0004000300020001 %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64) $h0 = COPY %1(s16) @@ -300,8 +297,8 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) - ; CHECK: $h0 = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16) %0:_(s64) = COPY $x0 %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64) $h0 = COPY %1(s16) @@ -314,8 +311,8 @@ bb.1: ; CHECK-LABEL: name: test_dont_combine_unmerge_dead_to_trunc ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: $h0 = COPY [[UV2]](s16) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: $h0 = COPY [[UV2]](s16) 
%0:_(s64) = COPY $x0 %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64) $h0 = COPY %3(s16) @@ -329,10 +326,10 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in_n_out ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32) - ; CHECK: $w0 = COPY [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32) + ; CHECK-NEXT: $w0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<4 x s16>) = COPY $x0 %1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>) $w0 = COPY %1(<2 x s16>) @@ -346,9 +343,9 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64) - ; CHECK: $h0 = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64) + ; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16) %0:_(<2 x s32>) = COPY $x0 %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>) $h0 = COPY %1(s16) @@ -364,9 +361,9 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_zext_to_zext_same_size ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: $w0 = COPY [[COPY]](s32) - ; CHECK: $w1 = COPY [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: $w1 = COPY [[C]](s32) %0:_(s32) = COPY $w0 %3:_(s64) = G_ZEXT %0(s32) %1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %3(s64) @@ 
-383,12 +380,12 @@ bb.1: ; CHECK-LABEL: name: test_combine_unmerge_zext_to_zext ; CHECK: [[COPY:%[0-9]+]]:_(s8) = COPY $b0 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK: $h0 = COPY [[ZEXT]](s16) - ; CHECK: $h1 = COPY [[C]](s16) - ; CHECK: $h2 = COPY [[C]](s16) - ; CHECK: $h3 = COPY [[C]](s16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[COPY]](s8) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK-NEXT: $h0 = COPY [[ZEXT]](s16) + ; CHECK-NEXT: $h1 = COPY [[C]](s16) + ; CHECK-NEXT: $h2 = COPY [[C]](s16) + ; CHECK-NEXT: $h3 = COPY [[C]](s16) %0:_(s8) = COPY $b0 %3:_(s64) = G_ZEXT %0(s8) %1:_(s16),%2:_(s16),%4:_(s16),%5:_(s16) = G_UNMERGE_VALUES %3(s64) @@ -407,12 +404,12 @@ bb.1: ; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_src_bigger ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK: $h0 = COPY [[UV]](s16) - ; CHECK: $h1 = COPY [[UV1]](s16) - ; CHECK: $h2 = COPY [[UV2]](s16) - ; CHECK: $h3 = COPY [[UV3]](s16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK-NEXT: $h0 = COPY [[UV]](s16) + ; CHECK-NEXT: $h1 = COPY [[UV1]](s16) + ; CHECK-NEXT: $h2 = COPY [[UV2]](s16) + ; CHECK-NEXT: $h3 = COPY [[UV3]](s16) %0:_(s32) = COPY $w0 %3:_(s64) = G_ZEXT %0(s32) %1:_(s16),%2:_(s16),%4:_(s16),%5:_(s16) = G_UNMERGE_VALUES %3(s64) @@ -430,10 +427,10 @@ bb.1: ; CHECK-LABEL: name: test_dont_combine_unmerge_zext_to_zext_src_vector ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $w0 - ; CHECK: [[ZEXT:%[0-9]+]]:_(<2 x s32>) = G_ZEXT [[COPY]](<2 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[ZEXT]](<2 x s32>) - ; CHECK: $w0 = COPY [[UV]](s32) - ; CHECK: $w1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<2 x s32>) = G_ZEXT [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](<2 x s32>) + ; CHECK-NEXT: $w0 = COPY [[UV]](s32) + ; CHECK-NEXT: $w1 = COPY [[UV1]](s32) %0:_(<2 x s16>) = COPY $w0 %3:_(<2 x s32>) = G_ZEXT %0(<2 x s16>) %1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %3(<2 x s32>) Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir @@ -8,14 +8,16 @@ body: | bb.1: ; CHECK-LABEL: name: test - ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) - ; CHECK-NEXT: [[UDIVREM:%[0-9]+]]:_(s32), [[UDIVREM1:%[0-9]+]]:_ = G_UDIVREM [[SEXT]], [[SEXT1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UDIVREM1]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UDIVREM]](s32) - ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT2]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483647 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[C]], [[C1]] + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UMULH]], [[C2]](s32) + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[C]], [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UREM]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LSHR]](s32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8) + ; CHECK-NEXT: 
[[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[OR]](s64) ; CHECK-NEXT: $w0 = COPY [[TRUNC1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -47,14 +49,12 @@ body: | bb.1: ; CHECK-LABEL: name: test_inverted_div_rem - ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) - ; CHECK-NEXT: [[UDIVREM:%[0-9]+]]:_(s32), [[UDIVREM1:%[0-9]+]]:_ = G_UDIVREM [[SEXT]], [[SEXT1]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[UDIVREM:%[0-9]+]]:_(s32), [[UDIVREM1:%[0-9]+]]:_ = G_UDIVREM [[C]], [[C]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UDIVREM]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UDIVREM1]](s32) - ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT2]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT]] ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[OR]](s64) ; CHECK-NEXT: $w0 = COPY [[TRUNC1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir @@ -17,21 +17,16 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_BRCOND [[DEF]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C2]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C1]] ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[FREEZE]], [[C]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C1]](s64) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SHL]], [[UDIV]] - ; CHECK-NEXT: G_STORE [[ADD]](s64), [[COPY]](p0) :: (store (s64)) + ; CHECK-NEXT: G_STORE [[UDIV]](s64), [[COPY]](p0) :: (store (s64)) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.1: Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-binop-same-val.mir @@ -30,9 +30,10 @@ ; CHECK-LABEL: name: and_same ; CHECK: liveins: $x0 - ; CHECK: %copy:_(s64) = COPY $x0 - ; CHECK: $x0 = COPY %copy(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s64) = COPY $x0 + ; CHECK-NEXT: $x0 = COPY %copy(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy:_(s64) = COPY $x0 %and:_(s64) = G_AND %copy, %copy $x0 = COPY %and(s64) @@ -50,11 +51,12 @@ ; CHECK-LABEL: name: and_same2 ; CHECK: liveins: $x0, $x1 - ; CHECK: %copy1:_(s64) = COPY $x0 - ; CHECK: %copy2:_(s64) = COPY $x1 - ; CHECK: %or:_(s64) = G_OR %copy1, %copy2 - ; CHECK: $x0 = COPY %or(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy1:_(s64) = COPY $x0 + ; CHECK-NEXT: %copy2:_(s64) = COPY $x1 + ; CHECK-NEXT: %or:_(s64) = G_OR %copy1, %copy2 + ; CHECK-NEXT: $x0 = COPY %or(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy1:_(s64) = COPY $x0 %copy2:_(s64) = COPY $x1 %or:_(s64) = G_OR %copy1, %copy2 @@ -76,14 
+78,15 @@ ; CHECK-LABEL: name: or_and_not_same ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: %copy1:_(s64) = COPY $x0 - ; CHECK: %copy2:_(s64) = COPY $x1 - ; CHECK: %copy3:_(s64) = COPY $x2 - ; CHECK: %or1:_(s64) = G_OR %copy1, %copy2 - ; CHECK: %or2:_(s64) = G_OR %copy1, %copy3 - ; CHECK: %and:_(s64) = G_AND %or1, %or2 - ; CHECK: $x0 = COPY %and(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy1:_(s64) = COPY $x0 + ; CHECK-NEXT: %copy2:_(s64) = COPY $x1 + ; CHECK-NEXT: %copy3:_(s64) = COPY $x2 + ; CHECK-NEXT: %or1:_(s64) = G_OR %copy1, %copy2 + ; CHECK-NEXT: %or2:_(s64) = G_OR %copy1, %copy3 + ; CHECK-NEXT: %and:_(s64) = G_AND %or1, %or2 + ; CHECK-NEXT: $x0 = COPY %and(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy1:_(s64) = COPY $x0 %copy2:_(s64) = COPY $x1 %copy3:_(s64) = COPY $x2 Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir @@ -38,30 +38,36 @@ body: | ; CHECK-LABEL: name: foo ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]] - ; CHECK: G_BRCOND [[XOR]](s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1.if.then: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY1]], [[COPY]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = nsw G_ADD [[ADD]], [[COPY1]] - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2.if.end: - ; CHECK: successors: 
%bb.3(0x80000000) - ; CHECK: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[COPY1]], [[COPY1]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[MUL]], [[C1]] - ; CHECK: bb.3.return: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.1, [[ADD2]](s32), %bb.2 - ; CHECK: $w0 = COPY [[PHI]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]] + ; CHECK-NEXT: G_BRCOND [[XOR]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY1]], [[COPY]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = nsw G_ADD [[ADD]], [[COPY1]] + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.end: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[COPY1]], [[COPY1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[MUL]], [[C1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.return: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.1, [[ADD2]](s32), %bb.2 + ; CHECK-NEXT: $w0 = COPY [[PHI]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.1.entry: liveins: $w0, $w1 @@ -94,18 +100,19 @@ body: | ; CHECK-LABEL: name: dont_combine_same_block ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: $w0, $w1 - ; CHECK: %cond:_(s1) = G_IMPLICIT_DEF - ; CHECK: G_BRCOND %cond(s1), %bb.1 - ; 
CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cond:_(s1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BRCOND %cond(s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: RET_ReallyLR bb.0: liveins: $w0, $w1 %cond:_(s1) = G_IMPLICIT_DEF - ; The G_BRCOND and G_BR have the same target here. Don't change anything. G_BRCOND %cond(s1), %bb.1 G_BR %bb.1 bb.1: Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir @@ -22,17 +22,20 @@ ; DARWIN-LABEL: name: bzero_unknown_width ; DARWIN: liveins: $x0, $x1 - ; DARWIN: %ptr:_(p0) = COPY $x0 - ; DARWIN: %width:_(s64) = COPY $x1 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) - ; DARWIN: RET_ReallyLR + ; DARWIN-NEXT: {{ $}} + ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0 + ; DARWIN-NEXT: %width:_(s64) = COPY $x1 + ; DARWIN-NEXT: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) + ; DARWIN-NEXT: RET_ReallyLR + ; ; UNKNOWN-LABEL: name: bzero_unknown_width ; UNKNOWN: liveins: $x0, $x1 - ; UNKNOWN: %ptr:_(p0) = COPY $x0 - ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 - ; UNKNOWN: %width:_(s64) = COPY $x1 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) - ; UNKNOWN: RET_ReallyLR + ; UNKNOWN-NEXT: {{ $}} + ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0 + ; UNKNOWN-NEXT: %zero:_(s8) = G_CONSTANT i8 0 + ; UNKNOWN-NEXT: %width:_(s64) = COPY $x1 + ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) + ; UNKNOWN-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = COPY $x1 @@ -47,17 +50,20 @@ liveins: $x0, $x1 ; DARWIN-LABEL: name: bzero_tail_unknown_width ; DARWIN: liveins: 
$x0, $x1 - ; DARWIN: %ptr:_(p0) = COPY $x0 - ; DARWIN: %width:_(s64) = COPY $x1 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 1 :: (store (s32)) - ; DARWIN: RET_ReallyLR + ; DARWIN-NEXT: {{ $}} + ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0 + ; DARWIN-NEXT: %width:_(s64) = COPY $x1 + ; DARWIN-NEXT: G_BZERO %ptr(p0), %width(s64), 1 :: (store (s32)) + ; DARWIN-NEXT: RET_ReallyLR + ; ; UNKNOWN-LABEL: name: bzero_tail_unknown_width ; UNKNOWN: liveins: $x0, $x1 - ; UNKNOWN: %ptr:_(p0) = COPY $x0 - ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 - ; UNKNOWN: %width:_(s64) = COPY $x1 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store (s32)) - ; UNKNOWN: RET_ReallyLR + ; UNKNOWN-NEXT: {{ $}} + ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0 + ; UNKNOWN-NEXT: %zero:_(s8) = G_CONSTANT i8 0 + ; UNKNOWN-NEXT: %width:_(s64) = COPY $x1 + ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store (s32)) + ; UNKNOWN-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = COPY $x1 @@ -74,17 +80,20 @@ ; DARWIN-LABEL: name: bzero_constant_width ; DARWIN: liveins: $x0, $x1 - ; DARWIN: %ptr:_(p0) = COPY $x0 - ; DARWIN: %width:_(s64) = G_CONSTANT i64 1024 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) - ; DARWIN: RET_ReallyLR + ; DARWIN-NEXT: {{ $}} + ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0 + ; DARWIN-NEXT: %width:_(s64) = G_CONSTANT i64 1024 + ; DARWIN-NEXT: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) + ; DARWIN-NEXT: RET_ReallyLR + ; ; UNKNOWN-LABEL: name: bzero_constant_width ; UNKNOWN: liveins: $x0, $x1 - ; UNKNOWN: %ptr:_(p0) = COPY $x0 - ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 - ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 1024 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) - ; UNKNOWN: RET_ReallyLR + ; UNKNOWN-NEXT: {{ $}} + ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0 + ; UNKNOWN-NEXT: %zero:_(s8) = G_CONSTANT i8 0 + ; UNKNOWN-NEXT: %width:_(s64) = G_CONSTANT i64 1024 + ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), 
%zero(s8), %width(s64), 0 :: (store (s32)) + ; UNKNOWN-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = G_CONSTANT i64 1024 @@ -101,17 +110,20 @@ ; DARWIN-LABEL: name: bzero_constant_width_minsize ; DARWIN: liveins: $x0, $x1 - ; DARWIN: %ptr:_(p0) = COPY $x0 - ; DARWIN: %width:_(s64) = G_CONSTANT i64 256 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) - ; DARWIN: RET_ReallyLR + ; DARWIN-NEXT: {{ $}} + ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0 + ; DARWIN-NEXT: %width:_(s64) = G_CONSTANT i64 256 + ; DARWIN-NEXT: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) + ; DARWIN-NEXT: RET_ReallyLR + ; ; UNKNOWN-LABEL: name: bzero_constant_width_minsize ; UNKNOWN: liveins: $x0, $x1 - ; UNKNOWN: %ptr:_(p0) = COPY $x0 - ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 - ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) - ; UNKNOWN: RET_ReallyLR + ; UNKNOWN-NEXT: {{ $}} + ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0 + ; UNKNOWN-NEXT: %zero:_(s8) = G_CONSTANT i8 0 + ; UNKNOWN-NEXT: %width:_(s64) = G_CONSTANT i64 256 + ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) + ; UNKNOWN-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = G_CONSTANT i64 256 @@ -128,18 +140,21 @@ ; DARWIN-LABEL: name: not_zero ; DARWIN: liveins: $x0, $x1 - ; DARWIN: %ptr:_(p0) = COPY $x0 - ; DARWIN: %not_zero:_(s8) = G_CONSTANT i8 1 - ; DARWIN: %width:_(s64) = G_CONSTANT i64 256 - ; DARWIN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32)) - ; DARWIN: RET_ReallyLR + ; DARWIN-NEXT: {{ $}} + ; DARWIN-NEXT: %ptr:_(p0) = COPY $x0 + ; DARWIN-NEXT: %not_zero:_(s8) = G_CONSTANT i8 1 + ; DARWIN-NEXT: %width:_(s64) = G_CONSTANT i64 256 + ; DARWIN-NEXT: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32)) + ; DARWIN-NEXT: RET_ReallyLR + ; ; UNKNOWN-LABEL: name: not_zero ; UNKNOWN: liveins: $x0, $x1 - ; UNKNOWN: %ptr:_(p0) = 
COPY $x0 - ; UNKNOWN: %not_zero:_(s8) = G_CONSTANT i8 1 - ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256 - ; UNKNOWN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32)) - ; UNKNOWN: RET_ReallyLR + ; UNKNOWN-NEXT: {{ $}} + ; UNKNOWN-NEXT: %ptr:_(p0) = COPY $x0 + ; UNKNOWN-NEXT: %not_zero:_(s8) = G_CONSTANT i8 1 + ; UNKNOWN-NEXT: %width:_(s64) = G_CONSTANT i64 256 + ; UNKNOWN-NEXT: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32)) + ; UNKNOWN-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 %not_zero:_(s8) = G_CONSTANT i8 1 %width:_(s64) = G_CONSTANT i64 256 Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-concat-vectors.mir @@ -12,12 +12,13 @@ ; CHECK-LABEL: name: concat_to_build_vector ; CHECK: liveins: $x0, $x1, $x2, $x3 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64) - ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>) %0:_(s64) = COPY $x0 %1:_(s64) = COPY $x1 %2:_(s64) = COPY $x2 @@ -37,12 +38,13 @@ ; CHECK-LABEL: name: concat_to_build_vector_ptr ; CHECK: liveins: $x0, $x1, $x2, $x3 - ; CHECK: 
[[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 - ; CHECK: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY2]](p0), [[COPY3]](p0) - ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[COPY2]](p0), [[COPY3]](p0) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>) %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(p0) = COPY $x2 @@ -60,7 +62,7 @@ bb.1: ; CHECK-LABEL: name: concat_to_undef ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CHECK: RET_ReallyLR implicit [[DEF]](<4 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit [[DEF]](<4 x s64>) %4:_(<2 x s64>) = G_IMPLICIT_DEF %5:_(<2 x s64>) = G_IMPLICIT_DEF %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5 @@ -78,11 +80,12 @@ ; CHECK-LABEL: name: concat_to_build_vector_with_undef ; CHECK: liveins: $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[DEF]](s64), [[DEF]](s64) - ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>) %0:_(s64) = 
COPY $x0 %1:_(s64) = COPY $x1 %4:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1 @@ -100,11 +103,12 @@ ; CHECK-LABEL: name: concat_to_build_vector_with_undef_ptr ; CHECK: liveins: $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[DEF]](p0), [[DEF]](p0) - ; CHECK: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x p0>) = G_BUILD_VECTOR [[COPY]](p0), [[COPY1]](p0), [[DEF]](p0), [[DEF]](p0) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>) %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %4:_(<2 x p0>) = G_BUILD_VECTOR %0(p0), %1 @@ -123,10 +127,11 @@ ; CHECK-LABEL: name: concat_to_build_vector_negative_test ; CHECK: liveins: $q0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[DEF]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s64>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[DEF]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s64>) %4:_(<2 x s64>) = COPY $q0 %5:_(<2 x s64>) = G_IMPLICIT_DEF %6:_(<4 x s64>) = G_CONCAT_VECTORS %4(<2 x s64>), %5 Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-copy-prop-disabled.mir =================================================================== --- 
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-copy-prop-disabled.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-copy-prop-disabled.mir @@ -29,13 +29,18 @@ bb.0.entry: liveins: $x0 ; ENABLED-LABEL: name: test_copy - ; ENABLED: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; ENABLED: $x0 = COPY [[COPY]](p0) + ; ENABLED: liveins: $x0 + ; ENABLED-NEXT: {{ $}} + ; ENABLED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; ENABLED-NEXT: $x0 = COPY [[COPY]](p0) + ; ; DISABLED-LABEL: name: test_copy - ; DISABLED: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; DISABLED: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) - ; DISABLED: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY1]](p0) - ; DISABLED: $x0 = COPY [[COPY2]](p0) + ; DISABLED: liveins: $x0 + ; DISABLED-NEXT: {{ $}} + ; DISABLED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; DISABLED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) + ; DISABLED-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY1]](p0) + ; DISABLED-NEXT: $x0 = COPY [[COPY2]](p0) %0:_(p0) = COPY $x0 %1:_(p0) = COPY %0 %2:_(p0) = COPY %1 Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir @@ -8,26 +8,30 @@ body: | ; CHECK-LABEL: name: sext_icst_through_phi ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: %one:_(s32) = G_CONSTANT i32 2 - ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one - ; CHECK: G_BRCOND %cmp(s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32) - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2: - ; CHECK: successors: 
%bb.3(0x80000000) - ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 - ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32) - ; CHECK: bb.3: - ; CHECK: %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2 - ; CHECK: $x0 = COPY %ext(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: %ext:_(s64) = G_PHI [[C]](s64), %bb.1, [[C1]](s64), %bb.2 + ; CHECK-NEXT: $x0 = COPY %ext(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 bb.1.entry: liveins: $w0, $w1 @@ -61,26 +65,30 @@ body: | ; CHECK-LABEL: name: zext_icst_through_phi ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: %one:_(s32) = G_CONSTANT i32 2 - ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one - ; CHECK: G_BRCOND %cmp(s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %cst32_4(s32) - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %cst32_10(s32) - ; CHECK: 
bb.3: - ; CHECK: %ext:_(s64) = G_PHI [[ZEXT]](s64), %bb.1, [[ZEXT1]](s64), %bb.2 - ; CHECK: $x0 = COPY %ext(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: %ext:_(s64) = G_PHI [[C]](s64), %bb.1, [[C1]](s64), %bb.2 + ; CHECK-NEXT: $x0 = COPY %ext(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 bb.1.entry: liveins: $w0, $w1 @@ -114,24 +122,30 @@ body: | ; CHECK-LABEL: name: sext_load_through_phi_vector ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $x0, $q0, $q1 - ; CHECK: %ptr:_(p0) = COPY $x0 - ; CHECK: %cmp:_(s1) = G_IMPLICIT_DEF - ; CHECK: G_BRCOND %cmp(s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>)) - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>)) - ; CHECK: bb.3: - ; CHECK: %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.1, %ld2(<4 x s32>), %bb.2 - ; CHECK: %ext:_(<4 x s64>) = G_SEXT %phi(<4 x s32>) - ; CHECK: G_STORE %ext(<4 x s64>), %ptr(p0) :: (store (<4 x s64>)) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: successors: %bb.1(0x40000000), 
%bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x0, $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 + ; CHECK-NEXT: %cmp:_(s1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>)) + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>)) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.1, %ld2(<4 x s32>), %bb.2 + ; CHECK-NEXT: %ext:_(<4 x s64>) = G_SEXT %phi(<4 x s32>) + ; CHECK-NEXT: G_STORE %ext(<4 x s64>), %ptr(p0) :: (store (<4 x s64>)) + ; CHECK-NEXT: RET_ReallyLR bb.1.entry: liveins: $x0, $q0, $q1 @@ -166,27 +180,33 @@ body: | ; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: %base:_(p0) = COPY $x2 - ; CHECK: %one:_(s32) = G_CONSTANT i32 2 - ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one - ; CHECK: G_BRCOND %cmp(s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4 - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 - ; CHECK: bb.3: - ; CHECK: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2 - ; CHECK: %ext:_(s64) = G_SEXT %phi(s32) - ; CHECK: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64) - ; CHECK: $x0 = COPY %ptr(p0) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1, $x2 
+ ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %base:_(p0) = COPY $x2 + ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst32_4:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst32_10:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2 + ; CHECK-NEXT: %ext:_(s64) = G_SEXT %phi(s32) + ; CHECK-NEXT: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64) + ; CHECK-NEXT: $x0 = COPY %ptr(p0) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 bb.1.entry: liveins: $w0, $w1, $x2 @@ -223,29 +243,33 @@ body: | ; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd_multiuse ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: %base:_(p0) = COPY $x2 - ; CHECK: %one:_(s32) = G_CONSTANT i32 2 - ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one - ; CHECK: G_BRCOND %cmp(s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32) - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 - ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32) - ; CHECK: bb.3: - ; CHECK: %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2 - ; CHECK: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64) - ; CHECK: $x0 = COPY %ptr(p0) - ; CHECK: $x1 = 
COPY %ext(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %base:_(p0) = COPY $x2 + ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: %ext:_(s64) = G_PHI [[C]](s64), %bb.1, [[C1]](s64), %bb.2 + ; CHECK-NEXT: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64) + ; CHECK-NEXT: $x0 = COPY %ptr(p0) + ; CHECK-NEXT: $x1 = COPY %ext(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 bb.1.entry: liveins: $w0, $w1, $x2 @@ -283,31 +307,39 @@ body: | ; CHECK-LABEL: name: zext_icst_through_phi_too_many_incoming ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: %one:_(s32) = G_CONSTANT i32 2 - ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one - ; CHECK: G_BRCOND %cmp(s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) - ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4 - ; CHECK: %cond:_(s1) = G_IMPLICIT_DEF - ; CHECK: G_BRCOND %cond(s1), %bb.3 - ; CHECK: G_BR %bb.4 - ; CHECK: bb.2: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 - ; CHECK: G_BR %bb.4 - ; CHECK: bb.3: - ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: %cst32_42:_(s32) = 
G_CONSTANT i32 42 - ; CHECK: bb.4: - ; CHECK: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2, %cst32_42(s32), %bb.3 - ; CHECK: %ext:_(s64) = G_ZEXT %phi(s32) - ; CHECK: $x0 = COPY %ext(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst32_4:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: %cond:_(s1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BRCOND %cond(s1), %bb.3 + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst32_10:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst32_42:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2, %cst32_42(s32), %bb.3 + ; CHECK-NEXT: %ext:_(s64) = G_ZEXT %phi(s32) + ; CHECK-NEXT: $x0 = COPY %ext(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 bb.1.entry: liveins: $w0, $w1 @@ -347,26 +379,32 @@ body: | ; CHECK-LABEL: name: sext_add_through_phi ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: %one:_(s32) = G_CONSTANT i32 2 - ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one - ; CHECK: G_BRCOND %cmp(s1), %bb.2 - ; CHECK: G_BR 
%bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %add:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 - ; CHECK: bb.3: - ; CHECK: %phi:_(s32) = G_PHI %add(s32), %bb.1, %cst32_10(s32), %bb.2 - ; CHECK: %ext:_(s64) = G_SEXT %phi(s32) - ; CHECK: $x0 = COPY %ext(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %add:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst32_10:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: %phi:_(s32) = G_PHI %add(s32), %bb.1, %cst32_10(s32), %bb.2 + ; CHECK-NEXT: %ext:_(s64) = G_SEXT %phi(s32) + ; CHECK-NEXT: $x0 = COPY %ext(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 bb.1.entry: liveins: $w0, $w1 @@ -400,27 +438,32 @@ body: | ; CHECK-LABEL: name: anyext_add_through_phi ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: %one:_(s32) = G_CONSTANT i32 2 - ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one - ; CHECK: G_BRCOND %cmp(s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.3(0x80000000) - ; 
CHECK: %add:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %add(s32) - ; CHECK: G_BR %bb.3 - ; CHECK: bb.2: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10 - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %cst32_10(s32) - ; CHECK: bb.3: - ; CHECK: %ext:_(s64) = G_PHI [[ANYEXT]](s64), %bb.1, [[ANYEXT1]](s64), %bb.2 - ; CHECK: $x0 = COPY %ext(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one + ; CHECK-NEXT: G_BRCOND %cmp(s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %add:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %add(s32) + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: %ext:_(s64) = G_PHI [[ANYEXT]](s64), %bb.1, [[C]](s64), %bb.2 + ; CHECK-NEXT: $x0 = COPY %ext(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 bb.1.entry: liveins: $w0, $w1 Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ptradd-chain.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ptradd-chain.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ptradd-chain.mir @@ -118,8 +118,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[COPY1]](s64) ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C]](s64) ; CHECK-NEXT: %ld:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64)) ; CHECK-NEXT: $x0 = COPY %ld(s64) Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir @@ -310,8 +310,7 @@ ; CHECK-LABEL: name: look_through_zext ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %zero:_(s8) = G_CONSTANT i8 0 - ; CHECK-NEXT: %zext_zero:_(s64) = G_ZEXT %zero(s8) + ; CHECK-NEXT: %zext_zero:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: $x0 = COPY %zext_zero(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %zero:_(s8) = G_CONSTANT i8 0 @@ -384,7 +383,9 @@ bb.1: liveins: $q0 ; CHECK-LABEL: name: lshr_of_vec_zero - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 ; CHECK-NEXT: $q0 = COPY [[COPY]](<8 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 @@ -400,7 +401,9 @@ bb.1: liveins: $q0 ; CHECK-LABEL: name: ptradd_of_vec_zero - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 ; CHECK-NEXT: $q0 = COPY [[COPY]](<2 x p0>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x p0>) = COPY $q0 @@ -419,7 +422,9 @@ liveins: $x0 ; CHECK-LABEL: name: i128_or_cst - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD 
[[COPY]](p0) :: (load (s128)) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 9223372036854775808 ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s128) = G_OR [[LOAD]], [[C]] Index: llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir @@ -310,7 +310,7 @@ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] - ; CHECK-NEXT: DBG_VALUE {{%[0-9]+}}:_(s16) + ; CHECK-NEXT: DBG_VALUE %6:_(s16) ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} @@ -438,23 +438,22 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 16 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 10 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s16), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16) ; CHECK-NEXT: G_BRCOND [[UADDO1]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[ANYEXT]](s32), %bb.2, [[ANYEXT1]](s32), %bb.0 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.2, [[ANYEXT]](s32), %bb.0 ; CHECK-NEXT: $w0 = COPY [[PHI]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16) - ; CHECK-NEXT: 
$w0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: G_BR %bb.1 bb.1: liveins: $w0, $w1 Index: llvm/test/CodeGen/AArch64/bool-ext-inc.ll =================================================================== --- llvm/test/CodeGen/AArch64/bool-ext-inc.ll +++ llvm/test/CodeGen/AArch64/bool-ext-inc.ll @@ -107,7 +107,7 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov w0, #-1 +; CHECK-NEXT: mov w0, #-1 // =0xffffffff ; CHECK-NEXT: bl callee_signext_i1 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -117,8 +117,7 @@ ; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISEL-NEXT: .cfi_def_cfa_offset 16 ; GISEL-NEXT: .cfi_offset w30, -16 -; GISEL-NEXT: mov w8, #1 -; GISEL-NEXT: sbfx w0, w8, #0, #1 +; GISEL-NEXT: mov w0, #-1 // =0xffffffff ; GISEL-NEXT: bl callee_signext_i1 ; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISEL-NEXT: ret Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir @@ -82,11 +82,9 @@ ; CHECK-LABEL: name: test_lshr_i44 ; CHECK: liveins: $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s44) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $sgpr0 = COPY [[C]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[C]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit 
$sgpr0, implicit $sgpr1 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -148,11 +146,9 @@ ; CHECK-LABEL: name: test_shl_i44 ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s44) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[C]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 @@ -222,10 +218,10 @@ ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s44) = G_SSHLSAT [[TRUNC]], [[C]](s44) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s44) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %1:_(s32) = COPY $sgpr0 %2:_(s32) = COPY $sgpr1 @@ -261,10 +257,10 @@ ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s55) = G_SSHLSAT [[TRUNC]], [[C]](s55) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s55) ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32) - ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %1:_(s32) = COPY $vgpr0 %2:_(s32) = COPY $vgpr1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir @@ -19,6 +19,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: shl_s64_by_2_from_anyext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -51,6 +52,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: shl_s64_by_2_from_sext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -83,6 +85,7 @@ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SHL]](s64) + ; ; GFX9-LABEL: name: 
shl_s64_by_2_from_zext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -116,6 +119,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_anyext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -153,6 +157,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -190,6 +195,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_sext_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -227,6 +233,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL %masked, [[C]](s32) ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s32_lookthrough_amount ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -267,6 +274,7 @@ ; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s32) ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; ; GFX9-LABEL: name: narrow_shl_s32_by_2_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -307,6 +315,7 @@ ; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 ; GFX6-NEXT: %shl:_(s64) = G_SHL %extend, %shiftamt(s32) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; ; GFX9-LABEL: name: narrow_shl_s64_by_2_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -339,18 +348,13 @@ ; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0 - ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) - ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT 
i16 16 - ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX6-NEXT: %shl:_(s32) = G_CONSTANT i32 0 ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; ; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) - ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 - ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX9-NEXT: %shl:_(s32) = G_CONSTANT i32 0 ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) %zero:_(s16) = G_CONSTANT i16 0 %extend:_(s32) = G_ZEXT %zero:_(s16) @@ -377,6 +381,7 @@ ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>) ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + ; ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -413,6 +418,7 @@ ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir @@ -20,6 +20,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16) ; GFX6-NEXT: %shl:_(s32) = G_ZEXT [[SHL]](s16) ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; ; GFX9-LABEL: name: narrow_shl_s32_by_2_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -59,6 +60,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL %masked, [[C]](s16) ; GFX6-NEXT: %shl:_(s64) = G_ZEXT [[SHL]](s16) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(s64) + ; ; GFX9-LABEL: name: 
narrow_shl_s64_by_2_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -98,6 +100,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s8) = G_SHL %masked, [[C]](s8) ; GFX6-NEXT: %result:_(s32) = G_ZEXT [[SHL]](s8) ; GFX6-NEXT: $vgpr0 = COPY %result(s32) + ; ; GFX9-LABEL: name: narrow_shl_s16_by_2_from_zext_s8 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -139,6 +142,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x s16>) ; GFX6-NEXT: %shl:_(<2 x s32>) = G_ZEXT [[SHL]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + ; ; GFX9-LABEL: name: narrow_shl_v2s32_by_2_from_zext_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -181,6 +185,7 @@ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL %masked, %shiftamtvec(<2 x s32>) ; GFX6-NEXT: %shl:_(<2 x s64>) = G_ZEXT [[SHL]](<2 x s32>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x s64>) + ; ; GFX9-LABEL: name: narrow_shl_v2s64_by_2_from_anyext_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -215,19 +220,14 @@ ; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 ; GFX6: liveins: $vgpr0 ; GFX6-NEXT: {{ $}} - ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0 - ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) - ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 - ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX6-NEXT: %extend:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: $vgpr0 = COPY %extend(s32) + ; ; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0 - ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) - ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 - ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) - ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX9-NEXT: %extend:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: $vgpr0 = COPY %extend(s32) %zero:_(s16) = G_CONSTANT i16 0 %extend:_(s32) = G_ZEXT %zero:_(s16) 
%shiftamt:_(s16) = G_CONSTANT i16 16 @@ -253,6 +253,7 @@ ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>) ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + ; ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} @@ -289,6 +290,7 @@ ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ashr.mir @@ -26,8 +26,7 @@ body: | bb.0: ; CHECK-LABEL: name: constant_fold_ashr_s16_s16 - ; CHECK: %shift:_(s16) = G_CONSTANT i16 -772 - ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %shift(s16) + ; CHECK: %ext:_(s32) = G_CONSTANT i32 64764 ; CHECK-NEXT: $vgpr0 = COPY %ext(s32) %val:_(s16) = G_CONSTANT i16 -12345 %shift_amt:_(s16) = G_CONSTANT i16 4 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-lshr.mir @@ -26,8 +26,7 @@ body: | bb.0: ; CHECK-LABEL: name: constant_fold_lshr_s16_s16 - ; CHECK: %shift:_(s16) = G_CONSTANT i16 3324 - ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %shift(s16) + ; CHECK: %ext:_(s32) = G_CONSTANT i32 3324 ; CHECK-NEXT: $vgpr0 = COPY %ext(s32) %val:_(s16) = G_CONSTANT i16 -12345 %shift_amt:_(s16) = G_CONSTANT i16 4 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir 
=================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-shl.mir @@ -26,8 +26,7 @@ body: | bb.0: ; CHECK-LABEL: name: constant_fold_shl_s16_s16 - ; CHECK: %shift:_(s16) = G_CONSTANT i16 912 - ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %shift(s16) + ; CHECK: %ext:_(s32) = G_CONSTANT i32 912 ; CHECK-NEXT: $vgpr0 = COPY %ext(s32) %val:_(s16) = G_CONSTANT i16 12345 %shift_amt:_(s16) = G_CONSTANT i16 4 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1094,22 +1094,18 @@ ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v4, v1 ; CHECK-NEXT: v_mov_b32_e32 v6, 0x1000 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v4, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v3, s[4:5] ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v8 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc -; CHECK-NEXT: s_bfe_i32 s4, 1, 0x10000 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v6, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v3 ; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v4, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 @@ -1132,177 +1128,173 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: 
v_cvt_f32_u32_e32 v4, 0x1000 -; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v8, 0 ; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000 ; GISEL-NEXT: s_subb_u32 s7, 0, 0 -; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v8 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v7, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v7 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v9, 0 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v10, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v10, v4 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v9, v[7:8] -; GISEL-NEXT: v_mul_hi_u32 v8, v9, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v10, v4 -; GISEL-NEXT: v_mul_lo_u32 v11, v9, v7 -; GISEL-NEXT: v_mul_lo_u32 v12, v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v13, v9, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v10, v7 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; GISEL-NEXT: v_trunc_f32_e32 v6, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v6 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v7, 0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v9, v[5:6] +; GISEL-NEXT: v_mul_hi_u32 v10, v7, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v7, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v6, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: 
v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v9, v4 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v11, 0 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v10, v5, vcc -; GISEL-NEXT: v_mov_b32_e32 v4, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, v[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v7, v4 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v10, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v9, v[4:5] ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v11, v[8:9] +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v10, v[6:7] ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v9, v0, v4 -; GISEL-NEXT: v_mul_lo_u32 v0, v5, v7 -; GISEL-NEXT: v_mul_lo_u32 v10, v11, v8 +; GISEL-NEXT: v_xor_b32_e32 v11, v0, v4 +; GISEL-NEXT: v_mul_lo_u32 v0, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, v10, v6 ; GISEL-NEXT: v_xor_b32_e32 v12, v1, v4 -; GISEL-NEXT: v_mul_hi_u32 v1, v11, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: 
v_mul_hi_u32 v1, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v5, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_mul_hi_u32 v10, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v1, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v7, v10, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_mul_hi_u32 v6, v9, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v9, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v9, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x1000 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v12, v1 -; 
GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v9, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v12, v1 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v10, 0 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v12, v1 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v7 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v5, v11, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], 0, v10, v[7:8] -; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000 -; GISEL-NEXT: v_mov_b32_e32 v9, s6 -; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v12, v7, vcc -; GISEL-NEXT: v_sub_i32_e64 v7, s[4:5], v12, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v10, v12, v1 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v10, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v9, v[6:7] +; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000 +; GISEL-NEXT: s_subb_u32 s7, 0, 0 +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v12, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v12, v6 ; GISEL-NEXT: v_cmp_ge_u32_e64 
s[4:5], v0, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, 0x1000 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v7, s[4:5] +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v8 ; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0x1000 -; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v10 -; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v6 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v10, vcc +; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1 +; GISEL-NEXT: v_trunc_f32_e32 v6, v6 +; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v1 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 -; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v13, v1 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v13 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 -; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000 -; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000 -; GISEL-NEXT: v_mov_b32_e32 v15, s4 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v15, v15, v6, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: 
s_subb_u32 s7, 0, 0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v9 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v14, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v14, -1, v14, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2] +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v8 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[6:7] ; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v9, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v13, v0 -; GISEL-NEXT: v_mul_lo_u32 v9, v14, v6 -; GISEL-NEXT: v_mul_hi_u32 v15, v14, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v8, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0 +; GISEL-NEXT: v_mul_lo_u32 v8, v13, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 +; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 -; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 +; GISEL-NEXT: v_mul_lo_u32 v14, v15, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_mul_hi_u32 v8, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: 
v_add_i32_e32 v8, vcc, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v0 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v9, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v10, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v7, vcc ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v9, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8 +; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 -; GISEL-NEXT: v_mul_lo_u32 v7, v9, v6 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8 -; GISEL-NEXT: v_mul_hi_u32 v3, v9, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9 +; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc @@ -1310,7 +1302,7 @@ ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; 
GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v9, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 @@ -1321,13 +1313,13 @@ ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc ; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v9, v10, v4 +; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 @@ -1349,28 +1341,24 @@ ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v7, s6 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc 
-; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GISEL-NEXT: v_mov_b32_e32 v5, s4 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 @@ -1379,10 +1367,10 @@ ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v8 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v8 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: @@ -1394,7 +1382,6 @@ ; CGP-NEXT: s_movk_i32 s7, 0x1000 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; CGP-NEXT: v_trunc_f32_e32 v6, v5 @@ -1492,9 +1479,8 @@ ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc ; CGP-NEXT: v_cvt_f32_u32_e32 v6, 0x1000 -; CGP-NEXT: v_mov_b32_e32 v8, s8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v7, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v8, -1, v7, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v1, 0 ; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v1 @@ -1506,30 +1492,28 @@ ; CGP-NEXT: v_trunc_f32_e32 v6, v6 ; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v13, v1 -; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000 ; CGP-NEXT: 
v_cmp_ge_u32_e32 vcc, v0, v4 -; CGP-NEXT: v_mov_b32_e32 v15, s4 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 -; CGP-NEXT: v_cvt_u32_f32_e32 v16, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v6 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v16, v[1:2] +; CGP-NEXT: v_cndmask_b32_e32 v14, -1, v14, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2] ; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v11 ; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7] -; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v12, vcc +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v16, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v15, v0 ; CGP-NEXT: v_mul_lo_u32 v11, v13, v6 ; CGP-NEXT: v_mul_hi_u32 v14, v13, v0 -; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v16, v0 +; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v15, v0 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v16, v6 +; CGP-NEXT: v_mul_lo_u32 v14, v15, v6 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 ; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 @@ -1537,13 +1521,13 @@ ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 -; CGP-NEXT: v_mul_hi_u32 v6, v16, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v15, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0 -; 
CGP-NEXT: v_addc_u32_e32 v13, vcc, v16, v1, vcc +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc ; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v6, v9, v7, vcc @@ -1609,22 +1593,18 @@ ; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc ; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5 ; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: s_bfe_i32 s6, 1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v7, s6 ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 ; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cndmask_b32_e64 v3, v7, v6, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5] ; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v9 ; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v10, vcc -; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v4, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6 ; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 @@ -1744,22 +1724,18 @@ ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v4, v1 ; CHECK-NEXT: v_mov_b32_e32 v6, 0x12d8fb ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v4, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v3, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v3, s[4:5] ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v8 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v5, vcc -; 
CHECK-NEXT: s_bfe_i32 s4, 1, 0x10000 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v6, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v3 ; CHECK-NEXT: v_addc_u32_e32 v6, vcc, 0, v4, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 @@ -1782,177 +1758,173 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb -; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v8, 0 ; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb ; GISEL-NEXT: s_subb_u32 s7, 0, 0 -; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 +; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v8 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GISEL-NEXT: v_trunc_f32_e32 v7, v5 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v7 -; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v9, 0 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v10, v[5:6] -; GISEL-NEXT: v_mul_lo_u32 v5, v10, v4 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v9, v[7:8] -; GISEL-NEXT: v_mul_hi_u32 v8, v9, v4 -; GISEL-NEXT: v_mul_hi_u32 v4, v10, v4 -; GISEL-NEXT: v_mul_lo_u32 v11, v9, v7 -; GISEL-NEXT: v_mul_lo_u32 v12, v10, v7 -; GISEL-NEXT: v_mul_hi_u32 v13, v9, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v10, v7 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; GISEL-NEXT: v_trunc_f32_e32 v6, v5 +; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v6 +; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v7, 0 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v9, v[5:6] +; GISEL-NEXT: v_mul_hi_u32 v10, v7, v4 +; GISEL-NEXT: v_mad_u64_u32 v[5:6], 
s[4:5], s7, v7, v[5:6] +; GISEL-NEXT: v_mul_lo_u32 v6, v9, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v9, v4 +; GISEL-NEXT: v_mul_lo_u32 v11, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v12, v9, v5 +; GISEL-NEXT: v_mul_hi_u32 v13, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v11, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v9, v4 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v11, 0 -; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v10, v5, vcc -; GISEL-NEXT: v_mov_b32_e32 v4, v8 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, v[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v7, v4 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v5, vcc +; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s6, v10, 0 +; GISEL-NEXT: v_mov_b32_e32 v4, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v9, v[4:5] ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v11, v[8:9] +; GISEL-NEXT: v_mad_u64_u32 
v[6:7], s[4:5], s7, v10, v[6:7] ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; GISEL-NEXT: v_xor_b32_e32 v9, v0, v4 -; GISEL-NEXT: v_mul_lo_u32 v0, v5, v7 -; GISEL-NEXT: v_mul_lo_u32 v10, v11, v8 +; GISEL-NEXT: v_xor_b32_e32 v11, v0, v4 +; GISEL-NEXT: v_mul_lo_u32 v0, v9, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, v10, v6 ; GISEL-NEXT: v_xor_b32_e32 v12, v1, v4 -; GISEL-NEXT: v_mul_hi_u32 v1, v11, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_mul_hi_u32 v1, v10, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v9, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v5, v8 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_mul_hi_u32 v10, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v1, v9, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; GISEL-NEXT: v_mul_hi_u32 v7, v10, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_mul_hi_u32 v6, v9, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v9, v1 -; GISEL-NEXT: v_mul_hi_u32 v10, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; GISEL-NEXT: 
v_add_i32_e32 v0, vcc, v10, v0 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v9, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 ; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v10, v12, v1 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_mul_hi_u32 v8, v9, v1 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v11, v12, v1 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v10, 0 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v12, v1 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v7 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v5, v11, v[1:2] -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v9, v0 -; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], 0, v10, v[7:8] -; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000 -; GISEL-NEXT: v_mov_b32_e32 v9, s6 -; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v12, v7, vcc -; GISEL-NEXT: v_sub_i32_e64 v7, s[4:5], v12, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v10, v12, v1 +; GISEL-NEXT: 
v_mad_u64_u32 v[0:1], s[4:5], v5, v9, 0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v6 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v10, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v9, v[6:7] +; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb +; GISEL-NEXT: s_subb_u32 s7, 0, 0 +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v12, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v12, v6 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 -; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v6, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v7, s[4:5] +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v8 ; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc -; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0x12d8fb -; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v10 -; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc +; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v6 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v9 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v10, vcc +; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v1 +; GISEL-NEXT: v_trunc_f32_e32 v6, v6 +; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v1 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 -; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; GISEL-NEXT: v_trunc_f32_e32 v13, v1 -; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v13 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 -; GISEL-NEXT: 
s_bfe_i32 s4, 1, 0x10000 -; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb -; GISEL-NEXT: v_mov_b32_e32 v15, s4 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0 -; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v15, v15, v6, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: s_subb_u32 s7, 0, 0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v9 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v14, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v14, -1, v14, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2] +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v8 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[6:7] ; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v9, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v13, v0 -; GISEL-NEXT: v_mul_lo_u32 v9, v14, v6 -; GISEL-NEXT: v_mul_hi_u32 v15, v14, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v8, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0 +; GISEL-NEXT: v_mul_lo_u32 v8, v13, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v0 ; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 +; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v15, v13, v6 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 -; GISEL-NEXT: v_mul_hi_u32 v9, v14, v6 -; GISEL-NEXT: v_add_i32_e32 v0, 
vcc, v15, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 -; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 +; GISEL-NEXT: v_mul_lo_u32 v14, v15, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; GISEL-NEXT: v_mul_hi_u32 v8, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v14, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8 +; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v0 -; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v13, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v9, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v10, v7, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v7, vcc ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] -; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4 -; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v9, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc -; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8 +; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4 +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9 +; 
GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9 ; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 -; GISEL-NEXT: v_mul_lo_u32 v7, v9, v6 -; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8 -; GISEL-NEXT: v_mul_hi_u32 v3, v9, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9 +; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc @@ -1960,7 +1932,7 @@ ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v9, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 @@ -1971,13 +1943,13 @@ ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v9, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc ; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 -; GISEL-NEXT: v_xor_b32_e32 v9, v10, v4 +; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 @@ -1999,28 +1971,24 @@ ; GISEL-NEXT: v_mov_b32_e32 v0, v3 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 -; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v7, s6 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5] ; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc -; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; GISEL-NEXT: v_mov_b32_e32 v5, s4 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 @@ -2029,10 +1997,10 @@ ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, v2, v8 -; GISEL-NEXT: v_xor_b32_e32 v3, v3, v8 -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: @@ -2044,7 +2012,6 @@ ; CGP-NEXT: s_mov_b32 s7, 0x12d8fb ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; CGP-NEXT: v_trunc_f32_e32 v6, v5 @@ -2142,9 +2109,8 @@ ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 ; CGP-NEXT: v_subbrev_u32_e32 v1, 
vcc, 0, v6, vcc ; CGP-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb -; CGP-NEXT: v_mov_b32_e32 v8, s8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, v8, v7, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v8, -1, v7, s[4:5] ; CGP-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v1, 0 ; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v1 @@ -2156,30 +2122,28 @@ ; CGP-NEXT: v_trunc_f32_e32 v6, v6 ; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v13, v1 -; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 -; CGP-NEXT: v_mov_b32_e32 v15, s4 -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 -; CGP-NEXT: v_cvt_u32_f32_e32 v16, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v15, v6 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc -; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v16, v[1:2] +; CGP-NEXT: v_cndmask_b32_e32 v14, -1, v14, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2] ; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v11 ; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7] -; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v12, vcc +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v12, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v16, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v15, v0 ; CGP-NEXT: v_mul_lo_u32 v11, v13, v6 ; CGP-NEXT: v_mul_hi_u32 v14, v13, v0 -; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v16, v0 +; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v15, v0 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v14, v16, v6 +; CGP-NEXT: v_mul_lo_u32 v14, v15, v6 ; CGP-NEXT: 
v_add_i32_e32 v1, vcc, v11, v1 ; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0 @@ -2187,13 +2151,13 @@ ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 -; CGP-NEXT: v_mul_hi_u32 v6, v16, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v15, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0 -; CGP-NEXT: v_addc_u32_e32 v13, vcc, v16, v1, vcc +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v15, v1, vcc ; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v6, v9, v7, vcc @@ -2259,22 +2223,18 @@ ; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc ; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5 ; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: s_bfe_i32 s6, 1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v7, s6 ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 ; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_cndmask_b32_e64 v3, v7, v6, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5] ; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v9 ; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v10, vcc -; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v4, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6 ; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1074,23 +1074,21 @@ ; CHECK-NEXT: v_mov_b32_e32 v6, 0x1000 ; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v4, v1, s[4:5] ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 -; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v4, s6 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v3, vcc +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x1000, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x1000, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc @@ -1202,121 +1200,119 @@ ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[1:2] -; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000 +; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000 +; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7] ; GISEL-NEXT: 
v_sub_i32_e32 v8, vcc, v10, v0 ; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v10, s6 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, v10, v1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[4:5] ; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0x1000 ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v8, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v8, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc ; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 ; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GISEL-NEXT: v_trunc_f32_e32 v6, v1 ; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 -; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000 -; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v5 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v6 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 -; GISEL-NEXT: v_cndmask_b32_e32 v16, v10, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2] -; GISEL-NEXT: s_subb_u32 s7, 0, 0 -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v12, v5 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v14, v[6:7] -; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v13, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v12, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0 -; GISEL-NEXT: v_mul_lo_u32 v12, v14, v6 -; GISEL-NEXT: v_mul_hi_u32 v16, v14, v0 -; GISEL-NEXT: 
v_cndmask_b32_e32 v13, v13, v17, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v16 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v7, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v11, v5 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v14, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v6 +; GISEL-NEXT: v_mul_hi_u32 v15, v13, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc +; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v15, v6 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; GISEL-NEXT: v_mul_hi_u32 v12, v14, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v14, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v15, v11 +; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: 
v_add_i32_e32 v1, vcc, v12, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v0 -; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v15, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v12, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v14, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2] +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] ; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4 ; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v12, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v13, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v11, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 ; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8 -; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0 -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v6 -; GISEL-NEXT: v_xor_b32_e32 v13, v3, v8 -; GISEL-NEXT: v_mul_hi_u32 v3, v12, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 +; GISEL-NEXT: v_xor_b32_e32 v10, v2, v8 +; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v11, v6 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v3, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v14, v6 +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, 
v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 -; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v10, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 ; GISEL-NEXT: v_xor_b32_e32 v9, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 +; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v10, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v12, 0 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v11, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GISEL-NEXT: 
v_add_i32_e32 v6, vcc, v7, v0 @@ -1324,25 +1320,23 @@ ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v12, v[6:7] -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 -; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v11, v[6:7] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v10, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[4:5] -; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; GISEL-NEXT: v_mov_b32_e32 v10, s4 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc @@ -1364,7 +1358,6 @@ ; CGP-NEXT: s_movk_i32 s7, 0x1000 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; CGP-NEXT: 
v_trunc_f32_e32 v6, v5 @@ -1454,149 +1447,148 @@ ; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v6, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v9, vcc, v8, v0 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v6, vcc +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v0 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v6, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_cndmask_b32_e64 v11, v6, v1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[4:5] ; CGP-NEXT: v_cvt_f32_u32_e32 v1, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v7, 0 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc -; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v7 +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 ; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, v9, v4 -; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4 +; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 ; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; CGP-NEXT: v_trunc_f32_e32 v7, v1 -; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v14, v0 -; CGP-NEXT: v_cvt_u32_f32_e32 v15, v7 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v16, v6, v8, vcc -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v15, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v12, v4 -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], -1, v14, v[7:8] -; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v13, vcc -; CGP-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v16 -; CGP-NEXT: v_cndmask_b32_e32 v8, v12, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v15, v0 -; CGP-NEXT: v_mul_lo_u32 v12, v14, v7 -; CGP-NEXT: v_mul_hi_u32 v16, v14, v0 -; CGP-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v15, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16 +; CGP-NEXT: v_trunc_f32_e32 v6, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v15, -1, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v4 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v14, v0 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v6 +; CGP-NEXT: v_mul_hi_u32 v15, v13, v0 +; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v15, v7 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; CGP-NEXT: v_mul_hi_u32 v12, v14, v7 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; CGP-NEXT: v_mul_hi_u32 v7, v15, v7 +; CGP-NEXT: v_mul_lo_u32 v15, v14, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v11, v13, 
v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v0 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 +; CGP-NEXT: v_mul_hi_u32 v6, v14, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v7, v1 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v0 -; CGP-NEXT: v_addc_u32_e32 v14, vcc, v15, v1, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v12, 0 -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v8, vcc -; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc -; CGP-NEXT: v_xor_b32_e32 v10, v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v14, v[1:2] -; CGP-NEXT: v_xor_b32_e32 v1, v9, v5 -; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], -1, v12, v[7:8] -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v2, v9 -; CGP-NEXT: v_mul_lo_u32 v2, v14, v0 -; CGP-NEXT: v_mul_lo_u32 v8, v12, v7 -; CGP-NEXT: v_xor_b32_e32 v13, v3, v9 -; CGP-NEXT: v_mul_hi_u32 v3, v12, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v14, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v12, vcc +; CGP-NEXT: v_xor_b32_e32 v9, v6, v5 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v8, v5 +; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v11, v[6:7] ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: 
v_addc_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_xor_b32_e32 v7, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v10, v11, v6 +; CGP-NEXT: v_xor_b32_e32 v12, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v3, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v3, v14, v7 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CGP-NEXT: v_mul_hi_u32 v8, v12, v7 +; CGP-NEXT: v_mul_lo_u32 v3, v13, v6 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v7, v14, v7 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v0 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v8, v11, v2 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v5 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v6, v12, v3 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v2 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v9, v5 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v5, v11, v3 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v5, 
v7, v3 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v11, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, v3 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v12, v2 ; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s7, v3, 0 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v7 -; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v7, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v5, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5 ; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 ; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; 
CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v7, v6, v7, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc ; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v9 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v9 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v8 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, ret <2 x i64> %result @@ -1703,23 +1695,21 @@ ; CHECK-NEXT: v_mov_b32_e32 v6, 0x12d8fb ; CHECK-NEXT: v_subb_u32_e64 v2, vcc, v4, v1, s[4:5] ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 -; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v4, s6 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v6 +; CHECK-NEXT: 
v_cndmask_b32_e32 v3, -1, v3, vcc +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v6 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x12d8fb, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v5, -1, v5, vcc +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x12d8fb, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc @@ -1831,121 +1821,119 @@ ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[1:2] -; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000 +; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb +; GISEL-NEXT: s_subb_u32 s7, 0, 0 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], 0, v8, v[6:7] ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v10, v0 ; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v10, s6 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v11, v10, v1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[4:5] ; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0x12d8fb ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; GISEL-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; GISEL-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v1 
-; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v8, v5 -; GISEL-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v8, v5 +; GISEL-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc ; GISEL-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 ; GISEL-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GISEL-NEXT: v_trunc_f32_e32 v6, v1 ; GISEL-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v0 -; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb -; GISEL-NEXT: v_cvt_u32_f32_e32 v15, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v5 -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v0 +; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v6 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 -; GISEL-NEXT: v_cndmask_b32_e32 v16, v10, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v15, v[1:2] -; GISEL-NEXT: s_subb_u32 s7, 0, 0 -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v12, v5 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v14, v[6:7] -; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v13, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GISEL-NEXT: v_cndmask_b32_e32 v7, v12, v1, vcc -; GISEL-NEXT: v_mul_lo_u32 v1, v15, v0 -; GISEL-NEXT: v_mul_lo_u32 v12, v14, v6 -; GISEL-NEXT: v_mul_hi_u32 v16, v14, v0 -; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v15, v0 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v16 +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v7, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v11, v5 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v13, v[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; 
GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc +; GISEL-NEXT: v_mul_lo_u32 v1, v14, v0 +; GISEL-NEXT: v_mul_lo_u32 v11, v13, v6 +; GISEL-NEXT: v_mul_hi_u32 v15, v13, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc +; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v16, v15, v6 -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; GISEL-NEXT: v_mul_hi_u32 v12, v14, v6 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; GISEL-NEXT: v_mul_hi_u32 v6, v15, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v14, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v15, v11 +; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v1, vcc, v12, v1 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v11, v1 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v0 -; GISEL-NEXT: v_addc_u32_e32 v14, vcc, v15, v1, vcc -; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v12, 0 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v0 +; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v14, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 ; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2] +; 
GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] ; GISEL-NEXT: v_xor_b32_e32 v1, v8, v4 ; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v3 -; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v12, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v13, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v11, v[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v8 ; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc -; GISEL-NEXT: v_xor_b32_e32 v11, v2, v8 -; GISEL-NEXT: v_mul_lo_u32 v2, v14, v0 -; GISEL-NEXT: v_mul_lo_u32 v7, v12, v6 -; GISEL-NEXT: v_xor_b32_e32 v13, v3, v8 -; GISEL-NEXT: v_mul_hi_u32 v3, v12, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v14, v0 +; GISEL-NEXT: v_xor_b32_e32 v10, v2, v8 +; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0 +; GISEL-NEXT: v_mul_lo_u32 v7, v11, v6 +; GISEL-NEXT: v_xor_b32_e32 v12, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v3, v11, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v14, v6 +; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v12, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v11, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0 -; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc -; GISEL-NEXT: v_mul_lo_u32 v3, v13, v0 -; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2 -; GISEL-NEXT: 
v_mul_hi_u32 v7, v11, v0 -; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0 +; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0 +; GISEL-NEXT: v_mul_lo_u32 v6, v10, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v10, v0 +; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0 ; GISEL-NEXT: v_xor_b32_e32 v9, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GISEL-NEXT: v_mul_lo_u32 v7, v13, v2 +; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2 +; GISEL-NEXT: v_mul_hi_u32 v6, v10, v2 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v0, v3 -; GISEL-NEXT: v_mul_hi_u32 v7, v13, v2 -; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v12, 0 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2 +; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v11, 0 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v0 @@ -1953,25 +1941,23 @@ ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v6, v[0:1] ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v9, v4, vcc -; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v12, v[6:7] -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 -; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v13, v3, vcc -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v13, v3 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v11, v[6:7] +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v10, v2 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3 ; GISEL-NEXT: 
v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v2, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v6, v10, v6, s[4:5] -; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v7, v5 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; GISEL-NEXT: v_mov_b32_e32 v10, s4 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v7, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc @@ -1993,7 +1979,6 @@ ; CGP-NEXT: s_mov_b32 s7, 0x12d8fb ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 ; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000 ; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; CGP-NEXT: v_trunc_f32_e32 v6, v5 @@ -2083,149 +2068,148 @@ ; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v6, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v9, vcc, v8, v0 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v6, vcc +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v0 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v11, v6, vcc ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v4 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v6, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 
-; CGP-NEXT: v_cndmask_b32_e64 v11, v6, v1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v10, -1, v1, s[4:5] ; CGP-NEXT: v_cvt_f32_u32_e32 v1, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v7, 0 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc -; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v7 +; CGP-NEXT: v_mac_f32_e32 v1, 0x4f800000, v6 ; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, v9, v4 -; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v0, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4 +; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v0, vcc ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1 ; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 -; CGP-NEXT: v_trunc_f32_e32 v7, v1 -; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v14, v0 -; CGP-NEXT: v_cvt_u32_f32_e32 v15, v7 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v14, 0 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 -; CGP-NEXT: v_cndmask_b32_e32 v16, v6, v8, vcc -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v15, v[1:2] -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v12, v4 -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], -1, v14, v[7:8] -; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v13, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; CGP-NEXT: v_cndmask_b32_e32 v8, v12, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v1, v15, v0 -; CGP-NEXT: v_mul_lo_u32 v12, v14, v7 -; CGP-NEXT: v_mul_hi_u32 v16, v14, v0 -; CGP-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v15, v0 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v16 +; CGP-NEXT: v_trunc_f32_e32 v6, v1 +; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 +; CGP-NEXT: 
v_cndmask_b32_e64 v7, 0, -1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v15, -1, v7, vcc +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v14, v[1:2] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v11, v4 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v13, v[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v12, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v1, v14, v0 +; CGP-NEXT: v_mul_lo_u32 v11, v13, v6 +; CGP-NEXT: v_mul_hi_u32 v15, v13, v0 +; CGP-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v16, v15, v7 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; CGP-NEXT: v_mul_hi_u32 v12, v14, v7 -; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12 -; CGP-NEXT: v_mul_hi_u32 v7, v15, v7 +; CGP-NEXT: v_mul_lo_u32 v15, v14, v6 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v6 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v15, v0 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 +; CGP-NEXT: v_mul_hi_u32 v6, v14, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v7, v1 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v0 -; CGP-NEXT: v_addc_u32_e32 v14, vcc, v15, v1, vcc -; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v12, 0 -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, 
v11 -; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v8, vcc -; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v13, vcc -; CGP-NEXT: v_xor_b32_e32 v10, v7, v5 -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v14, v[1:2] -; CGP-NEXT: v_xor_b32_e32 v1, v9, v5 -; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], -1, v12, v[7:8] -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc -; CGP-NEXT: v_xor_b32_e32 v11, v2, v9 -; CGP-NEXT: v_mul_lo_u32 v2, v14, v0 -; CGP-NEXT: v_mul_lo_u32 v8, v12, v7 -; CGP-NEXT: v_xor_b32_e32 v13, v3, v9 -; CGP-NEXT: v_mul_hi_u32 v3, v12, v0 -; CGP-NEXT: v_mul_hi_u32 v0, v14, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v6, v1 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v0 +; CGP-NEXT: v_addc_u32_e32 v13, vcc, v14, v1, vcc +; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v11, 0 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v12, vcc +; CGP-NEXT: v_xor_b32_e32 v9, v6, v5 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2] +; CGP-NEXT: v_xor_b32_e32 v1, v8, v5 +; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v3 +; CGP-NEXT: v_mad_u64_u32 v[6:7], s[4:5], -1, v11, v[6:7] ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc +; CGP-NEXT: v_xor_b32_e32 v7, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v0 +; CGP-NEXT: v_mul_lo_u32 v10, v11, v6 +; CGP-NEXT: v_xor_b32_e32 v12, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v3, v11, v0 +; CGP-NEXT: v_mul_hi_u32 v0, v13, v0 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v3, v14, v7 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v8, v2 -; CGP-NEXT: v_mul_hi_u32 v8, v12, v7 +; CGP-NEXT: v_mul_lo_u32 v3, v13, v6 +; CGP-NEXT: v_add_i32_e32 v2, 
vcc, v10, v2 +; CGP-NEXT: v_mul_hi_u32 v10, v11, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v7, v14, v7 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; CGP-NEXT: v_mul_hi_u32 v6, v13, v6 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v0 -; CGP-NEXT: v_addc_u32_e32 v2, vcc, v14, v2, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v3 -; CGP-NEXT: v_mul_lo_u32 v8, v11, v2 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v5 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v11, v0 +; CGP-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc +; CGP-NEXT: v_mul_lo_u32 v6, v12, v3 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v2 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v9, v5 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v5, v11, v3 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v7, v3 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v7, v13, v2 -; CGP-NEXT: v_mul_hi_u32 v3, v13, v3 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v11, v2 -; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_mul_lo_u32 v6, v12, v2 +; CGP-NEXT: v_mul_hi_u32 v3, v12, 
v3 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v9 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v13, v2 +; CGP-NEXT: v_mul_hi_u32 v9, v12, v2 ; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], s7, v3, 0 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[3:4] -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v11, v2 -; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v7 -; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v7, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], s7, v5, v[3:4] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v2 +; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5 ; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 ; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v4 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v7, v6, v7, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v5, 
vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc ; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v9 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v9 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v8 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, ret <2 x i64> %result Index: llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -1905,255 +1905,247 @@ ; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v6 -; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 -; GISEL-NEXT: s_bfe_i32 s12, 1, 0x10000 -; GISEL-NEXT: s_bfe_i32 s13, 1, 0x10000 -; GISEL-NEXT: s_bfe_i32 s14, 1, 0x10000 -; GISEL-NEXT: s_bfe_i32 s15, 1, 0x10000 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v1 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v1 -; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v0 -; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v0 -; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], 0, 0, vcc -; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, s[4:5] -; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v5 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v7 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; GISEL-NEXT: 
v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v5 -; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v1 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 +; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v0 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v0 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7 +; GISEL-NEXT: v_trunc_f32_e32 v8, v8 ; GISEL-NEXT: v_trunc_f32_e32 v11, v11 -; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v11 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 -; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v4 -; GISEL-NEXT: v_mul_lo_u32 v4, v6, v7 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v13, v8, v11 -; GISEL-NEXT: v_mul_lo_u32 v14, v6, v12 -; GISEL-NEXT: v_mul_lo_u32 v15, v9, v12 -; GISEL-NEXT: v_mul_hi_u32 v16, v6, v12 -; GISEL-NEXT: v_mul_lo_u32 v17, v8, v5 -; GISEL-NEXT: v_mul_lo_u32 v18, v10, v5 -; GISEL-NEXT: v_mul_hi_u32 v19, v8, v5 -; GISEL-NEXT: v_add_i32_e32 v4, vcc, v15, v4 -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v14 -; GISEL-NEXT: v_mul_hi_u32 v20, v12, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v7, v14 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 +; GISEL-NEXT: 
v_mul_lo_u32 v14, v4, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6 +; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7 +; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14 +; GISEL-NEXT: v_mul_hi_u32 v20, v6, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 ; GISEL-NEXT: v_mul_lo_u32 v18, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v4, v16 -; GISEL-NEXT: v_mul_hi_u32 v4, v5, v17 +; GISEL-NEXT: v_mul_hi_u32 v21, v7, v17 ; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v5, v13 -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v16 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v18, v4 -; GISEL-NEXT: v_mul_lo_u32 v4, v7, v16 -; GISEL-NEXT: v_mul_hi_u32 v18, v12, v16 -; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 -; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13 -; GISEL-NEXT: v_add_i32_e64 v4, s[8:9], v4, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v5, v13 -; GISEL-NEXT: v_add_i32_e64 v17, s[10:11], v19, v17 +; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12 +; GISEL-NEXT: v_mul_lo_u32 v19, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v22, v6, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v23, v7, v13 +; GISEL-NEXT: v_mul_lo_u32 v24, v11, v13 +; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23 +; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v15, vcc, 
v15, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v20, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v20 -; GISEL-NEXT: v_mov_b32_e32 v20, s12 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v4, v15 -; GISEL-NEXT: v_mov_b32_e32 v4, s13 -; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 -; GISEL-NEXT: v_mov_b32_e32 v19, s14 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v12, v15 -; GISEL-NEXT: v_mov_b32_e32 v15, s15 -; GISEL-NEXT: v_mul_hi_u32 v16, v7, v16 -; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 -; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], v5, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; GISEL-NEXT: v_mul_lo_u32 v18, v6, v12 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; GISEL-NEXT: v_mul_hi_u32 v16, v6, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; GISEL-NEXT: v_mul_lo_u32 v17, v8, v5 -; GISEL-NEXT: v_mul_lo_u32 v10, v10, v5 -; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v14, s[6:7] -; GISEL-NEXT: v_mul_hi_u32 v14, v8, v5 -; GISEL-NEXT: v_addc_u32_e64 v11, vcc, v11, v13, s[8:9] -; GISEL-NEXT: v_mul_hi_u32 v13, v12, v18 -; GISEL-NEXT: v_mul_lo_u32 v6, v6, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v17 -; GISEL-NEXT: v_mul_lo_u32 v8, v8, v11 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; 
GISEL-NEXT: v_mul_lo_u32 v10, v7, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v7, v18 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v16 -; GISEL-NEXT: v_mul_lo_u32 v16, v11, v17 -; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; GISEL-NEXT: v_mul_lo_u32 v14, v12, v6 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; GISEL-NEXT: v_mul_lo_u32 v14, v7, v6 -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 -; GISEL-NEXT: v_mul_hi_u32 v10, v12, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v7, v6 -; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8 -; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v18 -; GISEL-NEXT: v_mul_lo_u32 v18, v11, v8 -; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v16, v13 -; GISEL-NEXT: v_mul_hi_u32 v16, v5, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v11, v8 -; GISEL-NEXT: v_add_i32_e64 v17, s[10:11], v18, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v14, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v16, s[8:9], v17, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] -; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v18 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, 
v17, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17 -; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 +; GISEL-NEXT: v_mul_hi_u32 v15, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v18, v11, v13 +; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; GISEL-NEXT: v_mul_lo_u32 v9, v6, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v14, v6, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v20, v11, v5 +; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, 
vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 -; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 -; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v17 -; GISEL-NEXT: v_mul_lo_u32 v13, 0, v10 -; GISEL-NEXT: v_mul_hi_u32 v14, v3, v10 -; GISEL-NEXT: v_mul_hi_u32 v10, 0, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7 +; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7 +; GISEL-NEXT: v_mul_lo_u32 v12, v3, v4 +; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4 +; GISEL-NEXT: v_mul_hi_u32 v14, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4 +; 
GISEL-NEXT: v_mul_lo_u32 v15, v2, v5 ; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5 ; GISEL-NEXT: v_mul_hi_u32 v17, v2, v5 ; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v9 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v12 -; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v7, v6, vcc -; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v11, v8, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v8, v3, v6 -; GISEL-NEXT: v_mul_lo_u32 v9, 0, v6 -; GISEL-NEXT: v_mul_hi_u32 v11, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 -; GISEL-NEXT: v_mul_lo_u32 v12, v2, v7 -; GISEL-NEXT: v_mul_lo_u32 v18, 0, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8 -; GISEL-NEXT: v_mul_hi_u32 v13, v2, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7 -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v10 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v16, v12 -; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], v18, v5 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v11 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v17 -; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], v5, v13 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v13, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 
v14, 0, 1, s[8:9] -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v18, v13 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] -; GISEL-NEXT: v_mul_lo_u32 v13, v1, v9 -; GISEL-NEXT: v_mul_lo_u32 v14, 0, v9 -; GISEL-NEXT: v_mul_hi_u32 v16, v1, v9 -; GISEL-NEXT: v_mul_lo_u32 v17, v0, v5 -; GISEL-NEXT: v_mul_lo_u32 v18, 0, v5 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6 +; GISEL-NEXT: v_mul_hi_u32 v13, v1, v6 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v9 -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v13 -; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], 1, v5 -; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v17 -; GISEL-NEXT: v_add_i32_e64 v17, s[10:11], 1, v12 -; GISEL-NEXT: v_add_i32_e64 v6, s[12:13], v6, v10 -; GISEL-NEXT: v_add_i32_e64 v10, s[12:13], 1, v13 -; GISEL-NEXT: v_add_i32_e64 v7, s[14:15], v7, v8 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[14:15], v3, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v2, v0 -; GISEL-NEXT: v_sub_i32_e64 v3, s[18:19], v3, v1 -; GISEL-NEXT: v_sub_i32_e64 v2, s[20:21], v2, v0 -; GISEL-NEXT: v_mul_lo_u32 v8, v1, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[22:23], v3, v1 -; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v6, vcc -; 
GISEL-NEXT: v_mul_lo_u32 v3, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v14, 0, v7 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v8, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v6 +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v17, v0, v5 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v7 +; GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v5, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v10 +; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v18 +; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v19, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v15 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9 +; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v8, vcc +; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v8 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11 +; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], 0, v13, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v13, s[6:7], 0, v13 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v14, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v15, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v13, vcc, 0, v13, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v1 +; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v13, vcc ; 
GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0 -; GISEL-NEXT: v_addc_u32_e64 v0, s[6:7], 0, v7, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[14:15] -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v14, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[16:17] -; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], v18, v3 -; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], 0, v1, s[10:11] -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v16 -; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], 0, v0, s[12:13] -; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], v3, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], 0, v8, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v11 -; GISEL-NEXT: v_subb_u32_e64 v11, s[10:11], 0, v3, s[8:9] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[22:23] -; GISEL-NEXT: v_cndmask_b32_e64 v2, v20, v2, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v8 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v3 -; GISEL-NEXT: v_subbrev_u32_e64 v8, vcc, 0, v8, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v14, v19, v14, s[10:11] -; GISEL-NEXT: v_subbrev_u32_e64 v8, vcc, 0, v8, s[18:19] -; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[20:21] +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v14 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v11, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v15, v20, s[4:5] -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v17, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v13, v10, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v18, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v16, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v0, v9, v2, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v2, v5, v3, s[8:9] -; 
GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, v4, s[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v18, v17, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v20, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v6, v19, v21, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v6, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_v2i64_24bit: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -974,120 +974,118 @@ ; CHECK-NEXT: v_cvt_f32_u32_e32 v3, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0 ; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 -; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 -; CHECK-NEXT: v_mov_b32_e32 v4, s6 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v3 -; CHECK-NEXT: v_trunc_f32_e32 v5, v5 -; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_mul_lo_u32 v6, v5, s5 -; CHECK-NEXT: v_mul_lo_u32 v7, v3, s5 -; CHECK-NEXT: v_mul_hi_u32 v8, s5, v3 -; 
CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v3 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7 -; CHECK-NEXT: v_mul_hi_u32 v9, v3, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_mul_lo_u32 v10, v3, v6 -; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6 -; CHECK-NEXT: v_mul_hi_u32 v12, v3, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v4, s5 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, s5 ; CHECK-NEXT: v_mul_hi_u32 v7, s5, v3 -; CHECK-NEXT: v_mul_lo_u32 v8, v5, s5 -; CHECK-NEXT: v_mul_lo_u32 v9, v5, v6 -; CHECK-NEXT: v_mul_hi_u32 v10, v3, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 -; CHECK-NEXT: v_sub_i32_e32 v8, vcc, v8, v3 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CHECK-NEXT: v_mul_lo_u32 v8, v3, v7 -; CHECK-NEXT: v_mul_lo_u32 v11, v5, v7 -; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v5, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v8, v3, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; 
CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v11, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v1, v3 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v3 -; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 -; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v3, s5 +; CHECK-NEXT: v_mul_hi_u32 v6, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, s5 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v7, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_mul_lo_u32 v7, v3, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, 
v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v9, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v10 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_mul_lo_u32 v7, v3, s4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, 
vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, v3, s4 ; CHECK-NEXT: v_mul_hi_u32 v3, s4, v3 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v5, v5, s4 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, v5, v3 -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v7 -; CHECK-NEXT: v_subb_u32_e64 v5, vcc, v1, v3, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v4, v4, s4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v6 +; CHECK-NEXT: v_subb_u32_e64 v4, vcc, v1, v3, s[4:5] ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2 -; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[6:7] +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[6:7] ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] ; CHECK-NEXT: s_mov_b64 s[4:5], vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 0x12d8fb, v6 +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x12d8fb, v5 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] -; CHECK-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v2, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v7, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; 
CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem i64 %num, 1235195 ret i64 %result @@ -1099,243 +1097,239 @@ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s4, 0x12d8fb ; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb -; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb +; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; GISEL-NEXT: s_sub_u32 s5, 0, 0x12d8fb -; GISEL-NEXT: v_madmk_f32 v7, v5, 0x4f800000, v6 +; GISEL-NEXT: v_madmk_f32 v7, v6, 0x4f800000, v5 ; GISEL-NEXT: s_subb_u32 s6, 0, 0 -; GISEL-NEXT: s_bfe_i32 s7, 1, 0x10000 -; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GISEL-NEXT: v_mov_b32_e32 v5, s7 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 ; GISEL-NEXT: s_sub_u32 s7, 0, 0x12d8fb -; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; GISEL-NEXT: s_subb_u32 s8, 0, 0 -; GISEL-NEXT: s_bfe_i32 s9, 1, 0x10000 -; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 -; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v6 -; GISEL-NEXT: v_mov_b32_e32 v10, s9 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v5 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 ; GISEL-NEXT: v_trunc_f32_e32 v8, v8 -; GISEL-NEXT: v_trunc_f32_e32 v9, v9 -; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 -; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_lo_u32 v11, s5, v8 +; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v8 +; GISEL-NEXT: 
v_cvt_u32_f32_e32 v8, v8 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_mul_lo_u32 v12, s7, v9 -; GISEL-NEXT: v_mul_lo_u32 v13, s5, v7 -; GISEL-NEXT: v_mul_lo_u32 v14, s6, v7 -; GISEL-NEXT: v_mul_hi_u32 v15, s5, v7 -; GISEL-NEXT: v_mul_lo_u32 v16, s7, v6 -; GISEL-NEXT: v_mul_lo_u32 v17, s8, v6 -; GISEL-NEXT: v_mul_hi_u32 v18, s7, v6 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11 -; GISEL-NEXT: v_mul_lo_u32 v14, v8, v13 -; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13 -; GISEL-NEXT: v_mul_hi_u32 v13, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v16 -; GISEL-NEXT: v_mul_hi_u32 v20, v6, v16 -; GISEL-NEXT: v_mul_hi_u32 v16, v9, v16 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v11 -; GISEL-NEXT: v_mul_lo_u32 v18, v8, v11 -; GISEL-NEXT: v_mul_hi_u32 v21, v7, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v8, v11 -; GISEL-NEXT: v_mul_lo_u32 v22, v6, v12 -; GISEL-NEXT: v_mul_lo_u32 v23, v9, v12 -; GISEL-NEXT: v_mul_hi_u32 v24, v6, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v23, v16 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v24 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v19 -; GISEL-NEXT: 
v_add_i32_e32 v17, vcc, v22, v17 -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v20 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v17 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v11, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, s5, v7 -; GISEL-NEXT: v_mul_lo_u32 v13, s6, v7 -; GISEL-NEXT: v_mul_hi_u32 v14, s5, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v16 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v12, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, s7, v6 -; GISEL-NEXT: v_mul_lo_u32 v15, s8, v6 -; GISEL-NEXT: v_mul_hi_u32 v16, s7, v6 -; GISEL-NEXT: v_mul_lo_u32 v17, s5, v8 -; GISEL-NEXT: v_mul_lo_u32 v18, v8, v11 -; GISEL-NEXT: v_mul_hi_u32 v19, v7, v11 -; GISEL-NEXT: v_mul_hi_u32 v11, v8, v11 -; GISEL-NEXT: v_mul_lo_u32 v20, s7, v9 -; GISEL-NEXT: v_mul_lo_u32 v21, v9, v12 -; GISEL-NEXT: v_mul_hi_u32 v22, v6, v12 -; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v16 -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v13 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13 -; GISEL-NEXT: v_mul_hi_u32 v17, v7, v13 -; GISEL-NEXT: v_mul_hi_u32 v13, v8, v13 -; GISEL-NEXT: v_mul_lo_u32 v20, v6, v14 -; GISEL-NEXT: v_mul_lo_u32 v23, v9, v14 -; GISEL-NEXT: v_mul_hi_u32 v24, v6, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v9, v14 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, s5, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v10, s7, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, s5, 
v6 +; GISEL-NEXT: v_mul_lo_u32 v12, s6, v6 +; GISEL-NEXT: v_mul_hi_u32 v13, s5, v6 +; GISEL-NEXT: v_mul_lo_u32 v14, s7, v5 +; GISEL-NEXT: v_mul_lo_u32 v15, s8, v5 +; GISEL-NEXT: v_mul_hi_u32 v16, s7, v5 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v7, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14 +; GISEL-NEXT: v_mul_hi_u32 v18, v5, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; GISEL-NEXT: v_mul_lo_u32 v13, v6, v9 +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v20, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v21, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v22, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v20, vcc, v21, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v23, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v24 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v21, v19 -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v20 -; GISEL-NEXT: v_add_i32_e32 v11, 
vcc, v11, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v21, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v18, v17 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v13, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7 -; GISEL-NEXT: v_mul_hi_u32 v13, v0, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v14, vcc -; GISEL-NEXT: v_mul_lo_u32 v12, v3, v6 -; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 -; GISEL-NEXT: v_mul_lo_u32 v15, v0, v8 -; GISEL-NEXT: v_mul_lo_u32 v16, v1, v8 -; GISEL-NEXT: v_mul_hi_u32 v17, v0, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 -; GISEL-NEXT: v_mul_lo_u32 v18, v2, v9 -; GISEL-NEXT: v_mul_lo_u32 v19, v3, v9 -; GISEL-NEXT: v_mul_hi_u32 v20, v2, v9 -; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v19, v6 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v17 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v15 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 
1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, s5, v6 +; GISEL-NEXT: v_mul_lo_u32 v11, s6, v6 +; GISEL-NEXT: v_mul_hi_u32 v12, s5, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, s7, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 +; GISEL-NEXT: v_mul_hi_u32 v14, s7, v5 +; GISEL-NEXT: v_mul_lo_u32 v15, s5, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v18, s7, v8 +; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v20, v5, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v14 +; GISEL-NEXT: v_mul_lo_u32 v13, v6, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v11 +; GISEL-NEXT: v_mul_hi_u32 v15, v6, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11 +; GISEL-NEXT: v_mul_lo_u32 v18, v5, v12 +; GISEL-NEXT: v_mul_lo_u32 v21, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v22, v5, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 +; 
GISEL-NEXT: v_add_i32_e32 v10, vcc, v21, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v20 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v22 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v17 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v20 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v11, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v5 +; GISEL-NEXT: v_mul_hi_u32 v12, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 +; GISEL-NEXT: v_mul_lo_u32 v13, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v14, v1, v7 +; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v2, v8 +; GISEL-NEXT: v_mul_lo_u32 v17, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v18, v2, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: 
v_add_i32_e32 v6, vcc, v14, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v18, v12 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v17, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v15 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v18 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, s4 -; GISEL-NEXT: v_mul_hi_u32 v7, s4, v7 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; GISEL-NEXT: v_mul_lo_u32 v14, v6, s4 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v16, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v6, s4 ; GISEL-NEXT: v_mul_hi_u32 v6, s4, v6 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_mul_lo_u32 v12, v5, s4 +; GISEL-NEXT: v_mul_hi_u32 v5, s4, v5 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, s4 ; 
GISEL-NEXT: v_mul_lo_u32 v8, v8, s4 -; GISEL-NEXT: v_mul_lo_u32 v9, v9, s4 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13 -; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v1, v7, s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v7, vcc, v1, v6, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v14 -; GISEL-NEXT: v_subb_u32_e64 v9, vcc, v3, v6, s[6:7] -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v2, v4 -; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v5, v7, s[8:9] +; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12 +; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7] +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v2, v4 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[8:9] ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v6, v5, v6, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] ; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] ; GISEL-NEXT: s_mov_b64 s[4:5], vcc -; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 
0x12d8fb, v11 -; GISEL-NEXT: v_sub_i32_e64 v14, s[6:7], v0, v4 +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9 +; GISEL-NEXT: v_sub_i32_e64 v12, s[6:7], v0, v4 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] ; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v15, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v14, v4 -; GISEL-NEXT: v_subbrev_u32_e64 v15, s[4:5], 0, v1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v13, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4 +; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5] ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5] -; GISEL-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v3, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v14, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, v13, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v12, s[4:5] -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5] +; 
GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_urem_v2i64_oddk_denom: @@ -1343,236 +1337,234 @@ ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: s_mov_b32 s4, 0x12d8fb ; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb -; CGP-NEXT: v_cvt_f32_u32_e32 v6, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; CGP-NEXT: s_mov_b32 s5, 0xffed2705 -; CGP-NEXT: s_bfe_i32 s6, 1, 0x10000 ; CGP-NEXT: v_cvt_f32_u32_e32 v7, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v8, 0 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 -; CGP-NEXT: v_mov_b32_e32 v5, s6 +; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v8 -; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v7 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 ; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 -; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 ; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_trunc_f32_e32 v9, v9 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 ; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9 -; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, v7, s5 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v10, v8, s5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, v9, s5 -; CGP-NEXT: v_mul_lo_u32 v12, v6, s5 -; CGP-NEXT: v_mul_hi_u32 
v13, s5, v6 +; CGP-NEXT: v_mul_lo_u32 v11, v5, s5 +; CGP-NEXT: v_mul_hi_u32 v12, s5, v5 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, v9, v5 +; CGP-NEXT: v_mul_lo_u32 v13, v6, s5 +; CGP-NEXT: v_mul_hi_u32 v14, s5, v6 ; CGP-NEXT: v_sub_i32_e32 v10, vcc, v10, v6 -; CGP-NEXT: v_mul_lo_u32 v14, v7, s5 -; CGP-NEXT: v_mul_hi_u32 v15, s5, v7 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, v11, v7 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_mul_lo_u32 v13, v8, v12 -; CGP-NEXT: v_mul_hi_u32 v16, v6, v12 -; CGP-NEXT: v_mul_hi_u32 v12, v8, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; CGP-NEXT: v_mul_lo_u32 v15, v9, v14 -; CGP-NEXT: v_mul_hi_u32 v17, v7, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v9, v14 -; CGP-NEXT: v_mul_lo_u32 v18, v6, v10 -; CGP-NEXT: v_mul_lo_u32 v19, v8, v10 -; CGP-NEXT: v_mul_hi_u32 v20, v6, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CGP-NEXT: v_mul_lo_u32 v12, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v15, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_mul_lo_u32 v14, v8, v13 +; CGP-NEXT: v_mul_hi_u32 v16, v6, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v8, v13 +; CGP-NEXT: v_mul_lo_u32 v17, v5, v9 +; CGP-NEXT: v_mul_lo_u32 v18, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 +; CGP-NEXT: v_mul_lo_u32 v20, v6, v10 +; CGP-NEXT: v_mul_lo_u32 v21, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v22, v6, v10 ; CGP-NEXT: v_mul_hi_u32 v10, v8, v10 -; CGP-NEXT: v_mul_lo_u32 v21, v7, v11 -; CGP-NEXT: v_mul_lo_u32 v22, v9, v11 -; CGP-NEXT: v_mul_hi_u32 v23, v7, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v9, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v18, v11 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v19, v12 -; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v21 +; CGP-NEXT: v_add_i32_e32 v14, 
vcc, v14, v20 +; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v21, v13 ; CGP-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v22, v14 -; CGP-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v20 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v23 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13 -; CGP-NEXT: v_add_i32_e32 v16, vcc, v19, v16 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v21, v15 -; CGP-NEXT: v_add_i32_e32 v17, vcc, v22, v17 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v19 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v22 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v15 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v20, v14 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v21, v16 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; 
CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v5, s5 +; CGP-NEXT: v_mul_hi_u32 v11, s5, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc ; CGP-NEXT: v_mul_lo_u32 v10, v6, s5 ; CGP-NEXT: v_mul_hi_u32 v12, s5, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v7, s5 -; CGP-NEXT: v_mul_hi_u32 v13, s5, v7 -; CGP-NEXT: v_mul_lo_u32 v14, v8, s5 -; CGP-NEXT: v_mul_lo_u32 v15, v8, v10 -; CGP-NEXT: v_mul_hi_u32 v16, v6, v10 +; CGP-NEXT: v_mul_lo_u32 v13, v7, s5 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 +; CGP-NEXT: v_mul_lo_u32 v16, v8, s5 +; CGP-NEXT: v_mul_lo_u32 v17, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v18, v6, v10 ; CGP-NEXT: v_mul_hi_u32 v10, v8, v10 -; CGP-NEXT: v_mul_lo_u32 v17, v9, s5 -; CGP-NEXT: v_mul_lo_u32 v18, v9, v11 -; CGP-NEXT: v_mul_hi_u32 v19, v7, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v9, v11 -; CGP-NEXT: v_sub_i32_e32 v14, vcc, v14, v6 -; CGP-NEXT: v_sub_i32_e32 v17, vcc, v17, v7 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 -; CGP-NEXT: v_mul_lo_u32 v14, v6, v12 -; CGP-NEXT: v_mul_lo_u32 v17, v8, v12 -; CGP-NEXT: v_mul_hi_u32 v20, v6, v12 +; CGP-NEXT: v_sub_i32_e32 v13, vcc, v13, v5 +; CGP-NEXT: v_sub_i32_e32 v16, vcc, v16, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; CGP-NEXT: v_mul_lo_u32 v13, v5, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v20, v6, v12 +; CGP-NEXT: v_mul_lo_u32 v21, v8, v12 +; CGP-NEXT: v_mul_hi_u32 v22, v6, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v8, v12 -; CGP-NEXT: v_mul_lo_u32 v21, v7, v13 
-; CGP-NEXT: v_mul_lo_u32 v22, v9, v13 -; CGP-NEXT: v_mul_hi_u32 v23, v7, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v17, v10 -; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v18, vcc, v18, v21 -; CGP-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v22, v11 -; CGP-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v20 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v16, v9 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v18, vcc, v18, v19 +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v20 +; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v21, v10 +; CGP-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v19 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v22 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v23 -; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v20, v17 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v21, v18 -; CGP-NEXT: v_add_i32_e32 v17, vcc, v22, v19 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 -; 
CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v11, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc -; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 -; CGP-NEXT: v_mul_hi_u32 v12, v0, v6 -; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v13, v2, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_mul_lo_u32 v14, v0, v8 -; CGP-NEXT: v_mul_lo_u32 v15, v1, v8 -; CGP-NEXT: v_mul_hi_u32 v16, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_mul_lo_u32 v17, v2, v9 -; CGP-NEXT: v_mul_lo_u32 v18, v3, v9 -; CGP-NEXT: v_mul_hi_u32 v19, v2, v9 -; CGP-NEXT: v_mul_hi_u32 v9, v3, v9 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_mul_lo_u32 v10, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v13, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v14, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v15, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_mul_lo_u32 v16, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v17, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v18, v2, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 ; CGP-NEXT: v_cndmask_b32_e64 
v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v17, v6 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v18, v7 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v16 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v18 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v19 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v11, v5, s4 +; CGP-NEXT: v_mul_hi_u32 v5, s4, v5 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 ; CGP-NEXT: v_mul_lo_u32 v12, v6, s4 ; CGP-NEXT: v_mul_hi_u32 v6, s4, v6 -; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v13, v7, s4 -; CGP-NEXT: v_mul_hi_u32 v7, s4, v7 +; 
CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_mul_lo_u32 v7, v7, s4 ; CGP-NEXT: v_mul_lo_u32 v8, v8, s4 -; CGP-NEXT: v_mul_lo_u32 v9, v9, s4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v12 -; CGP-NEXT: v_subb_u32_e64 v8, vcc, v1, v6, s[4:5] -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 +; CGP-NEXT: v_subb_u32_e64 v7, vcc, v1, v5, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 -; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v13 -; CGP-NEXT: v_subb_u32_e64 v9, vcc, v3, v7, s[6:7] -; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12 +; CGP-NEXT: v_subb_u32_e64 v8, vcc, v3, v6, s[6:7] +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v2, v4 -; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v8 -; CGP-NEXT: v_cndmask_b32_e64 v6, v5, v6, s[8:9] +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v9, vcc, v2, v4 +; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[8:9] ; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; CGP-NEXT: v_cndmask_b32_e64 v7, v5, v7, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 +; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5] ; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7] -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] ; CGP-NEXT: s_mov_b64 s[4:5], vcc 
-; CGP-NEXT: v_subrev_i32_e32 v12, vcc, 0x12d8fb, v10 -; CGP-NEXT: v_sub_i32_e64 v13, s[6:7], v0, v4 +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9 +; CGP-NEXT: v_sub_i32_e64 v12, s[6:7], v0, v4 ; CGP-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7] -; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v4 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] ; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 -; CGP-NEXT: v_cndmask_b32_e64 v14, v5, v14, s[4:5] -; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v13, v4 -; CGP-NEXT: v_subbrev_u32_e64 v15, s[4:5], 0, v1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v13, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4 +; CGP-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5] ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3 -; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5] -; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; CGP-NEXT: v_cndmask_b32_e32 v4, v13, v4, vcc -; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v5, v10, v12, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5] +; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 +; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[4:5] -; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v7 -; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v15, 
s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] ; CGP-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i64> %num, ret <2 x i64> %result @@ -2408,245 +2400,243 @@ ; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v6 -; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 -; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000 -; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v1 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v1 -; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_mov_b32_e32 v4, s6 -; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v0 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v0 -; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], 0, 0, vcc -; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 -; GISEL-NEXT: v_mac_f32_e32 v9, 0x4f800000, v5 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v9 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v1 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 +; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v0 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v0 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 -; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v5 -; GISEL-NEXT: v_mul_f32_e32 v12, 0x2f800000, v6 -; GISEL-NEXT: v_trunc_f32_e32 v9, v9 -; GISEL-NEXT: v_trunc_f32_e32 v12, v12 -; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v9 -; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v12 -; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v12 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, 
v5 -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7 +; GISEL-NEXT: v_trunc_f32_e32 v8, v8 +; GISEL-NEXT: v_trunc_f32_e32 v11, v11 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 -; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5 -; GISEL-NEXT: v_mul_lo_u32 v16, v8, v5 -; GISEL-NEXT: v_mul_hi_u32 v17, v7, v5 -; GISEL-NEXT: v_mul_lo_u32 v18, v10, v6 -; GISEL-NEXT: v_mul_lo_u32 v19, v11, v6 -; GISEL-NEXT: v_mul_hi_u32 v20, v10, v6 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; GISEL-NEXT: v_mul_lo_u32 v16, v9, v15 -; GISEL-NEXT: v_mul_hi_u32 v21, v5, v15 -; GISEL-NEXT: v_mul_hi_u32 v15, v9, v15 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v18 -; GISEL-NEXT: v_mul_hi_u32 v22, v6, v18 -; GISEL-NEXT: v_mul_hi_u32 v18, v12, v18 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v20 -; GISEL-NEXT: v_mul_lo_u32 v17, v5, v13 -; GISEL-NEXT: v_mul_lo_u32 v20, v9, v13 -; GISEL-NEXT: v_mul_hi_u32 v23, v5, v13 -; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 -; GISEL-NEXT: v_mul_lo_u32 v24, v6, v14 -; GISEL-NEXT: v_mul_lo_u32 v25, v12, v14 -; GISEL-NEXT: v_mul_hi_u32 v26, v6, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v15 -; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v24 -; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v25, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v21 -; GISEL-NEXT: 
v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v23 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v22 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v26 -; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v20, v21 -; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v19 -; GISEL-NEXT: v_add_i32_e32 v20, vcc, v25, v22 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6 +; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7 +; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14 +; GISEL-NEXT: v_mul_hi_u32 v20, v6, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; GISEL-NEXT: v_mul_lo_u32 v18, v11, v17 +; GISEL-NEXT: v_mul_hi_u32 v21, v7, v17 +; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19 +; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12 +; GISEL-NEXT: v_mul_lo_u32 v19, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v22, v6, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v23, v7, v13 +; GISEL-NEXT: v_mul_lo_u32 v24, v11, v13 +; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; GISEL-NEXT: v_add_i32_e32 v17, vcc, v20, v19 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 -; GISEL-NEXT: 
v_add_i32_e32 v14, vcc, v14, v17 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15 -; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc -; GISEL-NEXT: v_mul_lo_u32 v13, v7, v5 -; GISEL-NEXT: v_mul_lo_u32 v8, v8, v5 -; GISEL-NEXT: v_mul_hi_u32 v15, v7, v5 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18 -; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v12, v14, vcc -; GISEL-NEXT: v_mul_lo_u32 v14, v10, v6 -; GISEL-NEXT: v_mul_lo_u32 v11, v11, v6 -; GISEL-NEXT: v_mul_hi_u32 v16, v10, v6 -; GISEL-NEXT: v_mul_lo_u32 v7, v7, v9 -; GISEL-NEXT: v_mul_lo_u32 v17, v9, v13 -; GISEL-NEXT: v_mul_hi_u32 v18, v5, v13 -; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 -; GISEL-NEXT: v_mul_lo_u32 v10, v10, v12 -; GISEL-NEXT: v_mul_lo_u32 v19, v12, v14 -; GISEL-NEXT: v_mul_hi_u32 v20, v6, v14 -; GISEL-NEXT: v_mul_hi_u32 v14, v12, v14 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v10 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v16 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 -; GISEL-NEXT: v_mul_lo_u32 v11, v9, v7 -; GISEL-NEXT: v_mul_hi_u32 v15, v5, v7 -; GISEL-NEXT: v_mul_hi_u32 v7, v9, v7 -; GISEL-NEXT: v_mul_lo_u32 v16, v6, v8 -; GISEL-NEXT: v_mul_lo_u32 v21, v12, v8 -; GISEL-NEXT: v_mul_hi_u32 v22, v6, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v14, vcc, v21, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23 +; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc +; GISEL-NEXT: 
v_add_i32_e32 v17, vcc, v24, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 -; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v16 -; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20 +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18 +; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v15 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 -; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v9, v7, vcc -; GISEL-NEXT: v_mul_lo_u32 v9, 0, v5 -; GISEL-NEXT: v_mul_hi_u32 v10, v3, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 -; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v12, v8, vcc -; GISEL-NEXT: v_mul_lo_u32 v11, 0, v6 -; GISEL-NEXT: v_mul_hi_u32 v12, v2, v6 +; 
GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6 +; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 +; GISEL-NEXT: v_mul_hi_u32 v15, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, v8 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v18, v11, v13 +; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13 +; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v9 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; GISEL-NEXT: v_mul_lo_u32 v9, v6, v4 +; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 +; GISEL-NEXT: v_mul_hi_u32 v14, v6, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v20, v11, v5 +; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: 
v_add_i32_e32 v9, vcc, v16, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc +; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6 ; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 -; GISEL-NEXT: v_mul_lo_u32 v13, v3, v7 -; GISEL-NEXT: v_mul_lo_u32 v14, 0, v7 -; GISEL-NEXT: v_mul_hi_u32 v15, v3, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7 +; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7 ; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7 -; GISEL-NEXT: v_mul_lo_u32 v16, v2, v8 -; GISEL-NEXT: v_mul_lo_u32 v17, 0, v8 -; GISEL-NEXT: v_mul_hi_u32 v18, v2, v8 -; GISEL-NEXT: v_mul_hi_u32 v8, 0, v8 -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v12, v3, v4 +; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4 +; GISEL-NEXT: v_mul_hi_u32 v14, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4 +; GISEL-NEXT: v_mul_lo_u32 v15, v2, v5 +; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5 +; GISEL-NEXT: v_mul_hi_u32 v17, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v13, v6 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v14, v5 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; 
GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v17, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GISEL-NEXT: v_mul_lo_u32 v10, v1, v5 -; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5 -; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 -; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6 -; GISEL-NEXT: v_mul_lo_u32 v14, 0, v6 -; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; GISEL-NEXT: v_mul_lo_u32 v7, v1, v7 -; GISEL-NEXT: v_mul_lo_u32 v8, v0, v8 -; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 -; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8 -; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; 
GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v10 -; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], 0, v5, vcc -; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], 0, v5 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v13, 0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v0, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v4, v1, v4 +; GISEL-NEXT: v_mul_lo_u32 v5, v0, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9 +; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v4, vcc +; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], 0, v4 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v12 -; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], 0, v6, s[4:5] -; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11 +; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], 0, v5 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 -; GISEL-NEXT: v_cndmask_b32_e64 v8, v4, v8, s[6:7] -; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v4, v10, vcc -; GISEL-NEXT: v_subbrev_u32_e64 v6, vcc, 0, v6, 
s[4:5] -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v3, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v7, -1, v7, s[6:7] +; GISEL-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v3, v1 +; GISEL-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v2, v0 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v1 -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v2, v0 -; GISEL-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v13, v0 -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v11, vcc +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v10, v1 +; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v4, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v12, v4, v12, vcc -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v11, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v13, -1, v13, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v12, v0 ; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v5, vcc -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, v0, s[4:5] ; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v13, v0 -; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v6, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; 
GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 -; GISEL-NEXT: v_cndmask_b32_e64 v4, v13, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v15, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v6, v14, s[4:5] -; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v15, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_urem_v2i64_24bit: Index: llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -131,65 +131,35 @@ ; FIXME: DAG and GlobalISel return different values for i1 true define i1 @allflags_f16(half %x) nounwind { -; GFX7SELDAG-LABEL: allflags_f16: -; GFX7SELDAG: ; %bb.0: -; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7SELDAG-NEXT: v_mov_b32_e32 v0, 1 -; GFX7SELDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX7GLISEL-LABEL: allflags_f16: -; GFX7GLISEL: ; %bb.0: -; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, -1 -; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX8SELDAG-LABEL: allflags_f16: -; GFX8SELDAG: ; %bb.0: -; GFX8SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8SELDAG-NEXT: v_mov_b32_e32 v0, 1 -; GFX8SELDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8GLISEL-LABEL: allflags_f16: -; GFX8GLISEL: ; %bb.0: -; GFX8GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX8GLISEL-NEXT: v_mov_b32_e32 v0, -1 -; GFX8GLISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX9SELDAG-LABEL: allflags_f16: -; GFX9SELDAG: ; %bb.0: -; GFX9SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9SELDAG-NEXT: v_mov_b32_e32 v0, 1 -; GFX9SELDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9GLISEL-LABEL: allflags_f16: -; GFX9GLISEL: ; %bb.0: -; GFX9GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9GLISEL-NEXT: v_mov_b32_e32 v0, -1 -; GFX9GLISEL-NEXT: s_setpc_b64 s[30:31] +; GFX7CHECK-LABEL: allflags_f16: +; GFX7CHECK: ; %bb.0: +; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7CHECK-NEXT: v_mov_b32_e32 v0, 1 +; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] ; -; GFX10SELDAG-LABEL: allflags_f16: -; GFX10SELDAG: ; %bb.0: -; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, 1 -; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31] +; GFX8CHECK-LABEL: allflags_f16: +; GFX8CHECK: ; %bb.0: +; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8CHECK-NEXT: v_mov_b32_e32 v0, 1 +; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] ; -; GFX10GLISEL-LABEL: allflags_f16: -; GFX10GLISEL: ; %bb.0: -; GFX10GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10GLISEL-NEXT: v_mov_b32_e32 v0, -1 -; GFX10GLISEL-NEXT: s_setpc_b64 s[30:31] +; GFX9CHECK-LABEL: allflags_f16: +; GFX9CHECK: ; %bb.0: +; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9CHECK-NEXT: v_mov_b32_e32 v0, 1 +; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] ; -; GFX11SELDAG-LABEL: allflags_f16: -; GFX11SELDAG: ; %bb.0: -; GFX11SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11SELDAG-NEXT: v_mov_b32_e32 v0, 1 -; GFX11SELDAG-NEXT: s_setpc_b64 s[30:31] +; GFX10CHECK-LABEL: allflags_f16: +; GFX10CHECK: ; %bb.0: +; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10CHECK-NEXT: v_mov_b32_e32 v0, 1 +; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] ; -; GFX11GLISEL-LABEL: allflags_f16: -; GFX11GLISEL: ; %bb.0: -; GFX11GLISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX11GLISEL-NEXT: v_mov_b32_e32 v0, -1 -; GFX11GLISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11CHECK-LABEL: allflags_f16: +; GFX11CHECK: ; %bb.0: +; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11CHECK-NEXT: v_mov_b32_e32 v0, 1 +; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1023) ; 0x3ff ret i1 %1 }