diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -736,6 +736,14 @@
   bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info);
 
+  /// Transform G_ADD(x, -cst) to G_SUB(x, cst).
+  bool matchAddNegConstant(MachineInstr &MI, APInt &Cst);
+  bool applyAddNegConstant(MachineInstr &MI, APInt &Cst);
+
+  /// Transform G_ADD(x, G_SUB(y, x)) to y.
+  /// Transform G_ADD(G_SUB(y, x), x) to y.
+  bool matchAddSubSameReg(MachineInstr &MI, Register &Src);
+
 private:
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -898,6 +898,24 @@
          [{ return Helper.matchCombineFMinMaxNaN(*${root}, ${info}); }]),
   (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${info}); }])>;
 
+// Transform (add x, -cst) -> (sub x, cst)
+def add_neg_constant_matchinfo : GIDefMatchData<"APInt">;
+def add_neg_constant: GICombineRule<
+  (defs root:$root, add_neg_constant_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_ADD):$root,
+         [{ return Helper.matchAddNegConstant(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyAddNegConstant(*${root}, ${matchinfo}); }])>;
+
+// Transform (add x, (sub y, x)) -> y
+// Transform (add (sub y, x), x) -> y
+def add_sub_reg_matchinfo : GIDefMatchData<"Register">;
+def add_sub_reg: GICombineRule <
+  (defs root:$root, add_sub_reg_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_ADD):$root,
+         [{ return Helper.matchAddSubSameReg(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithReg(*${root},
+                                                      ${matchinfo}); }])>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -913,11 +931,13 @@
                                         binop_same_val, binop_left_to_zero,
                                         binop_right_to_zero, p2i_to_i2p,
                                         i2p_to_p2i, anyext_trunc_fold,
-                                        fneg_fneg_fold, right_identity_one]>;
+                                        fneg_fneg_fold, right_identity_one,
+                                        add_sub_reg]>;
 
 def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
                                      overlapping_and, mulo_by_2, mulo_by_0,
-                                     addo_by_0, combine_minmax_nan]>;
+                                     addo_by_0, combine_minmax_nan,
+                                     add_neg_constant]>;
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5628,6 +5628,46 @@
   return MatchNaN(1) || MatchNaN(2);
 }
 
+bool CombinerHelper::matchAddNegConstant(MachineInstr &MI, APInt &Cst) {
+  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
+  Register CstSrc = MI.getOperand(2).getReg();
+  MachineInstr *MaybeCst = MRI.getVRegDef(CstSrc);
+
+  if (MaybeCst->getOpcode() != TargetOpcode::G_CONSTANT)
+    return false;
+  Cst = MaybeCst->getOperand(1).getCImm()->getValue();
+  return Cst.isNegative();
+}
+
+bool CombinerHelper::applyAddNegConstant(MachineInstr &MI, APInt &Cst) {
+  assert(MI.getOpcode() == TargetOpcode::G_ADD && Cst.isNegative() &&
+         "Expected a G_ADD and negative constant");
+  Register Dst = MI.getOperand(0).getReg();
+  Register Reg = MI.getOperand(1).getReg();
+
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildSub(Dst, Reg, Builder.buildConstant(MRI.getType(Reg), -Cst),
+                   MI.getFlags());
+  MI.eraseFromParent();
+  return true;
+}
+
+bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
+  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  // Helper lambda to check for opportunities for
+  // A + (B - A) -> B
+  // (B - A) + A -> B
+  auto CheckFold = [&](Register &MaybeSub, Register &MaybeSameReg) {
+    Register Reg;
+    return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
+           Reg == MaybeSameReg;
+  };
+  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add.mir
@@ -0,0 +1,197 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+
+---
+name:            add_neg_constant
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w0
+    ; CHECK-LABEL: name: add_neg_constant
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C]]
+    ; CHECK-NEXT: $w0 = COPY [[SUB]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 -1
+    %2:_(s32) = G_ADD %0, %1
+    $w0 = COPY %2
+...
+---
+name:            add_neg_constant_wide
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: add_neg_constant_wide
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s128) = G_SUB [[COPY]], [[C]]
+    ; CHECK-NEXT: $q0 = COPY [[SUB]](s128)
+    %0:_(s128) = COPY $q0
+    %1:_(s128) = G_CONSTANT i128 -1
+    %2:_(s128) = G_ADD %0, %1
+    $q0 = COPY %2
+...
+---
+name:            add_neg_constant_vec
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: add_neg_constant_vec
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<4 x s16>) = G_ADD [[COPY]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $x0 = COPY [[ADD]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $x0
+    %1:_(s16) = G_CONSTANT i16 -1
+    %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1
+    %3:_(<4 x s16>) = G_ADD %0, %2
+    $x0 = COPY %3(<4 x s16>)
+...
+---
+name:            add_lhs_sub_reg
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: add_lhs_sub_reg
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_SUB %0, %1
+    %3:_(s32) = G_ADD %2, %1
+    $w0 = COPY %3
+...
+---
+name:            add_lhs_sub_reg_wide
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_lhs_sub_reg_wide
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $q0
+    ; CHECK-NEXT: $q0 = COPY [[COPY]](s128)
+    %0:_(s128) = COPY $q0
+    %1:_(s128) = COPY $q1
+    %2:_(s128) = G_SUB %0, %1
+    %3:_(s128) = G_ADD %2, %1
+    $q0 = COPY %3
+...
+---
+name:            add_lhs_sub_reg_vec
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: add_lhs_sub_reg_vec
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0
+    ; CHECK-NEXT: $x0 = COPY [[COPY]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $x0
+    %1:_(<4 x s16>) = COPY $x1
+    %2:_(<4 x s16>) = G_SUB %0, %1
+    %3:_(<4 x s16>) = G_ADD %2, %1
+    $x0 = COPY %3
+...
+---
+name:            add_rhs_sub_reg
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: add_rhs_sub_reg
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_SUB %0, %1
+    %3:_(s32) = G_ADD %1, %2
+    $w0 = COPY %3
+...
+---
+name:            add_rhs_sub_reg_wide
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: add_rhs_sub_reg_wide
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $q0
+    ; CHECK-NEXT: $q0 = COPY [[COPY]](s128)
+    %0:_(s128) = COPY $q0
+    %1:_(s128) = COPY $q1
+    %2:_(s128) = G_SUB %0, %1
+    %3:_(s128) = G_ADD %1, %2
+    $q0 = COPY %3
+...
+---
+name:            add_rhs_sub_reg_vec
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: add_rhs_sub_reg_vec
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0
+    ; CHECK-NEXT: $x0 = COPY [[COPY]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $x0
+    %1:_(<4 x s16>) = COPY $x1
+    %2:_(<4 x s16>) = G_SUB %0, %1
+    %3:_(<4 x s16>) = G_ADD %1, %2
+    $x0 = COPY %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -126,9 +126,9 @@
 ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0xffffffc0
-; GFX8-NEXT:    v_add_u16_e32 v1, 0xffc0, v0
-; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_mov_b32_e32 v2, 64
+; GFX8-NEXT:    v_subrev_u16_e32 v1, 64, v0
+; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -154,7 +154,7 @@
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 4
-; GFX8-NEXT:    v_add_u16_e32 v1, 0xffc0, v0
+; GFX8-NEXT:    v_subrev_u16_e32 v1, 64, v0
 ; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -180,10 +180,10 @@
 ; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffc0
-; GFX8-NEXT:    v_add_u16_e32 v2, 4, v0
-; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT:    v_mov_b32_e32 v2, 64
+; GFX8-NEXT:    v_add_u16_e32 v1, 4, v0
+; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_add_v2i16_neg_inline_imm_hi:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir
@@ -73,9 +73,8 @@
     ; GCN-NEXT: %shift_amt:_(s32) = COPY $vgpr1
    ; GCN-NEXT: %one:_(s32) = G_CONSTANT i32 1
    ; GCN-NEXT: %one_bit:_(s32) = G_SHL %one, %shift_amt(s32)
-    ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-    ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD %one_bit, [[C]]
-    ; GCN-NEXT: %rem:_(s32) = G_AND %var, [[ADD]]
+    ; GCN-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB %one_bit, %one
+    ; GCN-NEXT: %rem:_(s32) = G_AND %var, [[SUB]]
     ; GCN-NEXT: $vgpr0 = COPY %rem(s32)
     %var:_(s32) = COPY $vgpr0
     %shift_amt:_(s32) = COPY $vgpr1
@@ -99,9 +98,8 @@
     ; GCN-NEXT: %shiftamt:_(s32) = COPY $vgpr2
    ; GCN-NEXT: %one:_(s64) = G_CONSTANT i64 1
    ; GCN-NEXT: %one_bit:_(s64) = G_SHL %one, %shiftamt(s32)
-    ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
-    ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %one_bit, [[C]]
-    ; GCN-NEXT: %rem:_(s64) = G_AND %var, [[ADD]]
+    ; GCN-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB %one_bit, %one
+    ; GCN-NEXT: %rem:_(s64) = G_AND %var, [[SUB]]
     ; GCN-NEXT: $vgpr0_vgpr1 = COPY %rem(s64)
     %var:_(s64) = COPY $vgpr0_vgpr1
     %shiftamt:_(s32) = COPY $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -1254,7 +1254,7 @@
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_mov_b32 s0, s2
 ; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_add_i32 m0, s18, -1
+; GCN-NEXT:    s_sub_i32 m0, s18, 1
 ; GCN-NEXT:    s_mov_b32 s2, s4
 ; GCN-NEXT:    s_mov_b32 s3, s5
 ; GCN-NEXT:    s_mov_b32 s4, s6
@@ -1276,7 +1276,7 @@
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_mov_b32 s0, s2
 ; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_add_i32 m0, s18, -1
+; GFX10-NEXT:    s_sub_i32 m0, s18, 1
 ; GFX10-NEXT:    s_mov_b32 s2, s4
 ; GFX10-NEXT:    s_mov_b32 s3, s5
 ; GFX10-NEXT:    s_mov_b32 s4, s6
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll
@@ -50,8 +50,8 @@
 ; LOOP-NEXT:    buffer_load_ubyte v8, v[6:7], s[0:3], 0 addr64
 ; LOOP-NEXT:    v_add_i32_e32 v6, vcc, v0, v4
 ; LOOP-NEXT:    v_addc_u32_e32 v7, vcc, v1, v5, vcc
-; LOOP-NEXT:    v_add_i32_e32 v4, vcc, -1, v4
-; LOOP-NEXT:    v_addc_u32_e32 v5, vcc, -1, v5, vcc
+; LOOP-NEXT:    v_subrev_i32_e32 v4, vcc, 1, v4
+; LOOP-NEXT:    v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
 ; LOOP-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v4
 ; LOOP-NEXT:    s_waitcnt vmcnt(0)
 ; LOOP-NEXT:    buffer_store_byte v8, v[6:7], s[0:3], 0 addr64
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -928,7 +928,7 @@
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 24
 ; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 23, v3
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0xff800000, v0
+; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, 0x800000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -982,7 +982,7 @@
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s3, 23
-; GFX8-NEXT:    s_add_i32 s1, s1, 0xff800000
+; GFX8-NEXT:    s_sub_i32 s1, s1, 0x800000
 ; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s1, s2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -928,7 +928,7 @@
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 24
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 23, v3
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0xff800000, v0
+; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, 0x800000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
@@ -982,7 +982,7 @@
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s3, 23
-; GFX8-NEXT:    s_add_i32 s1, s1, 0xff800000
+; GFX8-NEXT:    s_sub_i32 s1, s1, 0x800000
 ; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s1, s2
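
Illustration (not part of the patch): a minimal LLVM IR sketch of the two input shapes the new combines rewrite, mirroring the MIR and AMDGPU test updates above. The function names are made up for the example, and the exact output depends on the target, since the rules are only picked up by combiners that include the const_combines and identity_combines groups.

define i32 @add_neg_imm(i32 %x) {
  ; add_neg_constant: an add of a negative constant is expected to be
  ; rewritten to a subtract of the positive constant (here, sub of 64).
  %r = add i32 %x, -64
  ret i32 %r
}

define i32 @add_sub_same_reg(i32 %x, i32 %y) {
  ; add_sub_reg: (y - x) + x is expected to fold away entirely, leaving
  ; just %y as the result.
  %d = sub i32 %y, %x
  %r = add i32 %d, %x
  ret i32 %r
}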