diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -147,9 +147,10 @@
   bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
   bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
 
-  bool matchElideBrByInvertingCond(MachineInstr &MI);
-  void applyElideBrByInvertingCond(MachineInstr &MI);
-  bool tryElideBrByInvertingCond(MachineInstr &MI);
+  /// If a brcond's true block is not the fallthrough, make it so by inverting
+  /// the condition and swapping operands.
+  bool matchOptBrCondByInvertingCond(MachineInstr &MI);
+  void applyOptBrCondByInvertingCond(MachineInstr &MI);
 
   /// If \p MI is G_CONCAT_VECTORS, try to combine it.
   /// Returns true if MI changed.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -245,5 +245,9 @@
 /// the value \p Val contains a true value.
 bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
                     bool IsFP);
+
+/// Returns an integer representing true, as defined by the
+/// TargetBooleanContents.
+int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
 } // End namespace llvm.
 #endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -145,13 +145,11 @@
          [{ return Helper.matchCombineIndexedLoadStore(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyCombineIndexedLoadStore(*${root}, ${matchinfo}); }])>;
 
-// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of
-// all_combines because it wasn't there.
-def elide_br_by_inverting_cond : GICombineRule<
+def opt_brcond_by_inverting_cond : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_BR):$root,
-         [{ return Helper.matchElideBrByInvertingCond(*${root}); }]),
-  (apply [{ Helper.applyElideBrByInvertingCond(*${root}); }])>;
+         [{ return Helper.matchOptBrCondByInvertingCond(*${root}); }]),
+  (apply [{ Helper.applyOptBrCondByInvertingCond(*${root}); }])>;
 
 def ptr_add_immed_matchdata : GIDefMatchData<"PtrAddChain">;
 def ptr_add_immed_chain : GICombineRule<
@@ -416,4 +414,4 @@
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
-    not_cmp_fold]>;
+    not_cmp_fold, opt_brcond_by_inverting_cond]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -881,14 +881,12 @@
   LLVM_DEBUG(dbgs() << "    Combinined to indexed operation");
 }
 
-bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
   if (MI.getOpcode() != TargetOpcode::G_BR)
     return false;
 
   // Try to match the following:
   // bb1:
-  //   %c(s32) = G_ICMP pred, %a, %b
-  //   %c1(s1) = G_TRUNC %c(s32)
   //   G_BRCOND %c1, %bb2
   //   G_BR %bb3
   // bb2:
@@ -898,7 +896,7 @@
   // The above pattern does not have a fall through to the successor bb2, always
   // resulting in a branch no matter which path is taken. Here we try to find
   // and replace that pattern with conditional branch to bb3 and otherwise
-  // fallthrough to bb2.
+  // fallthrough to bb2. This is generally better for branch predictors.
 
   MachineBasicBlock *MBB = MI.getParent();
   MachineBasicBlock::iterator BrIt(MI);
@@ -913,40 +911,34 @@
   // Check that the next block is the conditional branch target.
   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
     return false;
-
-  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
-  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
-      !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
-    return false;
   return true;
 }
 
-bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
-  if (!matchElideBrByInvertingCond(MI))
-    return false;
-  applyElideBrByInvertingCond(MI);
-  return true;
-}
-
-void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
   MachineBasicBlock::iterator BrIt(MI);
   MachineInstr *BrCond = &*std::prev(BrIt);
-  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
 
-  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
-      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+  Builder.setInstrAndDebugLoc(*BrCond);
+  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
+  // FIXME: Does int/fp matter for this? If so, we might need to restrict
+  // this to i1 only since we might not know for sure what kind of
+  // compare generated the condition value.
+  auto True = Builder.buildConstant(
+      Ty, getICmpTrueVal(getTargetLowering(), false, false));
+  auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
 
-  // Invert the G_ICMP condition.
-  Observer.changingInstr(*CmpMI);
-  CmpMI->getOperand(1).setPredicate(InversePred);
-  Observer.changedInstr(*CmpMI);
+  auto *FallthroughBB = BrCond->getOperand(1).getMBB();
+  Observer.changingInstr(MI);
+  MI.getOperand(0).setMBB(FallthroughBB);
+  Observer.changedInstr(MI);
 
-  // Change the conditional branch target.
+  // Change the conditional branch to use the inverted condition and
+  // new target block.
   Observer.changingInstr(*BrCond);
+  BrCond->getOperand(0).setReg(Xor.getReg(0));
   BrCond->getOperand(1).setMBB(BrTarget);
   Observer.changedInstr(*BrCond);
-
-  MI.eraseFromParent();
 }
 
 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -740,3 +740,15 @@
   }
   llvm_unreachable("Invalid boolean contents");
 }
+
+int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
+                             bool IsFP) {
+  switch (TLI.getBooleanContents(IsVector, IsFP)) {
+  case TargetLowering::UndefinedBooleanContent:
+  case TargetLowering::ZeroOrOneBooleanContent:
+    return 1;
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    return -1;
+  }
+  llvm_unreachable("Invalid boolean contents");
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -19,7 +19,6 @@
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
-                                           elide_br_by_inverting_cond,
                                            fconstant_to_constant]> {
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
   let StateClass = "AArch64PreLegalizerCombinerHelperState";
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -42,8 +42,7 @@
 def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
-  "AMDGPUGenPreLegalizerCombinerHelper", [all_combines,
-   elide_br_by_inverting_cond]> {
+  "AMDGPUGenPreLegalizerCombinerHelper", [all_combines]> {
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll b/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/const-0.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -global-isel -O0 -o - %s | FileCheck %s
-
-%struct.comp = type { i8*, i32, i8*, [3 x i8], i32 }
-
-define void @regbranch() {
-; CHECK-LABEL: regbranch:
-; CHECK: mov {{w[0-9]+}}, #0
-cond_next240.i:
-  br i1 false, label %cond_true251.i, label %cond_next272.i
-
-cond_true251.i:
-  switch i8 0, label %cond_next272.i [
-    i8 42, label %bb268.i
-    i8 43, label %bb268.i
-    i8 63, label %bb268.i
-  ]
-
-bb268.i:
-  br label %cond_next272.i
-
-cond_next272.i:
-  %len.2.i = phi i32 [ 0, %bb268.i ], [ 0, %cond_next240.i ], [ 0, %cond_true251.i ]
-  %tmp278.i = icmp eq i32 %len.2.i, 1
-  ret void
-}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="opt_brcond_by_inverting_cond" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
 --- |
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
   target triple = "arm64-apple-ios5.0.0"
@@ -38,8 +38,11 @@
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), [[C]]
-    ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+    ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
+    ; CHECK: G_BRCOND [[XOR]](s1), %bb.2
+    ; CHECK: G_BR %bb.1
     ; CHECK: bb.1.if.then:
     ; CHECK: successors: %bb.3(0x80000000)
     ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY1]], [[COPY]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
@@ -8,6 +8,8 @@
   define i16 @const_s16() { ret i16 42 }
   define i32 @const_s32() { ret i32 42 }
   define i64 @const_s64() { ret i64 1234567890123 }
+  define i32 @const_s32_zero() { ret i32 0 }
+  define i64 @const_s64_zero() { ret i64 0 }
   define i8* @const_p0_0() { ret i8* null }
 
   define i32 @fconst_s32() { ret i32 42 }
@@ -81,6 +83,38 @@
   $x0 = COPY %0(s64)
 ...
 
+---
+name:            const_s32_zero
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: const_s32_zero
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK: $w0 = COPY [[COPY]]
+    %0(s32) = G_CONSTANT i32 0
+    $w0 = COPY %0(s32)
+...
+
+---
+name:            const_s64_zero
+legalized:       true
+regBankSelected: true
+registers:
+  - { id: 0, class: gpr }
+
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: const_s64_zero
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $xzr
+    ; CHECK: $x0 = COPY [[COPY]]
+    %0(s64) = G_CONSTANT i64 0
+    $x0 = COPY %0(s64)
+...
+
 ---
 name:            const_p0_0
 legalized:       true
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
@@ -52,9 +52,10 @@
 ; GCN:       ; %bb.0: ; %entry
 ; GCN-NEXT:    s_load_dword s0, s[0:1], 0x9
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_xor_b32 s0, s0, -1
 ; GCN-NEXT:    s_and_b32 s0, s0, 1
 ; GCN-NEXT:    s_cmp_lg_u32 s0, 0
-; GCN-NEXT:    s_cbranch_scc0 BB3_2
+; GCN-NEXT:    s_cbranch_scc1 BB3_2
 ; GCN-NEXT:    ; %bb.1: ; %bb0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    flat_store_dword v[0:1], v0
@@ -80,9 +81,10 @@
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_and_b32 s0, s0, s1
+; GCN-NEXT:    s_xor_b32 s0, s0, -1
 ; GCN-NEXT:    s_and_b32 s0, s0, 1
 ; GCN-NEXT:    s_cmp_lg_u32 s0, 0
-; GCN-NEXT:    s_cbranch_scc0 BB4_2
+; GCN-NEXT:    s_cbranch_scc1 BB4_2
 ; GCN-NEXT:    ; %bb.1: ; %bb0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    flat_store_dword v[0:1], v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -51,11 +51,11 @@
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_load_dword s0, s[4:5], 0x11
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_cmp_eq_u32 s1, s0
+; CI-NEXT:    s_cmp_lg_u32 s1, s0
 ; CI-NEXT:    s_cselect_b32 s0, 1, 0
 ; CI-NEXT:    s_and_b32 s0, s0, 1
 ; CI-NEXT:    s_cmp_lg_u32 s0, 0
-; CI-NEXT:    s_cbranch_scc0 BB1_2
+; CI-NEXT:    s_cbranch_scc1 BB1_2
 ; CI-NEXT:    ; %bb.1: ; %bb0
 ; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    flat_store_dword v[0:1], v0
@@ -68,11 +68,11 @@
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
-; GFX9-NEXT:    s_cmp_eq_u32 s1, s0
+; GFX9-NEXT:    s_cmp_lg_u32 s1, s0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX9-NEXT:    s_cbranch_scc0 BB1_2
+; GFX9-NEXT:    s_cbranch_scc1 BB1_2
 ; GFX9-NEXT:    ; %bb.1: ; %bb0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    global_store_dword v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -51,11 +51,11 @@
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_load_dword s0, s[4:5], 0x10
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_cmp_eq_u32 s1, s0
+; CI-NEXT:    s_cmp_lg_u32 s1, s0
 ; CI-NEXT:    s_cselect_b32 s0, 1, 0
 ; CI-NEXT:    s_and_b32 s0, s0, 1
 ; CI-NEXT:    s_cmp_lg_u32 s0, 0
-; CI-NEXT:    s_cbranch_scc0 BB1_2
+; CI-NEXT:    s_cbranch_scc1 BB1_2
 ; CI-NEXT:    ; %bb.1: ; %bb0
 ; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    flat_store_dword v[0:1], v0
@@ -68,11 +68,11 @@
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
 ; GFX9-NEXT:    s_lshl_b32 s0, s0, 16
-; GFX9-NEXT:    s_cmp_eq_u32 s1, s0
+; GFX9-NEXT:    s_cmp_lg_u32 s1, s0
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX9-NEXT:    s_cbranch_scc0 BB1_2
+; GFX9-NEXT:    s_cbranch_scc1 BB1_2
 ; GFX9-NEXT:    ; %bb.1: ; %bb0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    global_store_dword v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -29,9 +29,10 @@
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:    global_store_dword v[0:1], v0, off
 ; GFX9-NEXT:  BB0_2: ; %Flow
+; GFX9-NEXT:    s_xor_b32 s0, s0, -1
 ; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX9-NEXT:    s_cbranch_scc0 BB0_4
+; GFX9-NEXT:    s_cbranch_scc1 BB0_4
 ; GFX9-NEXT:    ; %bb.3: ; %bb0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9-NEXT:    global_store_dword v[0:1], v0, off
@@ -109,9 +110,10 @@
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-NEXT:    global_store_dword v[0:1], v2, off
 ; GFX9-NEXT:  BB1_2: ; %Flow
+; GFX9-NEXT:    s_xor_b32 s0, s0, -1
 ; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX9-NEXT:    s_cbranch_scc0 BB1_4
+; GFX9-NEXT:    s_cbranch_scc1 BB1_4
 ; GFX9-NEXT:    ; %bb.3: ; %bb0
 ; GFX9-NEXT:    s_getpc_b64 s[0:1]
 ; GFX9-NEXT:    s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -357,9 +357,10 @@
 ; CHECK-NEXT:  BB1_2:
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:  BB1_3: ; %Flow
-; CHECK-NEXT:    s_and_b32 s0, s1, 1
+; CHECK-NEXT:    s_xor_b32 s0, s1, -1
+; CHECK-NEXT:    s_and_b32 s0, s0, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
-; CHECK-NEXT:    s_cbranch_scc0 BB1_5
+; CHECK-NEXT:    s_cbranch_scc1 BB1_5
 ; CHECK-NEXT:    ; %bb.4:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s4
 ; CHECK-NEXT:    s_sub_i32 s0, 0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -351,9 +351,10 @@
 ; CHECK-NEXT:  BB1_2:
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:  BB1_3: ; %Flow
-; CHECK-NEXT:    s_and_b32 s0, s1, 1
+; CHECK-NEXT:    s_xor_b32 s0, s1, -1
+; CHECK-NEXT:    s_and_b32 s0, s0, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
-; CHECK-NEXT:    s_cbranch_scc0 BB1_5
+; CHECK-NEXT:    s_cbranch_scc1 BB1_5
 ; CHECK-NEXT:    ; %bb.4:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s4
 ; CHECK-NEXT:    s_sub_i32 s0, 0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -323,9 +323,10 @@
 ; CHECK-NEXT:  BB1_2:
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:  BB1_3: ; %Flow
-; CHECK-NEXT:    s_and_b32 s1, s5, 1
+; CHECK-NEXT:    s_xor_b32 s1, s5, -1
+; CHECK-NEXT:    s_and_b32 s1, s1, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
-; CHECK-NEXT:    s_cbranch_scc0 BB1_5
+; CHECK-NEXT:    s_cbranch_scc1 BB1_5
 ; CHECK-NEXT:    ; %bb.4:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
 ; CHECK-NEXT:    s_sub_i32 s1, 0, s2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -319,9 +319,10 @@
 ; CHECK-NEXT:  BB1_2:
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:  BB1_3: ; %Flow
-; CHECK-NEXT:    s_and_b32 s1, s5, 1
+; CHECK-NEXT:    s_xor_b32 s1, s5, -1
+; CHECK-NEXT:    s_and_b32 s1, s1, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
-; CHECK-NEXT:    s_cbranch_scc0 BB1_5
+; CHECK-NEXT:    s_cbranch_scc1 BB1_5
 ; CHECK-NEXT:    ; %bb.4:
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, s2
 ; CHECK-NEXT:    s_sub_i32 s1, 0, s2
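
For reference, a minimal before/after sketch of what the generalized combine does at the MIR level. This is only an illustration: the basic block and virtual register numbers are hypothetical and not copied from any test above. When the G_BRCOND target is the layout successor, the condition is XOR'ed with the target's "true" value (as reported by getICmpTrueVal) and the two branch targets are exchanged, so the common path can fall through; unlike the old elide combine, the G_ICMP itself is left untouched and no G_ICMP is required at all.

  ; Before: %bb.1 is the layout successor, yet the conditional branch
  ; targets it, so a branch is always taken no matter which path is chosen.
  bb.0:
    %2:_(s1) = G_ICMP intpred(sgt), %0(s32), %1
    G_BRCOND %2(s1), %bb.1
    G_BR %bb.2

  ; After: the condition is inverted with G_XOR against i1 true and the
  ; targets are swapped; the branch to the layout successor %bb.1 can now
  ; become a fallthrough.
  bb.0:
    %2:_(s1) = G_ICMP intpred(sgt), %0(s32), %1
    %3:_(s1) = G_CONSTANT i1 true
    %4:_(s1) = G_XOR %2, %3
    G_BRCOND %4(s1), %bb.2
    G_BR %bb.1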