Index: llvm/trunk/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -62,6 +62,9 @@
   bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
   void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
 
+  bool matchCombineBr(MachineInstr &MI);
+  bool tryCombineBr(MachineInstr &MI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
Index: llvm/trunk/include/llvm/CodeGen/MachineOperand.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineOperand.h
+++ llvm/trunk/include/llvm/CodeGen/MachineOperand.h
@@ -684,6 +684,11 @@
     Contents.RegMask = RegMaskPtr;
   }
 
+  void setPredicate(unsigned Predicate) {
+    assert(isPredicate() && "Wrong MachineOperand mutator");
+    Contents.Pred = Predicate;
+  }
+
   //===--------------------------------------------------------------------===//
   // Other methods.
   //===--------------------------------------------------------------------===//
Index: llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -342,6 +342,68 @@
   Observer.changedInstr(MI);
 }
 
+bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
+  // Try to match the following:
+  // bb1:
+  //   %c(s32) = G_ICMP pred, %a, %b
+  //   %c1(s1) = G_TRUNC %c(s32)
+  //   G_BRCOND %c1, %bb2
+  //   G_BR %bb3
+  // bb2:
+  // ...
+  // bb3:
+
+  // The above pattern does not have a fall through to the successor bb2, always
+  // resulting in a branch no matter which path is taken. Here we try to find
+  // and replace that pattern with conditional branch to bb3 and otherwise
+  // fallthrough to bb2.
+
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineBasicBlock::iterator BrIt(MI);
+  if (BrIt == MBB->begin())
+    return false;
+  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
+
+  MachineInstr *BrCond = &*std::prev(BrIt);
+  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
+    return false;
+
+  // Check that the next block is the conditional branch target.
+  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
+    return false;
+
+  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
+  if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
+      !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
+    return false;
+  return true;
+}
+
+bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
+  if (!matchCombineBr(MI))
+    return false;
+  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
+  MachineBasicBlock::iterator BrIt(MI);
+  MachineInstr *BrCond = &*std::prev(BrIt);
+  MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
+
+  CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
+      (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+
+  // Invert the G_ICMP condition.
+  Observer.changingInstr(*CmpMI);
+  CmpMI->getOperand(1).setPredicate(InversePred);
+  Observer.changedInstr(*CmpMI);
+
+  // Change the conditional branch target.
+  Observer.changingInstr(*BrCond);
+  BrCond->getOperand(1).setMBB(BrTarget);
+  Observer.changedInstr(*BrCond);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
Index: llvm/trunk/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -45,6 +45,8 @@
     return false;
   case TargetOpcode::COPY:
     return Helper.tryCombineCopy(MI);
+  case TargetOpcode::G_BR:
+    return Helper.tryCombineBr(MI);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_SEXTLOAD:
   case TargetOpcode::G_ZEXTLOAD:
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
@@ -26,10 +26,9 @@
   ; CHECK:   [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
   ; CHECK:   [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
   ; CHECK:   [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
-  ; CHECK:   [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
+  ; CHECK:   [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C]]
   ; CHECK:   [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
-  ; CHECK:   G_BRCOND [[TRUNC]](s1), %bb.2
-  ; CHECK:   G_BR %bb.3
+  ; CHECK:   G_BRCOND [[TRUNC]](s1), %bb.3
   ; CHECK: bb.2.if.then:
   ; CHECK:   successors: %bb.3(0x80000000)
   ; CHECK:   [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
@@ -0,0 +1,81 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "arm64-apple-ios5.0.0"
+
+  define i32 @foo(i32 %a, i32 %b) {
+  entry:
+    %cmp = icmp sgt i32 %a, 0
+    br i1 %cmp, label %if.then, label %if.end
+
+  if.then:
+    %add = add nsw i32 %b, %a
+    %add1 = add nsw i32 %a, %b
+    br label %return
+
+  if.end:
+    %mul = mul nsw i32 %b, %b
+    %add2 = add nuw nsw i32 %mul, 2
+    br label %return
+
+  return:
+    %retval.0 = phi i32 [ %add1, %if.then ], [ %add2, %if.end ]
+    ret i32 %retval.0
+  }
+
+
+...
+---
+name: foo
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: foo
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), [[C]]
+  ; CHECK:   G_BRCOND [[ICMP]](s1), %bb.2
+  ; CHECK: bb.1.if.then:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %5:_(s32) = nsw G_ADD [[COPY1]], [[COPY]]
+  ; CHECK:   %6:_(s32) = nsw G_ADD %5, [[COPY1]]
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2.if.end:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %7:_(s32) = nsw G_MUL [[COPY1]], [[COPY1]]
+  ; CHECK:   %8:_(s32) = nuw nsw G_ADD %7, [[C1]]
+  ; CHECK: bb.3.return:
+  ; CHECK:   [[PHI:%[0-9]+]]:_(s32) = G_PHI %6(s32), %bb.1, %8(s32), %bb.2
+  ; CHECK:   $w0 = COPY [[PHI]](s32)
+  ; CHECK:   RET_ReallyLR implicit $w0
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_CONSTANT i32 0
+    %5:_(s32) = G_CONSTANT i32 2
+    %3:_(s1) = G_ICMP intpred(sgt), %0(s32), %2
+    G_BRCOND %3(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2.if.then:
+    %7:_(s32) = nsw G_ADD %1, %0
+    %8:_(s32) = nsw G_ADD %7, %1
+    G_BR %bb.4
+
+  bb.3.if.end:
+    %4:_(s32) = nsw G_MUL %1, %1
+    %6:_(s32) = nuw nsw G_ADD %4, %5
+
+  bb.4.return:
+    %10:_(s32) = G_PHI %8(s32), %bb.2, %6(s32), %bb.3
+    $w0 = COPY %10(s32)
+    RET_ReallyLR implicit $w0
+
+...
Index: llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll
+++ llvm/trunk/test/CodeGen/AArch64/speculation-hardening.ll
@@ -26,12 +26,11 @@
 ; NOSLH-NOT: csetm x16, ne
   %cmp = icmp slt i32 %call, %N
   br i1 %cmp, label %if.then, label %return
-; GlobalISel lowers the branch to a b.ne sometimes instead of b.ge as expected..
-; CHECK: b.[[COND:(ge)|(lt)|(ne)]]
+; CHECK: b.[[COND:(ge)|(lt)|(ne)|(eq)]]
 
 if.then:                                          ; preds = %entry
-; NOSLH-NOT: csel x16, x16, xzr, {{(lt)|(ge)|(eq)}}
-; SLH-DAG: csel x16, x16, xzr, {{(lt)|(ge)|(eq)}}
+; NOSLH-NOT: csel x16, x16, xzr, {{(lt)|(ge)|(eq)|(ne)}}
+; SLH-DAG: csel x16, x16, xzr, {{(lt)|(ge)|(eq)|(ne)}}
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom
   %0 = load i8, i8* %arrayidx, align 1
@@ -136,7 +135,7 @@
   %l7 = icmp sgt i32 %l0, %l1
   br i1 %l7, label %then, label %else
 ; GlobalISel lowers the branch to a b.ne sometimes instead of b.ge as expected..
-; CHECK: b.[[COND:(le)|(gt)|(ne)]]
+; CHECK: b.[[COND:(le)|(gt)|(ne)|(eq)]]
 
 then:
 ; SLH-DAG: csel x16, x16, xzr, [[COND]]
@@ -144,7 +143,7 @@
   br label %postif
 
 else:
-; SLH-DAG: csel x16, x16, xzr, {{(gt)|(le)|(eq)}}
+; SLH-DAG: csel x16, x16, xzr, {{(gt)|(le)|(eq)|(ne)}}
   %l11 = sdiv i32 %l1, %l0
   br label %postif
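
Note on the transformation: the combine above fires when a block ends in G_BRCOND %cond, %bb2 followed by G_BR %bb3 and %bb2 is the layout successor. It inverts the predicate of the G_ICMP feeding the G_BRCOND, retargets the conditional branch to %bb3, and erases the G_BR so %bb2 becomes the fallthrough. The standalone C++ sketch below only models that rewrite on a toy data structure; it does not use the LLVM API, and the Pred enum, CondBr struct, and invert()/combineBr() helpers are invented for illustration.

// Toy model of the G_BR combine: invert the condition so the layout
// successor becomes the fallthrough and the unconditional branch goes away.
#include <cassert>
#include <iostream>

enum class Pred { EQ, NE, SGT, SLE, SLT, SGE };

// Hypothetical stand-in for CmpInst::getInversePredicate.
static Pred invert(Pred P) {
  switch (P) {
  case Pred::EQ:  return Pred::NE;
  case Pred::NE:  return Pred::EQ;
  case Pred::SGT: return Pred::SLE;
  case Pred::SLE: return Pred::SGT;
  case Pred::SLT: return Pred::SGE;
  case Pred::SGE: return Pred::SLT;
  }
  assert(false && "unknown predicate");
  return P;
}

struct CondBr {
  Pred P;           // predicate of the compare feeding the conditional branch
  int CondTarget;   // block taken when the compare is true
  int UncondTarget; // block taken otherwise (the trailing unconditional branch)
  bool HasUncondBr; // false once the unconditional branch has been removed
};

// Mirrors the shape of tryCombineBr: if the conditional target is the layout
// successor, invert the predicate, retarget the conditional branch, and drop
// the unconditional branch so the old target becomes the fallthrough.
static bool combineBr(CondBr &Br, int LayoutSuccessor) {
  if (!Br.HasUncondBr || Br.CondTarget != LayoutSuccessor)
    return false;
  Br.P = invert(Br.P);
  Br.CondTarget = Br.UncondTarget;
  Br.HasUncondBr = false;
  return true;
}

int main() {
  // Before: "brcond sgt -> bb2; br bb3", with bb2 laid out next.
  CondBr Br{Pred::SGT, /*CondTarget=*/2, /*UncondTarget=*/3, /*HasUncondBr=*/true};
  bool Changed = combineBr(Br, /*LayoutSuccessor=*/2);
  std::cout << "changed: " << Changed << ", conditional branch now targets bb"
            << Br.CondTarget << " and falls through otherwise\n";
  return 0;
}

This mirrors what the new MIR test checks: the sgt compare becomes sle, the G_BRCOND is redirected to the former G_BR target, and the G_BR disappears.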