Index: lib/Target/AArch64/AArch64ConditionalCompares.cpp
===================================================================
--- lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -51,6 +51,12 @@
 static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
                             cl::desc("Turn all knobs to 11"));
 
+// Disable speculation of a triangle when its tail is the only latch block
+// of the enclosing loop.
+static cl::opt<bool> DisableTriangleLatch(
+    "aarch64-ccmp-disable-triangle-latch", cl::init(false), cl::Hidden,
+    cl::desc("Disable when the tail block is a loop latch."));
+
 STATISTIC(NumConsidered, "Number of ccmps considered");
 STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
 STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
@@ -867,6 +873,18 @@
     DEBUG(dbgs() << "Too many instructions to speculate.\n");
     return false;
   }
+
+  // Heuristic: If the tail is the only latch block of this loop, then the
+  // compare conversion delays the loop backedge, because the ccmp instruction
+  // now executes on the critical path of the loop.
+  if (DisableTriangleLatch && Loops)
+    if (MachineLoop *ML = Loops->getLoopFor(CmpConv.Head))
+      if (MachineBasicBlock *LatchBB = ML->getLoopLatch())
+        if (LatchBB == CmpConv.Tail) {
+          DEBUG(dbgs() << "Won't speculate when tail block is a loop latch.\n");
+          return false;
+        }
+
   return true;
 }
 
Index: test/CodeGen/AArch64/aarch64-ccmp-heuristics.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-ccmp-heuristics.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mcpu=kryo -mtriple=aarch64--linux-gnu -verify-machineinstrs -aarch64-ccmp -aarch64-ccmp-disable-triangle-latch | FileCheck %s
+
+%struct.arc = type { i64, %struct.node*, %struct.node*, i32, %struct.arc*, %struct.arc*, i64, i64 }
+%struct.node = type { i64, i32, %struct.node*, %struct.node*, %struct.node*, %struct.node*, %struct.arc*, %struct.arc*, %struct.arc*, %struct.arc*, i64, i64, i32, i32 }
+%struct.basket = type { %struct.arc*, i64, i64 }
+
+; CHECK: foo
+; CHECK: %if.then34
+; CHECK: cmp x{{[0-9]+}}, #1
+; CHECK-NEXT: b.ge
+; CHECK: %if.then34.if.else.exit
+; CHECK: cmp w{{[0-9]+}}, #2
+; CHECK-NEXT: b.ne
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %arc = phi %struct.arc* [ %add.ptr60, %for.inc ], [ undef, %entry ]
+  %ident32 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 3
+  %ident32.load = load i32, i32* %ident32, align 8
+  %cmp33 = icmp sgt i32 %ident32.load, 0
+  br i1 %cmp33, label %if.then34, label %for.inc
+
+if.then34:                                        ; preds = %for.body
+  %cost35 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 0
+  %0 = load i64, i64* %cost35, align 8
+  %tail36 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 1
+  %1 = load %struct.node*, %struct.node** %tail36, align 8
+  %potential37 = getelementptr inbounds %struct.node, %struct.node* %1, i64 0, i32 0
+  %2 = load i64, i64* %potential37, align 8
+  %sub38 = sub nsw i64 %0, %2
+  %head39 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 0, i32 2
+  %3 = load %struct.node*, %struct.node** %head39, align 8
+  %potential40 = getelementptr inbounds %struct.node, %struct.node* %3, i64 0, i32 0
+  %4 = load i64, i64* %potential40, align 8
+  %add41 = add nsw i64 %4, %sub38
+  %cmp.i = icmp sgt i64 %add41, 0
+  br i1 %cmp.i, label %land.lhs.true.i, label %if.then34.if.else.exit
+
+land.lhs.true.i:                                  ; preds = %if.then34
+  %cmp1.i = icmp eq i32 %ident32.load, 1
+  br i1 %cmp1.i, label %if.then43, label %for.inc
+
+if.then34.if.else.exit:                           ; preds = %if.then34
+  %cmp2.i = icmp sgt i64 %add41, 0
+  %cmp4.i = icmp eq i32 %ident32.load, 2
+  %cmp4.i. = and i1 %cmp4.i, %cmp2.i
+  br i1 %cmp4.i., label %if.then43, label %for.inc
+
+if.then43:                                        ; preds = %if.then34.if.else.exit, %land.lhs.true.i
+  %abs_cost56 = getelementptr inbounds %struct.basket, %struct.basket* undef, i64 0, i32 2
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then43, %if.then34.if.else.exit, %land.lhs.true.i, %for.body
+  %add.ptr60 = getelementptr inbounds %struct.arc, %struct.arc* %arc, i64 undef
+  br label %for.body
+}