Index: lib/Transforms/Scalar/LoopRotation.cpp =================================================================== --- lib/Transforms/Scalar/LoopRotation.cpp +++ lib/Transforms/Scalar/LoopRotation.cpp @@ -169,6 +169,27 @@ } } +static bool shouldRotateLoopExitingLatch(Loop *L) { + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + + // Look for a phi which is only used outside the loop (via a LCSSA phi) + // And only has uses from other phis. This often means that rotating the + // loop can remove the phi. + for (auto &Phi : Header->phis()) { + if (llvm::any_of(Phi.users(), [L](const User *U) { + return !isa(U) || (isa(U) && + L->contains(cast(U))); + })) + continue; + if (!isa(Phi.getIncomingValueForBlock(Latch))) + continue; + return true; + } + + return false; +} + /// Rotate loop LP. Return true if the loop is rotated. /// /// \param SimplifiedLatch is true if the latch was just folded into the final @@ -203,8 +224,9 @@ return false; // Rotate if either the loop latch does *not* exit the loop, or if the loop - // latch was just simplified. - if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch) + // latch was just simplified. Or if we think it will be profitable. + if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && + !shouldRotateLoopExitingLatch(L)) return false; // Check size of original header and reject loop if it is very big or we can't Index: test/Transforms/LoopRotate/loopexitinglatch.ll =================================================================== --- /dev/null +++ test/Transforms/LoopRotate/loopexitinglatch.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.base-arm-none-eabi" + +%struct.List = type { %struct.List*, i32 } + +define void @list_add(%struct.List** nocapture %list, %struct.List* %data) { +; CHECK-LABEL: @list_add( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load %struct.List*, %struct.List** [[LIST:%.*]], align 4 +; CHECK-NEXT: [[VAL2:%.*]] = getelementptr inbounds [[STRUCT_LIST:%.*]], %struct.List* [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[VAL2]], align 4 +; CHECK-NEXT: [[VAL1:%.*]] = getelementptr inbounds [[STRUCT_LIST]], %struct.List* [[DATA:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[VAL1]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN_LR_PH:%.*]], label [[IF_ELSE6:%.*]] +; CHECK: if.then.lr.ph: +; CHECK-NEXT: br label [[IF_THEN:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[CURR_0:%.*]] = phi %struct.List* [ [[TMP5:%.*]], [[IF_THEN]] ] +; CHECK-NEXT: [[PREV_0:%.*]] = phi %struct.List* [ [[CURR_04:%.*]], [[IF_THEN]] ] +; CHECK-NEXT: [[VAL:%.*]] = getelementptr inbounds [[STRUCT_LIST]], %struct.List* [[CURR_0]], i32 0, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[VAL]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[VAL1]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN]], label [[FOR_COND_IF_ELSE6_CRIT_EDGE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[CURR_04]] = phi %struct.List* [ [[TMP0]], [[IF_THEN_LR_PH]] ], [ [[CURR_0]], [[FOR_COND:%.*]] ] +; CHECK-NEXT: [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LIST]], %struct.List* [[CURR_04]], i32 0, i32 0 +; CHECK-NEXT: [[TMP5]] = load %struct.List*, %struct.List** [[NEXT]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq %struct.List* [[TMP5]], null +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[FOR_COND]] +; CHECK: if.else: +; CHECK-NEXT: [[NEXT_LCSSA:%.*]] = phi %struct.List** [ [[NEXT]], [[IF_THEN]] ] +; CHECK-NEXT: store %struct.List* [[DATA]], %struct.List** [[NEXT_LCSSA]], align 4 +; CHECK-NEXT: [[NEXT5:%.*]] = getelementptr inbounds [[STRUCT_LIST]], %struct.List* [[DATA]], i32 0, i32 0 +; CHECK-NEXT: store %struct.List* null, %struct.List** [[NEXT5]], align 4 +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.cond.if.else6_crit_edge: +; CHECK-NEXT: [[SPLIT:%.*]] = phi %struct.List* [ [[PREV_0]], [[FOR_COND]] ] +; CHECK-NEXT: br label [[IF_ELSE6]] +; CHECK: if.else6: +; CHECK-NEXT: [[PREV_0_LCSSA:%.*]] = phi %struct.List* [ [[SPLIT]], [[FOR_COND_IF_ELSE6_CRIT_EDGE]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TOBOOL7:%.*]] = icmp eq %struct.List* [[PREV_0_LCSSA]], null +; CHECK-NEXT: br i1 [[TOBOOL7]], label [[IF_ELSE12:%.*]], label [[IF_THEN8:%.*]] +; CHECK: if.then8: +; CHECK-NEXT: [[NEXT9:%.*]] = getelementptr inbounds [[STRUCT_LIST]], %struct.List* [[PREV_0_LCSSA]], i32 0, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.List* [[PREV_0_LCSSA]] to i32* +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast %struct.List* [[DATA]] to i32* +; CHECK-NEXT: store i32 [[TMP7]], i32* [[TMP8]], align 4 +; CHECK-NEXT: store %struct.List* [[DATA]], %struct.List** [[NEXT9]], align 4 +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: if.else12: +; CHECK-NEXT: [[TMP9:%.*]] = bitcast %struct.List** [[LIST]] to i32* +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.List* [[DATA]] to i32* +; CHECK-NEXT: store i32 [[TMP10]], i32* [[TMP11]], align 4 +; CHECK-NEXT: store %struct.List* [[DATA]], %struct.List** [[LIST]], align 4 +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %0 = load %struct.List*, %struct.List** %list, align 4 + br label %for.cond + +for.cond: ; preds = %if.then, %entry + %curr.0 = phi %struct.List* [ %0, %entry ], [ %3, %if.then ] + %prev.0 = phi %struct.List* [ null, %entry ], [ %curr.0, %if.then ] + %val = getelementptr inbounds %struct.List, %struct.List* %curr.0, i32 0, i32 1 + %1 = load i32, i32* %val, align 4 + %val1 = getelementptr inbounds %struct.List, %struct.List* %data, i32 0, i32 1 + %2 = load i32, i32* %val1, align 4 + %cmp = icmp slt i32 %1, %2 + br i1 %cmp, label %if.then, label %if.else6 + +if.then: ; preds = %for.cond + %next = getelementptr inbounds %struct.List, %struct.List* %curr.0, i32 0, i32 0 + %3 = load %struct.List*, %struct.List** %next, align 4 + %tobool = icmp eq %struct.List* %3, null + br i1 %tobool, label %if.else, label %for.cond + +if.else: ; preds = %if.then + %next.lcssa = phi %struct.List** [ %next, %if.then ] + store %struct.List* %data, %struct.List** %next.lcssa, align 4 + %next5 = getelementptr inbounds %struct.List, %struct.List* %data, i32 0, i32 0 + store %struct.List* null, %struct.List** %next5, align 4 + br label %for.end + +if.else6: ; preds = %for.cond + %prev.0.lcssa = phi %struct.List* [ %prev.0, %for.cond ] + %tobool7 = icmp eq %struct.List* %prev.0.lcssa, null + br i1 %tobool7, label %if.else12, label %if.then8 + +if.then8: ; preds = %if.else6 + %next9 = getelementptr inbounds %struct.List, %struct.List* %prev.0.lcssa, i32 0, i32 0 + %4 = bitcast %struct.List* %prev.0.lcssa to i32* + %5 = load i32, i32* %4, align 4 + %6 = bitcast %struct.List* %data to i32* + store i32 %5, i32* %6, align 4 + store %struct.List* %data, %struct.List** %next9, align 4 + br label %for.end + +if.else12: ; preds = %if.else6 + %7 = bitcast %struct.List** %list to i32* + %8 = load i32, i32* %7, align 4 + %9 = bitcast %struct.List* %data to i32* + store i32 %8, i32* %9, align 4 + store %struct.List* %data, %struct.List** %list, align 4 + br label %for.end + +for.end: ; preds = %if.else12, %if.then8, %if.else + ret void +}