diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -703,7 +703,6 @@ std::pair> haveIdenticalTripCounts(const FusionCandidate &FC0, const FusionCandidate &FC1) const { - const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L); if (isa(TripCount0)) { UncomputableTripCount++; @@ -1040,6 +1039,112 @@ return Fused; } + // Returns true if the instruction \p I can be hoisted to the end of the + // preheader of \p FC0. \p SafeToHoist contains the instructions that are + // known to be safe to hoist. The instructions encountered that cannot be + // hoisted are in \p NotHoisting. + // TODO: Move functionality into CodeMoverUtils + bool canHoistInst(Instruction &I, + const SmallVector &SafeToHoist, + const SmallVector &NotHoisting, + const FusionCandidate &FC0) const { + const BasicBlock *FC0PreheaderTarget = FC0.Preheader->getSingleSuccessor(); + assert(FC0PreheaderTarget && + "Expected single successor for loop preheader."); + + for (Use &Op : I.operands()) { + if (auto *OpInst = dyn_cast(Op)) { + bool OpHoisted = is_contained(SafeToHoist, OpInst); + // Check if we have already decided to hoist this operand. In this + // case, it does not dominate FC0 *yet*, but will after we hoist it. + if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) { + return false; + } + } + } + + // If this isn't a memory inst, hoisting is safe + if (!I.mayReadOrWriteMemory()) + return true; + + LLVM_DEBUG(dbgs() << "Checking if this mem inst can be hoisted.\n"); + for (Instruction *NotHoistedInst : NotHoisting) { + if (auto D = DI.depends(&I, NotHoistedInst, true)) { + // Dependency is not read-before-write, write-before-read or + // write-before-write + if (D->isFlow() || D->isAnti() || D->isOutput()) { + LLVM_DEBUG(dbgs() << "Inst depends on an instruction in FC1's " + "preheader that is not being hoisted.\n"); + return false; + } + } + } + + for (Instruction *ReadInst : FC0.MemReads) { + if (auto D = DI.depends(ReadInst, &I, true)) { + // Dependency is not read-before-write + if (D->isAnti()) { + LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC0.\n"); + return false; + } + } + } + + for (Instruction *WriteInst : FC0.MemWrites) { + if (auto D = DI.depends(WriteInst, &I, true)) { + // Dependency is not write-before-read or write-before-write + if (D->isFlow() || D->isOutput()) { + LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC0.\n"); + return false; + } + } + } + return true; + } + + // Returns true if the instruction \p I can be sunk to the top of the exit + // block of \p FC1. + // TODO: Move functionality into CodeMoverUtils + bool canSinkInst(Instruction &I, const FusionCandidate &FC1) const { + for (User *U : I.users()) { + if (auto *UI{dyn_cast(U)}) { + // Cannot sink if user in loop + // If FC1 has phi users of this value, we cannot sink it into FC1. + if (FC1.L->contains(UI)) { + // Cannot hoist or sink this instruction. No hoisting/sinking + // should take place, loops should not fuse + return false; + } + } + } + + // If this isn't a memory inst, sinking is safe + if (!I.mayReadOrWriteMemory()) + return true; + + for (Instruction *ReadInst : FC1.MemReads) { + if (auto D = DI.depends(&I, ReadInst, true)) { + // Dependency is not write-before-read + if (D->isFlow()) { + LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC1.\n"); + return false; + } + } + } + + for (Instruction *WriteInst : FC1.MemWrites) { + if (auto D = DI.depends(&I, WriteInst, true)) { + // Dependency is not write-before-write or read-before-write + if (D->isOutput() || D->isAnti()) { + LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC1.\n"); + return false; + } + } + } + + return true; + } + /// Collect instructions in the \p FC1 Preheader that can be hoisted /// to the \p FC0 Preheader or sunk into the \p FC1 Body bool collectMovablePreheaderInsts( @@ -1047,6 +1152,10 @@ SmallVector &SafeToHoist, SmallVector &SafeToSink) const { BasicBlock *FC1Preheader = FC1.Preheader; + // Save the instructions that are not being hoisted, so we know not to hoist + // mem insts that they dominate. + SmallVector NotHoisting; + for (Instruction &I : *FC1Preheader) { // Can't move a branch if (&I == FC1Preheader->getTerminator()) @@ -1055,52 +1164,33 @@ // TODO: The case of mayReadFromMemory we can handle but requires // additional work with a dependence analysis so for now we give // up on memory reads. - if (I.mayHaveSideEffects() || I.mayReadFromMemory()) { - LLVM_DEBUG(dbgs() << "Inst: " << I << " may have side-effects.\n"); + if (I.mayThrow() || !I.willReturn()) { + LLVM_DEBUG(dbgs() << "Inst: " << I << " may throw or won't return.\n"); return false; } LLVM_DEBUG(dbgs() << "Checking Inst: " << I << "\n"); - // First check if can be hoisted - // If the operands of this instruction dominate the FC0 Preheader - // target block, then it is safe to move them to the end of the FC0 - const BasicBlock *FC0PreheaderTarget = - FC0.Preheader->getSingleSuccessor(); - assert(FC0PreheaderTarget && - "Expected single successor for loop preheader."); - bool CanHoistInst = true; - for (Use &Op : I.operands()) { - if (auto *OpInst = dyn_cast(Op)) { - bool OpHoisted = is_contained(SafeToHoist, OpInst); - // Check if we have already decided to hoist this operand. In this - // case, it does not dominate FC0 *yet*, but will after we hoist it. - if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) { - CanHoistInst = false; - break; - } - } + if (I.isAtomic() || I.isVolatile()) { + LLVM_DEBUG( + dbgs() << "\tInstruction is volatile or atomic. Cannot move it.\n"); + return false; } - if (CanHoistInst) { + + if (canHoistInst(I, SafeToHoist, NotHoisting, FC0)) { SafeToHoist.push_back(&I); LLVM_DEBUG(dbgs() << "\tSafe to hoist.\n"); } else { LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n"); + NotHoisting.push_back(&I); - for (User *U : I.users()) { - if (auto *UI{dyn_cast(U)}) { - // Cannot sink if user in loop - // If FC1 has phi users of this value, we cannot sink it into FC1. - if (FC1.L->contains(UI)) { - // Cannot hoist or sink this instruction. No hoisting/sinking - // should take place, loops should not fuse - LLVM_DEBUG(dbgs() << "\tCould not sink.\n"); - return false; - } - } + if (canSinkInst(I, FC1)) { + SafeToSink.push_back(&I); + LLVM_DEBUG(dbgs() << "\tSafe to sink.\n"); + } else { + LLVM_DEBUG(dbgs() << "\tCould not sink.\n"); + return false; } - SafeToSink.push_back(&I); - LLVM_DEBUG(dbgs() << "\tSafe to sink.\n"); } } LLVM_DEBUG( @@ -1331,7 +1421,6 @@ const FusionCandidate &FC1, SmallVector &HoistInsts, SmallVector &SinkInsts) const { - // All preheader instructions except the branch must be hoisted or sunk assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 && "Attempting to sink and hoist preheader instructions, but not all " diff --git a/llvm/test/Transforms/LoopFusion/hoist_load.ll b/llvm/test/Transforms/LoopFusion/hoist_load.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopFusion/hoist_load.ll @@ -0,0 +1,45 @@ +; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s +; REQUIRES: asserts +; CHECK: Safe to hoist. + +@A = common global [100 x i32] zeroinitializer, align 16 +define void @hoist_preheader(i32 %N) { +; CHECK-LABEL: @hoist_preheader( +; CHECK-NEXT: pre1: +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[BODY1:%.*]] +; CHECK: body1: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ] +; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ] +; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]] +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]] +; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]] +; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]] +; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +pre1: + %ptr = alloca i32 + br label %body1 + +body1: ; preds = %pre1, %body1 + %i = phi i32 [%i_next, %body1], [0, %pre1] + %i_next = add i32 1, %i + %cond = icmp ne i32 %i, %N + br i1 %cond, label %body1, label %pre2 + +pre2: + %b = load i32, i32 * %ptr + br label %body2 + +body2: ; preds = %pre2, %body2 + %i2 = phi i32 [%i_next2, %body2], [0, %pre2] + %i_next2 = add i32 1, %i2 + %cond2 = icmp ne i32 %i2, %N + br i1 %cond2, label %body2, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopFusion/hoist_store.ll b/llvm/test/Transforms/LoopFusion/hoist_store.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopFusion/hoist_store.ll @@ -0,0 +1,45 @@ +; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s +; REQUIRES: asserts +; CHECK: Safe to hoist. + +@A = common global [100 x i32] zeroinitializer, align 16 +define void @hoist_preheader(i32 %N) { +; CHECK-LABEL: @hoist_preheader( +; CHECK-NEXT: pre1: +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 3, i32* [[PTR]], align 4 +; CHECK-NEXT: br label [[BODY1:%.*]] +; CHECK: body1: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ] +; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ] +; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]] +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]] +; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]] +; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]] +; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +pre1: + %ptr = alloca i32 + br label %body1 + +body1: ; preds = %pre1, %body1 + %i = phi i32 [%i_next, %body1], [0, %pre1] + %i_next = add i32 1, %i + %cond = icmp ne i32 %i, %N + br i1 %cond, label %body1, label %pre2 + +pre2: + store i32 3, i32* %ptr + br label %body2 + +body2: ; preds = %pre2, %body2 + %i2 = phi i32 [%i_next2, %body2], [0, %pre2] + %i_next2 = add i32 1, %i2 + %cond2 = icmp ne i32 %i2, %N + br i1 %cond2, label %body2, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_atomic.ll copy from llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll copy to llvm/test/Transforms/LoopFusion/no_sink_hoist_atomic.ll --- a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll +++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_atomic.ll @@ -1,19 +1,17 @@ ; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s ; REQUIRES: asserts -; CHECK: have side-effects. ; CHECK: Could not hoist/sink all instructions -@A = common global [100 x i32] zeroinitializer, align 16 define void @sink_preheader(i32 %N) { ; CHECK:pre1: -; CHECK-NEXT: %ptr = alloca i32, align 4 +; CHECK-NEXT: %ptr = alloca i32 ; CHECK-NEXT: br label %body1 pre1: %ptr = alloca i32 br label %body1 ; CHECK:body1: -; CHECK-NOT: %stay = +; CHECK-NOT: store atomic i32 3, i32* %ptr seq_cst, align 4 body1: ; preds = %pre1, %body1 %i = phi i32 [%i_next, %body1], [0, %pre1] %i_next = add i32 1, %i @@ -21,13 +19,13 @@ br i1 %cond, label %body1, label %pre2 ; CHECK:pre2: -; CHECK-NEXT: store i32 3, i32* %ptr +; CHECK-NEXT: store atomic i32 3, i32* %ptr seq_cst, align 4 pre2: - store i32 3, i32* %ptr + store atomic i32 3, i32* %ptr seq_cst, align 4 br label %body2 ; CHECK: body2: -; CHECK-NOT: %stay = +; CHECK-NOT: store atomic i32 3, i32* %ptr seq_cst, align 4 body2: ; preds = %pre2, %body2 %i2 = phi i32 [%i_next2, %body2], [0, %pre2] %i_next2 = add i32 1, %i2 @@ -35,7 +33,6 @@ br i1 %cond2, label %body2, label %exit ; CHECK: exit: -; CHECK-NOT: %stay = exit: ret void } diff --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_load.ll copy from llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll copy to llvm/test/Transforms/LoopFusion/no_sink_hoist_load.ll --- a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll +++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_load.ll @@ -1,6 +1,5 @@ ; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s ; REQUIRES: asserts -; CHECK: have side-effects. ; CHECK: Could not hoist/sink all instructions @A = common global [100 x i32] zeroinitializer, align 16 @@ -9,7 +8,7 @@ ; CHECK-NEXT: %ptr = alloca i32, align 4 ; CHECK-NEXT: br label %body1 pre1: - %ptr = alloca i32 + %ptr = alloca i32, align 4 br label %body1 ; CHECK:body1: @@ -18,12 +17,13 @@ %i = phi i32 [%i_next, %body1], [0, %pre1] %i_next = add i32 1, %i %cond = icmp ne i32 %i, %N + store i32 3, i32* %ptr br i1 %cond, label %body1, label %pre2 ; CHECK:pre2: -; CHECK-NEXT: store i32 3, i32* %ptr +; CHECK-NEXT: %stay = load i32, i32* %ptr pre2: - store i32 3, i32* %ptr + %stay = load i32, i32* %ptr br label %body2 ; CHECK: body2: @@ -32,6 +32,7 @@ %i2 = phi i32 [%i_next2, %body2], [0, %pre2] %i_next2 = add i32 1, %i2 %cond2 = icmp ne i32 %i2, %N + store i32 3, i32* %ptr br i1 %cond2, label %body2, label %exit ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll --- a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll +++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll @@ -1,6 +1,5 @@ ; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s ; REQUIRES: asserts -; CHECK: have side-effects. ; CHECK: Could not hoist/sink all instructions @A = common global [100 x i32] zeroinitializer, align 16 @@ -9,15 +8,16 @@ ; CHECK-NEXT: %ptr = alloca i32, align 4 ; CHECK-NEXT: br label %body1 pre1: - %ptr = alloca i32 + %ptr = alloca i32, align 4 br label %body1 ; CHECK:body1: -; CHECK-NOT: %stay = +; CHECK-NOT: store i32 3, i32* %ptr body1: ; preds = %pre1, %body1 %i = phi i32 [%i_next, %body1], [0, %pre1] %i_next = add i32 1, %i %cond = icmp ne i32 %i, %N + %load1 = load i32, i32* %ptr br i1 %cond, label %body1, label %pre2 ; CHECK:pre2: @@ -27,15 +27,16 @@ br label %body2 ; CHECK: body2: -; CHECK-NOT: %stay = +; CHECK-NOT: store i32 3, i32* %ptr body2: ; preds = %pre2, %body2 %i2 = phi i32 [%i_next2, %body2], [0, %pre2] %i_next2 = add i32 1, %i2 %cond2 = icmp ne i32 %i2, %N + %load2 = load i32, i32* %ptr br i1 %cond2, label %body2, label %exit ; CHECK: exit: -; CHECK-NOT: %stay = +; CHECK-NOT: store i32 3, i32* %ptr exit: ret void } diff --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll --- a/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll +++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll @@ -1,6 +1,5 @@ ; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s ; REQUIRES: asserts -; CHECK: may have side-effects ; CHECK: Could not hoist/sink all instructions declare void @unknown_func() @@ -12,7 +11,7 @@ br label %body1 ; CHECK:body1: -; CHECK-NOT: %stay = +; CHECK-NOT: call void @unknown_func() body1: ; preds = %pre1, %body1 %i = phi i32 [%i_next, %body1], [0, %pre1] %i_next = add i32 1, %i @@ -26,7 +25,7 @@ br label %body2 ; CHECK: body2: -; CHECK-NOT: %stay = +; CHECK-NOT: call void @unknown_func() body2: ; preds = %pre2, %body2 %i2 = phi i32 [%i_next2, %body2], [0, %pre2] %i_next2 = add i32 1, %i2 @@ -34,7 +33,6 @@ br i1 %cond2, label %body2, label %exit ; CHECK: exit: -; CHECK-NOT: %stay = exit: ret void } diff --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_volatile.ll copy from llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll copy to llvm/test/Transforms/LoopFusion/no_sink_hoist_volatile.ll --- a/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll +++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_volatile.ll @@ -1,18 +1,19 @@ ; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s ; REQUIRES: asserts -; CHECK: may have side-effects ; CHECK: Could not hoist/sink all instructions declare void @unknown_func() define void @sink_preheader(i32 %N) { ; CHECK:pre1: +; CHECK-NEXT: %ptr = alloca i32 ; CHECK-NEXT: br label %body1 pre1: + %ptr = alloca i32 br label %body1 ; CHECK:body1: -; CHECK-NOT: %stay = +; CHECK-NOT: store volatile i32 3, i32* %ptr body1: ; preds = %pre1, %body1 %i = phi i32 [%i_next, %body1], [0, %pre1] %i_next = add i32 1, %i @@ -20,13 +21,13 @@ br i1 %cond, label %body1, label %pre2 ; CHECK:pre2: -; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: store volatile i32 3, i32* %ptr pre2: - call void @unknown_func() + store volatile i32 3, i32* %ptr br label %body2 ; CHECK: body2: -; CHECK-NOT: %stay = +; CHECK-NOT: store volatile i32 3, i32* %ptr body2: ; preds = %pre2, %body2 %i2 = phi i32 [%i_next2, %body2], [0, %pre2] %i_next2 = add i32 1, %i2 @@ -34,7 +35,6 @@ br i1 %cond2, label %body2, label %exit ; CHECK: exit: -; CHECK-NOT: %stay = exit: ret void } diff --git a/llvm/test/Transforms/LoopFusion/sink_load.ll b/llvm/test/Transforms/LoopFusion/sink_load.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopFusion/sink_load.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s +; REQUIRES: asserts +; CHECK: Safe to sink. + +@A = common global [100 x i32] zeroinitializer, align 16 +define void @sink_preheader(i32 %N) { +; CHECK-LABEL: @sink_preheader( +; CHECK-NEXT: pre1: +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BODY1:%.*]] +; CHECK: body1: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ] +; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ] +; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]] +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]] +; CHECK-NEXT: store i32 3, i32* [[PTR]], align 4 +; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]] +; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]] +; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: ret void +; +pre1: + %ptr = alloca i32 + br label %body1 + +body1: ; preds = %pre1, %body1 + %i = phi i32 [%i_next, %body1], [0, %pre1] + %i_next = add i32 1, %i + %cond = icmp ne i32 %i, %N + store i32 3, i32* %ptr + br i1 %cond, label %body1, label %pre2 + +pre2: + %b = load i32, i32 * %ptr + br label %body2 + +body2: ; preds = %pre2, %body2 + %i2 = phi i32 [%i_next2, %body2], [0, %pre2] + %i_next2 = add i32 1, %i2 + %cond2 = icmp ne i32 %i2, %N + br i1 %cond2, label %body2, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopFusion/sink_store.ll b/llvm/test/Transforms/LoopFusion/sink_store.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopFusion/sink_store.ll @@ -0,0 +1,46 @@ +; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +@A = common global [100 x i32] zeroinitializer, align 16 +define void @sink_preheader(i32 %N) { +; CHECK-LABEL: @sink_preheader( +; CHECK-NEXT: pre1: +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BODY1:%.*]] +; CHECK: body1: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ] +; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ] +; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]] +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]] +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]] +; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]] +; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: store i32 3, i32* [[PTR]], align 4 +; CHECK-NEXT: ret void +; +pre1: + %ptr = alloca i32 + br label %body1 + +body1: ; preds = %pre1, %body1 + %i = phi i32 [%i_next, %body1], [0, %pre1] + %i_next = add i32 1, %i + %cond = icmp ne i32 %i, %N + %b = load i32, i32 * %ptr + br i1 %cond, label %body1, label %pre2 + +pre2: + store i32 3, i32* %ptr + br label %body2 + +body2: ; preds = %pre2, %body2 + %i2 = phi i32 [%i_next2, %body2], [0, %pre2] + %i_next2 = add i32 1, %i2 + %cond2 = icmp ne i32 %i2, %N + br i1 %cond2, label %body2, label %exit + +exit: + ret void +}