Index: llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -100,6 +100,10 @@
   /// increment at this position.
   Instruction *IVIncInsertPos;
 
+  // If this is non-null, do not hoist this instruction while hoisting the IV
+  // increment.
+  Instruction *IVIncProhibitHoisting;
+
   /// Phis that complete an IV chain. Reuse
   DenseSet<AssertingVH<PHINode>> ChainedPhis;
 
@@ -308,6 +312,12 @@
     IVIncInsertPos = Pos;
   }
 
+  void setIVIncProhibitHoisting(Instruction *I = nullptr) {
+    assert(!CanonicalMode &&
+           "IV increment hoisting is not supported in CanonicalMode");
+    IVIncProhibitHoisting = I;
+  }
+
   /// Enable post-inc expansion for addrecs referring to the given
   /// loops. Post-inc expansion is only supported in non-canonical mode.
   void setPostInc(const PostIncLoopSet &L) {
Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -5234,6 +5234,10 @@
   // perform an advantageous expansion.
   Rewriter.setPostInc(LF.PostIncLoops);
 
+  // Inform the Rewriter that we will need to update this instruction, so it
+  // should not hoist it.
+  Rewriter.setIVIncProhibitHoisting(LF.UserInst);
+
   // This is the type that the user actually needs.
   Type *OpTy = LF.OperandValToReplace->getType();
   // This will be the type that we'll initially expand to.
@@ -5355,6 +5359,7 @@
 
   // We're done expanding now, so reset the rewriter.
   Rewriter.clearPostInc();
+  Rewriter.setIVIncProhibitHoisting();
 
   // An ICmpZero Formula represents an ICmp which we're handling as a
   // comparison against zero. Now that we've expanded an expression for that
Index: llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
===================================================================
--- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1052,6 +1052,9 @@
 /// it available to other uses in this loop. Recursively hoist any operands,
 /// until we reach a value that dominates InsertPos.
 bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
+  if (IncV == IVIncProhibitHoisting)
+    return false;
+
   if (SE.DT.dominates(IncV, InsertPos))
     return true;
@@ -1295,7 +1298,7 @@
   if (AddRecPhiMatch) {
     // Potentially, move the increment. We have made sure in
     // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
-    if (L == IVIncInsertLoop)
+    if (L == IVIncInsertLoop && IncV != IVIncProhibitHoisting)
       hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
 
     // Ok, the add recurrence looks usable.
Index: llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; These are regression tests for PR43768.
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; This test checks that LSR does not hoist the increment of %val9 while expanding the other pieces of
+; the formula at their original place in the backedge, which would produce incorrect SSA form.
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(3)* undef, align 4
+; CHECK-NEXT:    [[VAL1:%.*]] = add i32 undef, 12
+; CHECK-NEXT:    [[VAL2:%.*]] = trunc i64 undef to i32
+; CHECK-NEXT:    [[VAL3:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT:    [[VAL4:%.*]] = sub i32 [[VAL]], [[VAL3]]
+; CHECK-NEXT:    [[VAL5:%.*]] = ashr i32 undef, undef
+; CHECK-NEXT:    [[VAL6:%.*]] = sub i32 [[VAL4]], [[VAL5]]
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[VAL]], 7
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[VAL3]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[VAL5]], 7
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shl i32 [[VAL6]], 3
+; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK:       bb7:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB32:%.*]] ], [ 0, [[BB:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB32]] ], [ -8, [[BB]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 8
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nuw nsw i32 [[LSR_IV1]], [[TMP5]]
+; CHECK-NEXT:    [[VAL10:%.*]] = icmp ult i64 [[LSR_IV_NEXT]], 65536
+; CHECK-NEXT:    br i1 [[VAL10]], label [[BB12:%.*]], label [[BB11:%.*]]
+; CHECK:       bb11:
+; CHECK-NEXT:    unreachable
+; CHECK:       bb12:
+; CHECK-NEXT:    [[VAL14:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT:    br i1 [[VAL14]], label [[BB17:%.*]], label [[BB12_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]]
+; CHECK:       bb15splitsplitsplitsplitsplitsplit:
+; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLITSPLITSPLIT:%.*]]
+; CHECK:       bb12.bb15splitsplitsplitsplitsplit_crit_edge:
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[VAL6]], [[LSR_IV1]]
+; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLITSPLITSPLIT]]
+; CHECK:       bb15splitsplitsplitsplitsplit:
+; CHECK-NEXT:    [[VAL16_PH_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP6]], [[BB12_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE]] ], [ [[VAL35:%.*]], [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT:%.*]] ]
+; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLITSPLIT:%.*]]
+; CHECK:       bb17.bb15splitsplitsplitsplit_crit_edge:
+; CHECK-NEXT:    [[TMP7:%.*]] = shl i32 [[VAL]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP8]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 [[TMP7]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = shl i32 [[VAL5]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[LSR_IV1]]
+; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLITSPLIT]]
+; CHECK:       bb15splitsplitsplitsplit:
+; CHECK-NEXT:    [[VAL16_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP13]], [[BB17_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLITSPLIT]] ]
+; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLIT:%.*]]
+; CHECK:       bb20.bb15splitsplitsplit_crit_edge:
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[VAL]], 3
+; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], 3
+; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP14]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = mul i32 [[VAL5]], 3
+; CHECK-NEXT:    [[TMP19:%.*]] = sub i32 [[TMP17]], [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP19]], [[LSR_IV1]]
+; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLIT]]
+; CHECK:       bb15splitsplitsplit:
+; CHECK-NEXT:    [[VAL16_PH_PH_PH:%.*]] = phi i32 [ [[TMP20]], [[BB20_BB15SPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLIT]] ]
+; CHECK-NEXT:    br label [[BB15SPLITSPLIT:%.*]]
+; CHECK:       bb23.bb15splitsplit_crit_edge:
+; CHECK-NEXT:    [[TMP21:%.*]] = shl i32 [[VAL]], 2
+; CHECK-NEXT:    [[TMP22:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT:    [[TMP23:%.*]] = shl i32 [[TMP22]], 2
+; CHECK-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP21]], [[TMP23]]
+; CHECK-NEXT:    [[TMP25:%.*]] = shl i32 [[VAL5]], 2
+; CHECK-NEXT:    [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = add i32 [[TMP26]], [[LSR_IV1]]
+; CHECK-NEXT:    br label [[BB15SPLITSPLIT]]
+; CHECK:       bb15splitsplit:
+; CHECK-NEXT:    [[VAL16_PH_PH:%.*]] = phi i32 [ [[TMP27]], [[BB23_BB15SPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH]], [[BB15SPLITSPLITSPLIT]] ]
+; CHECK-NEXT:    br label [[BB15SPLIT:%.*]]
+; CHECK:       bb26.bb15split_crit_edge:
+; CHECK-NEXT:    [[TMP28:%.*]] = mul i32 [[VAL]], 5
+; CHECK-NEXT:    [[TMP29:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT:    [[TMP30:%.*]] = mul i32 [[TMP29]], 5
+; CHECK-NEXT:    [[TMP31:%.*]] = sub i32 [[TMP28]], [[TMP30]]
+; CHECK-NEXT:    [[TMP32:%.*]] = mul i32 [[VAL5]], 5
+; CHECK-NEXT:    [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP32]]
+; CHECK-NEXT:    [[TMP34:%.*]] = add i32 [[TMP33]], [[LSR_IV1]]
+; CHECK-NEXT:    br label [[BB15SPLIT]]
+; CHECK:       bb15split:
+; CHECK-NEXT:    [[VAL16_PH:%.*]] = phi i32 [ [[TMP34]], [[BB26_BB15SPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH]], [[BB15SPLITSPLIT]] ]
+; CHECK-NEXT:    br label [[BB15:%.*]]
+; CHECK:       bb29.bb15_crit_edge:
+; CHECK-NEXT:    [[TMP35:%.*]] = mul i32 [[VAL]], 6
+; CHECK-NEXT:    [[TMP36:%.*]] = mul i32 [[VAL1]], [[VAL2]]
+; CHECK-NEXT:    [[TMP37:%.*]] = mul i32 [[TMP36]], 6
+; CHECK-NEXT:    [[TMP38:%.*]] = sub i32 [[TMP35]], [[TMP37]]
+; CHECK-NEXT:    [[TMP39:%.*]] = mul i32 [[VAL5]], 6
+; CHECK-NEXT:    [[TMP40:%.*]] = sub i32 [[TMP38]], [[TMP39]]
+; CHECK-NEXT:    [[TMP41:%.*]] = add i32 [[TMP40]], [[LSR_IV1]]
+; CHECK-NEXT:    br label [[BB15]]
+; CHECK:       bb15:
+; CHECK-NEXT:    [[VAL16:%.*]] = phi i32 [ [[TMP41]], [[BB29_BB15_CRIT_EDGE:%.*]] ], [ [[VAL16_PH]], [[BB15SPLIT]] ]
+; CHECK-NEXT:    call void @widget() [ "deopt"(i32 [[VAL16]], i32 3, i32 [[VAL]]) ]
+; CHECK-NEXT:    unreachable
+; CHECK:       bb17:
+; CHECK-NEXT:    [[VAL19:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT:    br i1 [[VAL19]], label [[BB20:%.*]], label [[BB17_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE]]
+; CHECK:       bb20:
+; CHECK-NEXT:    [[VAL22:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT:    br i1 [[VAL22]], label [[BB23:%.*]], label [[BB20_BB15SPLITSPLITSPLIT_CRIT_EDGE]]
+; CHECK:       bb23:
+; CHECK-NEXT:    [[VAL25:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT:    br i1 [[VAL25]], label [[BB26:%.*]], label [[BB23_BB15SPLITSPLIT_CRIT_EDGE]]
+; CHECK:       bb26:
+; CHECK-NEXT:    [[VAL28:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT:    br i1 [[VAL28]], label [[BB29:%.*]], label [[BB26_BB15SPLIT_CRIT_EDGE]]
+; CHECK:       bb29:
+; CHECK-NEXT:    [[VAL31:%.*]] = icmp slt i32 undef, undef
+; CHECK-NEXT:    br i1 [[VAL31]], label [[BB32]], label [[BB29_BB15_CRIT_EDGE]]
+; CHECK:       bb32:
+; CHECK-NEXT:    [[TMP42:%.*]] = add i32 [[TMP4]], [[LSR_IV1]]
+; CHECK-NEXT:    [[VAL35]] = add i32 [[TMP42]], [[VAL6]]
+; CHECK-NEXT:    br i1 false, label [[BB7]], label [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT]]
+;
+bb:
+  %val = load i32, i32 addrspace(3)* undef, align 4
+  %val1 = add i32 undef, 12
+  %val2 = trunc i64 undef to i32
+  %val3 = mul i32 %val1, %val2
+  %val4 = sub i32 %val, %val3
+  %val5 = ashr i32 undef, undef
+  %val6 = sub i32 %val4, %val5
+  br label %bb7
+
+bb7:                                              ; preds = %bb32, %bb
+  %val8 = phi i64 [ 0, %bb ], [ %val34, %bb32 ]
+  %val9 = phi i32 [ 0, %bb ], [ %val35, %bb32 ]
+  %val10 = icmp ult i64 %val8, 65536
+  br i1 %val10, label %bb12, label %bb11
+
+bb11:                                             ; preds = %bb7
+  unreachable
+
+bb12:                                             ; preds = %bb7
+  %val13 = add i32 %val9, %val6
+  %val14 = icmp slt i32 undef, undef
+  br i1 %val14, label %bb17, label %bb15
+
+bb15:                                             ; preds = %bb32, %bb29, %bb26, %bb23, %bb20, %bb17, %bb12
+  %val16 = phi i32 [ %val35, %bb32 ], [ %val30, %bb29 ], [ %val27, %bb26 ], [ %val24, %bb23 ], [ %val21, %bb20 ], [ %val18, %bb17 ], [ %val13, %bb12 ]
+  call void @widget() [ "deopt"(i32 %val16, i32 3, i32 %val) ]
+  unreachable
+
+bb17:                                             ; preds = %bb12
+  %val18 = add i32 %val13, %val6
+  %val19 = icmp slt i32 undef, undef
+  br i1 %val19, label %bb20, label %bb15
+
+bb20:                                             ; preds = %bb17
+  %val21 = add i32 %val18, %val6
+  %val22 = icmp slt i32 undef, undef
+  br i1 %val22, label %bb23, label %bb15
+
+bb23:                                             ; preds = %bb20
+  %val24 = add i32 %val21, %val6
+  %val25 = icmp slt i32 undef, undef
+  br i1 %val25, label %bb26, label %bb15
+
+bb26:                                             ; preds = %bb23
+  %val27 = add i32 %val24, %val6
+  %val28 = icmp slt i32 undef, undef
+  br i1 %val28, label %bb29, label %bb15
+
+bb29:                                             ; preds = %bb26
+  %val30 = add i32 %val27, %val6
+  %val31 = icmp slt i32 undef, undef
+  br i1 %val31, label %bb32, label %bb15
+
+bb32:                                             ; preds = %bb29
+  %val33 = add i32 %val30, %val6
+  %val34 = add nuw nsw i64 %val8, 8
+  %val35 = add i32 %val33, %val6
+  br i1 false, label %bb7, label %bb15
+}
+
+; This test checks that LSR does not hoist the increment of %val8 while expanding the other pieces of
+; the formula at their original place in the backedge, which would produce incorrect SSA form.
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[VAL:%.*]] = bitcast i8* null to i32*
+; CHECK-NEXT:    [[VAL1:%.*]] = load i32, i32* [[VAL]], align 4
+; CHECK-NEXT:    [[VAL2:%.*]] = bitcast i8* null to i32*
+; CHECK-NEXT:    [[VAL3:%.*]] = load i32, i32* [[VAL2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[VAL1]], 1
+; CHECK-NEXT:    br label [[BB6:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[VAL5:%.*]] = sext i32 [[VAL16:%.*]] to i64
+; CHECK-NEXT:    unreachable
+; CHECK:       bb6:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB12:%.*]] ], [ [[VAL3]], [[BB:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB12]] ], [ -1, [[BB]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], [[TMP0]]
+; CHECK-NEXT:    [[VAL10:%.*]] = icmp ult i64 [[LSR_IV_NEXT]], 1048576
+; CHECK-NEXT:    br i1 [[VAL10]], label [[BB12]], label [[BB11:%.*]]
+; CHECK:       bb11:
+; CHECK-NEXT:    unreachable
+; CHECK:       bb12:
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[VAL1]], [[LSR_IV1]]
+; CHECK-NEXT:    [[VAL16]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    [[VAL17:%.*]] = fcmp olt double 0.000000e+00, 2.270000e+02
+; CHECK-NEXT:    br i1 [[VAL17]], label [[BB6]], label [[BB4:%.*]]
+;
+bb:
+  %val = bitcast i8* null to i32*
+  %val1 = load i32, i32* %val, align 4
+  %val2 = bitcast i8* null to i32*
+  %val3 = load i32, i32* %val2, align 4
+  br label %bb6
+
+bb4:                                              ; preds = %bb12
+  %val5 = sext i32 %val16 to i64
+  unreachable
+
+bb6:                                              ; preds = %bb12, %bb
+  %val7 = phi i64 [ %val9, %bb12 ], [ 0, %bb ]
+  %val8 = phi i32 [ %val16, %bb12 ], [ %val3, %bb ]
+  %val9 = add nuw nsw i64 %val7, 1
+  %val10 = icmp ult i64 %val7, 1048576
+  br i1 %val10, label %bb12, label %bb11
+
+bb11:                                             ; preds = %bb6
+  unreachable
+
+bb12:                                             ; preds = %bb6
+  %val13 = select i1 false, i32 0, i32 %val8
+  %val14 = add i32 %val8, %val1
+  %val15 = select i1 false, i32 %val14, i32 %val13
+  %val16 = add i32 %val14, 1
+  %val17 = fcmp olt double 0.000000e+00, 2.270000e+02
+  br i1 %val17, label %bb6, label %bb4
+}
+
+declare void @widget()