Index: llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp =================================================================== --- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1257,7 +1257,8 @@ if (LSRMode) { if (!isExpandedAddRecExprPHI(&PN, TempIncV, L)) continue; - if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos)) + if (L == IVIncInsertLoop && PostIncLoops.count(L) && + !hoistIVInc(TempIncV, IVIncInsertPos)) continue; } else { if (!isNormalAddRecExprPHI(&PN, TempIncV, L)) @@ -1288,7 +1289,7 @@ if (AddRecPhiMatch) { // Potentially, move the increment. We have made sure in // isExpandedAddRecExprPHI or hoistIVInc that this is possible. - if (L == IVIncInsertLoop) + if (L == IVIncInsertLoop && PostIncLoops.count(L)) hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); // Ok, the add recurrence looks usable. Index: llvm/test/Transforms/LoopStrengthReduce/pr43678-2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopStrengthReduce/pr43678-2.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1" + +; Check that LSR produces correct IR in terms of SSA. 
+; CHECK-LABEL: test +define void @test() { +bb: + %tmp = bitcast i8* null to i32* + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = bitcast i8* null to i32* + %tmp3 = load i32, i32* %tmp2, align 4 + br label %bb6 + +bb4: ; preds = %bb12 + %tmp5 = sext i32 %tmp16 to i64 + unreachable + +bb6: ; preds = %bb12, %bb + %tmp7 = phi i64 [ %tmp9, %bb12 ], [ 0, %bb ] + %tmp8 = phi i32 [ %tmp16, %bb12 ], [ %tmp3, %bb ] + %tmp9 = add nuw nsw i64 %tmp7, 1 + %tmp10 = icmp ult i64 %tmp7, 1048576 + br i1 %tmp10, label %bb12, label %bb11 + +bb11: ; preds = %bb6 + unreachable + +bb12: ; preds = %bb6 + %tmp13 = select i1 false, i32 0, i32 %tmp8 + %tmp14 = add i32 %tmp8, %tmp1 + %tmp15 = select i1 false, i32 %tmp14, i32 %tmp13 + %tmp16 = add i32 %tmp14, 1 + %tmp17 = fcmp olt double 0.000000e+00, 2.270000e+02 + br i1 %tmp17, label %bb6, label %bb4 +} Index: llvm/test/Transforms/LoopStrengthReduce/pr43678.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopStrengthReduce/pr43678.ll @@ -0,0 +1,69 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; Check that LSR produces correct IR in terms of SSA. 
+; CHECK-LABEL: test +define void @test() { +bb: + %tmp = load i32, i32 addrspace(3)* undef, align 4 + %tmp1 = add i32 undef, 12 + %tmp2 = trunc i64 undef to i32 + %tmp3 = mul i32 %tmp1, %tmp2 + %tmp4 = sub i32 %tmp, %tmp3 + %tmp5 = ashr i32 undef, undef + %tmp6 = sub i32 %tmp4, %tmp5 + br label %bb7 + +bb7: ; preds = %bb32, %bb + %tmp8 = phi i64 [ 0, %bb ], [ %tmp34, %bb32 ] + %tmp9 = phi i32 [ 0, %bb ], [ %tmp35, %bb32 ] + %tmp10 = icmp ult i64 %tmp8, 65536 + br i1 %tmp10, label %bb12, label %bb11 + +bb11: ; preds = %bb7 + unreachable + +bb12: ; preds = %bb7 + %tmp13 = add i32 %tmp9, %tmp6 + %tmp14 = icmp slt i32 undef, undef + br i1 %tmp14, label %bb17, label %bb15 + +bb15: ; preds = %bb32, %bb29, %bb26, %bb23, %bb20, %bb17, %bb12 + %tmp16 = phi i32 [ %tmp35, %bb32 ], [ %tmp30, %bb29 ], [ %tmp27, %bb26 ], [ %tmp24, %bb23 ], [ %tmp21, %bb20 ], [ %tmp18, %bb17 ], [ %tmp13, %bb12 ] + call void @widget() [ "deopt"(i32 %tmp16, i32 3, i32 %tmp) ] + unreachable + +bb17: ; preds = %bb12 + %tmp18 = add i32 %tmp13, %tmp6 + %tmp19 = icmp slt i32 undef, undef + br i1 %tmp19, label %bb20, label %bb15 + +bb20: ; preds = %bb17 + %tmp21 = add i32 %tmp18, %tmp6 + %tmp22 = icmp slt i32 undef, undef + br i1 %tmp22, label %bb23, label %bb15 + +bb23: ; preds = %bb20 + %tmp24 = add i32 %tmp21, %tmp6 + %tmp25 = icmp slt i32 undef, undef + br i1 %tmp25, label %bb26, label %bb15 + +bb26: ; preds = %bb23 + %tmp27 = add i32 %tmp24, %tmp6 + %tmp28 = icmp slt i32 undef, undef + br i1 %tmp28, label %bb29, label %bb15 + +bb29: ; preds = %bb26 + %tmp30 = add i32 %tmp27, %tmp6 + %tmp31 = icmp slt i32 undef, undef + br i1 %tmp31, label %bb32, label %bb15 + +bb32: ; preds = %bb29 + %tmp33 = add i32 %tmp30, %tmp6 + %tmp34 = add nuw nsw i64 %tmp8, 8 + %tmp35 = add i32 %tmp33, %tmp6 + br i1 false, label %bb7, label %bb15 +} + +declare void @widget()