Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -783,6 +783,11 @@ if (NegStride) Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE); + // TODO: ideally we should still be able to generate memset if SCEV expander + // is taught to generate the dependencies at the latest point. + if (!isSafeToExpand(Start, *SE)) + return false; + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read Index: test/Transforms/LoopIdiom/unsafe.ll =================================================================== --- /dev/null +++ test/Transforms/LoopIdiom/unsafe.ll @@ -0,0 +1,70 @@ +; RUN: opt -S < %s -loop-idiom | FileCheck %s +; CHECK-NOT: memset +; check that memset is not generated (for assignment in for.body5) because that will result +; in udiv hoisted out of the loop by the SCEV Expander +; TODO: ideally we should be able to generate memset +; if SCEV expander is taught to generate the dependencies +; at the right point. 
+
+@e = common local_unnamed_addr global i32 0, align 4 ; initial value of the outermost loop counter (%e.promoted)
+@d = common local_unnamed_addr global i32 0, align 4 ; initial middle-loop counter; also gates the block that holds the udiv
+@a = common local_unnamed_addr global i32 0, align 4 ; dividend of the udiv in for.body2.lr.ph
+@b = common local_unnamed_addr global i32 0, align 4 ; divisor of the udiv in for.body2.lr.ph
+@f = common local_unnamed_addr global i32 0, align 4 ; initial value of the innermost induction variable
+@c = common local_unnamed_addr global [1 x i8] zeroinitializer, align 1 ; array written by the store in for.body5
+define i32 @main() local_unnamed_addr #0 { ; NOTE(review): no 'attributes #0' definition appears in this hunk
+entry:
+  %.pr = load i32, i32* @e, align 4
+  %cmp16 = icmp slt i32 %.pr, 1
+  br i1 %cmp16, label %for.cond1thread-pre-split.lr.ph, label %for.end11 ; enter the loop nest only when @e < 1
+for.cond1thread-pre-split.lr.ph: ; preds = %entry
+  %0 = load i32, i32* @a, align 4 ; udiv dividend, loaded once before the loop nest
+  %1 = load i32, i32* @b, align 4 ; udiv divisor; never compared against zero in this function
+  %e.promoted = load i32, i32* @e, align 4
+  br label %for.cond1thread-pre-split
+for.cond1thread-pre-split: ; preds = %for.cond1thread-pre-split.lr.ph, %for.inc9
+  %inc1017 = phi i32 [ %e.promoted, %for.cond1thread-pre-split.lr.ph ], [ %inc10, %for.inc9 ] ; outermost loop counter
+  %.pr12 = load i32, i32* @d, align 4
+  %tobool15 = icmp eq i32 %.pr12, 0
+  br i1 %tobool15, label %for.inc9, label %for.body2.lr.ph ; @d == 0 skips the udiv block and both inner loops
+for.body2.lr.ph: ; preds = %for.cond1thread-pre-split
+  %div = udiv i32 %0, %1 ; reached only when @d != 0; per the LLVM LangRef a udiv by zero is undefined, so it must not move above that guard
+  %2 = sext i32 %div to i64 ; widened quotient, used as the base offset of the store address below
+  br label %for.body2
+for.body2: ; preds = %for.body2.lr.ph, %for.inc6
+  %3 = phi i32 [ %.pr12, %for.body2.lr.ph ], [ %inc7, %for.inc6 ] ; middle loop counter, starts at the value loaded from @d
+  %.pr13 = load i32, i32* @f, align 4
+  %tobool414 = icmp eq i32 %.pr13, 0
+  br i1 %tobool414, label %for.inc6, label %for.body5.preheader ; @f == 0 skips the innermost loop
+for.body5.preheader: ; preds = %for.body2
+  %4 = sext i32 %.pr13 to i64 ; start value of %indvars.iv
+  %5 = sub i32 -1, %.pr13 ; only used by %6
+  %6 = zext i32 %5 to i64 ; not used by any later instruction in this function
+  br label %for.body5
+for.body5: ; preds = %for.body5.preheader, %for.body5
+  %indvars.iv = phi i64 [ %4, %for.body5.preheader ], [ %indvars.iv.next, %for.body5 ] ; innermost induction variable, step 1
+  %7 = add nsw i64 %2, %indvars.iv ; store index = sext(%div) + %indvars.iv, so the start address depends on the udiv
+  %arrayidx = getelementptr inbounds [1 x i8], [1 x i8]* @c, i64 0, i64 %7
+  store i8 0, i8* %arrayidx, align 1 ; the strided zero store in for.body5 that the header comment of this test refers to
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %8 = trunc i64 %indvars.iv.next to i32
+  %tobool4 = icmp eq i32 %8, 0
+  br i1 %tobool4, label %for.cond3.for.inc6_crit_edge, label %for.body5 ; leave the innermost loop when the truncated next index is zero
+for.cond3.for.inc6_crit_edge: ; preds = %for.body5
+  br label %for.inc6
+for.inc6: ; preds = %for.cond3.for.inc6_crit_edge, %for.body2
+  %inc7 = add nsw i32 %3, 1
+  %tobool = icmp eq i32 %inc7, 0
+  br i1 %tobool, label %for.cond1.for.inc9_crit_edge, label %for.body2 ; middle loop runs until its counter reaches zero
+for.cond1.for.inc9_crit_edge: ; preds = %for.inc6
+  br label %for.inc9
+for.inc9: ; preds = %for.cond1.for.inc9_crit_edge, %for.cond1thread-pre-split
+  %inc10 = add nsw i32 %inc1017, 1
+  %cmp = icmp slt i32 %inc1017, 0
+  br i1 %cmp, label %for.cond1thread-pre-split, label %for.cond.for.end11_crit_edge ; repeat while the pre-increment counter is negative
+for.cond.for.end11_crit_edge: ; preds = %for.inc9
+  br label %for.end11
+for.end11: ; preds = %for.cond.for.end11_crit_edge, %entry
+  ret i32 0
+}
+