Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -450,10 +450,10 @@ /// vectorizing this phi node. void fixReduction(PHINode *Phi); - /// \brief The Loop exit block may have single value PHI nodes where the - /// incoming value is 'Undef'. While vectorizing we only handled real values - /// that were defined inside the loop. Here we fix the 'undef case'. - /// See PR14725. + /// \brief The loop exit block may contain single-value LCSSA PHI nodes that + /// vectorization did not update, since it only handles values defined inside + /// the loop. Reuse the existing incoming value for the middle-block edge so + /// each PHI has one value per predecessor. See PR14725 and PR32859. void fixLCSSAPHIs(); /// Iteratively sink the scalarized operands of a predicated instruction into @@ -4346,7 +4346,7 @@ if (!LCSSAPhi) break; if (LCSSAPhi->getNumIncomingValues() == 1) - LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()), + LCSSAPhi->addIncoming(LCSSAPhi->getIncomingValue(0), LoopMiddleBlock); } } Index: test/Transforms/LoopVectorize/pr32859.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/pr32859.ll @@ -0,0 +1,154 @@ +; RUN: opt < %s -inline -sroa -jump-threading -lcssa -loop-rotate -loop-vectorize -loop-unroll -S 2>&1 | FileCheck %s + +; CHECK-LABEL: for.cond.preheader: +; CHECK: %e.0.ph = phi i32 [ 0, %vector.body ], [ 0, %for.cond.preheader.loopexit ] +; CHECK-NEXT: br label %for.body + +@a = global i32 1, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define void @foo() #0 { +entry: + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %d = alloca i32, align 4 + %0 = bitcast i32* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #3 + %1 = bitcast i32* %c to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* %1) #3 + %2 = bitcast i32* %d to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %2) #3 + store i32 0, i32* %c, align 4, !tbaa !1 + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %3 = load i32, i32* %c, align 4, !tbaa !1 + %cmp = icmp slt i32 %3, 16 + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + %4 = load i32, i32* %c, align 4, !tbaa !1 + store i32 %4, i32* %b, align 4, !tbaa !1 + store i32 0, i32* %d, align 4, !tbaa !1 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %5 = load i32, i32* %d, align 4, !tbaa !1 + %cmp2 = icmp slt i32 %5, 3 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %6 = load i32, i32* %b, align 4, !tbaa !1 + %and = and i32 %6, 1 + %tobool = icmp ne i32 %and, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %for.body3 + store i32 0, i32* %b, align 4, !tbaa !1 + br label %if.end + +if.end: ; preds = %if.then, %for.body3 + br label %for.inc + +for.inc: ; preds = %if.end + %7 = load i32, i32* %d, align 4, !tbaa !1 + %inc = add nsw i32 %7, 1 + store i32 %inc, i32* %d, align 4, !tbaa !1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %8 = load i32, i32* %c, align 4, !tbaa !1 + %inc5 = add nsw i32 %8, 1 + store i32 %inc5, i32* %c, align 4, !tbaa !1 + br label %for.cond + +for.end6: ; preds = %for.cond + %9 = bitcast i32* %d to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %9) #3 + %10 = bitcast i32* %c to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %10) #3 + %11 = bitcast i32* %b to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %11) #3 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: nounwind uwtable +define i32 @main() 
#0 { +entry: + %retval = alloca i32, align 4 + %e = alloca i32, align 4 + %f = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + %0 = bitcast i32* %e to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #3 + call void @foo() + store i32 0, i32* %e, align 4, !tbaa !1 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32, i32* %e, align 4, !tbaa !1 + %cmp = icmp slt i32 %1, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = bitcast i32* %f to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %2) #3 + %3 = load i32, i32* @a, align 4, !tbaa !1 + %div = sdiv i32 1, %3 + store i32 %div, i32* %f, align 4, !tbaa !1 + %4 = load i32, i32* %f, align 4, !tbaa !1 + %tobool = icmp ne i32 %4, 0 + br i1 %tobool, label %land.lhs.true, label %lor.rhs + +land.lhs.true: ; preds = %for.body + %5 = load i32, i32* %f, align 4, !tbaa !1 + %cmp1 = icmp eq i32 %5, 1 + br i1 %cmp1, label %lor.end, label %lor.rhs + +lor.rhs: ; preds = %land.lhs.true, %for.body + br label %lor.end + +lor.end: ; preds = %lor.rhs, %land.lhs.true + %6 = phi i1 [ true, %land.lhs.true ], [ false, %lor.rhs ] + %lor.ext = zext i1 %6 to i32 + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %lor.ext) + %7 = bitcast i32* %f to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %7) #3 + br label %for.inc + +for.inc: ; preds = %lor.end + %8 = load i32, i32* %e, align 4, !tbaa !1 + %inc = add nsw i32 %8, 1 + store i32 %inc, i32* %e, align 4, !tbaa !1 + br label %for.cond + +for.end: ; preds = %for.cond + %9 = bitcast i32* %e to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %9) #3 + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
#2 + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git 08c94e4bd75c5a2e4de5ea108d98739f5179dbeb) (http://llvm.org/git/llvm.git 7d66f52694bdfabf0081a1d40b99fca0c189a988)"} +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"}