Index: lib/Transforms/Scalar/LoopRotation.cpp =================================================================== --- lib/Transforms/Scalar/LoopRotation.cpp +++ lib/Transforms/Scalar/LoopRotation.cpp @@ -82,6 +82,9 @@ if (skipOptnoneFunction(L)) return false; + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + LI = &getAnalysis(); TTI = &getAnalysis(); @@ -96,6 +99,12 @@ MadeChange = true; SimplifiedLatch = false; } + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + return MadeChange; } Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1067,7 +1067,8 @@ return false; if (DL == NULL) { - DEBUG(dbgs() << "LV: Not vectorizing: Missing data layout\n"); + DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName() + << ": Missing data layout\n"); return false; } @@ -1090,13 +1091,20 @@ bool processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); - DEBUG(dbgs() << "LV: Checking a loop in \"" + DEBUG(dbgs() << "\nLV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\" from " << getDebugLocString(L->getHeader()->getFirstNonPHIOrDbg()) << "\n"); LoopVectorizeHints Hints(L, DisableUnrolling); + DEBUG(dbgs() << "LV: Loop hints:" + << " force=" << (Hints.Force == 0 + ? "disabled" + : (Hints.Force == 1 ? "enabled" : "?")) + << " width=" << Hints.Width << " unroll=" << Hints.Unroll + << "\n"); + if (Hints.Force == 0) { DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); return false; @@ -1150,10 +1158,10 @@ } // Select the optimal vectorization factor. - LoopVectorizationCostModel::VectorizationFactor VF; - VF = CM.selectVectorizationFactor(OptForSize, Hints.Width); + const LoopVectorizationCostModel::VectorizationFactor VF = + CM.selectVectorizationFactor(OptForSize, Hints.Width); // Select the unroll factor. - unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width, + const unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width, VF.Cost); DEBUG(dbgs() << "LV: Found a vectorizable loop (" @@ -5076,7 +5084,7 @@ } } - DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n"); + DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n"); Factor.Width = Width; Factor.Cost = Width * Cost; return Factor; Index: test/Transforms/LoopVectorize/vect.omp.persistence.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/vect.omp.persistence.ll @@ -0,0 +1,87 @@ +; RUN: opt < %s -O2 -force-vector-unroll=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s + +; Loop from "rotated" +; CHECK: LV: Loop hints: force=enabled +; Loop from "nonrotated" +; CHECK: LV: Loop hints: force=enabled +; No more loops in the module +; CHECK-NOT: LV: Loop hints: force= +; In total only 1 loop should be rotated. +; CHECK: 1 loop-rotate + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; See http://reviews.llvm.org/D3348 for details. + +; +; Test #1 +; +; Ensure that "llvm.vectorizer.enable" metadata was not lost prior to LoopVectorize pass. +; In past LoopRotate was clearing that metadata. +; +; The source C code is: +; void rotated(float *a, int size) +; { +; int t = 0; +; #pragma omp simd +; for (int i = 0; i < size; ++i) { +; a[i] = a[i-5] * a[i+2]; +; ++t; +; } +;} + +define void @rotated(float* nocapture %a, i64 %size) { +entry: + %cmp1 = icmp sgt i64 %size, 0 + br i1 %cmp1, label %for.header, label %for.end + +for.header: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %cmp2 = icmp sgt i64 %indvars.iv, %size + br i1 %cmp2, label %for.end, label %for.body + +for.body: + + %0 = add nsw i64 %indvars.iv, -5 + %arrayidx = getelementptr inbounds float* %a, i64 %0 + %1 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1 + %2 = add nsw i64 %indvars.iv, 2 + %arrayidx2 = getelementptr inbounds float* %a, i64 %2 + %3 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1 + %mul = fmul float %1, %3 + %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %mul, float* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !1 + + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.header, !llvm.loop !1 + +for.end: + ret void +} + +!1 = metadata !{metadata !1, metadata !2} +!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true} + +; +; Test #2 +; +; Ensure that "llvm.vectorizer.enable" metadata was not lost even +; if loop was not rotated (see http://reviews.llvm.org/D3348#comment-4). +; +define i32 @nonrotated(i32 %a) { +entry: + br label %loop_cond +loop_cond: + %indx = phi i32 [ 1, %entry ], [ %inc, %loop_inc ] + %cmp = icmp ne i32 %indx, %a + br i1 %cmp, label %return, label %loop_inc +loop_inc: + %inc = add i32 %indx, 1 + br label %loop_cond, !llvm.loop !3 +return: + ret i32 0 +} + +!3 = metadata !{metadata !3, metadata !4} +!4 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}