Index: llvm/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll
===================================================================
--- llvm/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll
+++ llvm/test/Transforms/SimplifyCFG/preserve-llvm-loop-metadata.ll
@@ -48,7 +48,69 @@
   ret void
 }
 
+; The test case is constructed based on the following C++ code, as a simplified
+; test case to show why `llvm.loop.unroll.enable` could be dropped.
+; The negative FileCheck directive at the end of this file expects that metadata to be absent.
+;
+; While the C++ code itself might have the inner loop unrolled (e.g., with -O3),
+; the loss of inner-loop unroll metadata is a bug.
+; Under some optimization pipelines (e.g., FullLoopUnroll pass is skipped in ThinLTO prelink stage),
+; and in real-world C++ code (e.g., with larger loop body), failing to
+; preserve loop unroll metadata could cause missed loop unrolling.
+;
+; constexpr int kUnroll = 5;
+; int sum(int a, int b, int step, const int remainder, int* input) {
+;   int i = a, j = b;
+;   int sum = 0;
+;   while(j - i > remainder) {
+;     i += step;
+;     #pragma unroll
+;     for (int k = 0; k < kUnroll; k++) {
+;       asm volatile ("add %w0, %w1\n" : "=r"(sum) : "r"(input[k + i]):"cc");
+;     }
+;   }
+;   return sum;
+; }
+define i32 @test2(i32 %a, i32 %b, i32 %step, i32 %remainder, i32* %input) {
+entry:
+  br label %while.cond
+
+while.cond.loopexit:                              ; preds = %for.body
+  br label %while.cond, !llvm.loop !2             ; outer-loop back edge, loop ID !2
+
+while.cond:                                       ; preds = %while.cond.loopexit, %entry
+  %i.0 = phi i32 [ %a, %entry ], [ %add, %while.cond.loopexit ]
+  %sum.0 = phi i32 [ 0, %entry ], [ %1, %while.cond.loopexit ]
+  %sub = sub nsw i32 %b, %i.0
+  %cmp = icmp sgt i32 %sub, %remainder
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %add = add nsw i32 %i.0, %step
+  br label %for.body
+
+for.body:                                         ; preds = %while.body, %for.body
+  %k.07 = phi i32 [ 0, %while.body ], [ %inc, %for.body ]
+  %add2 = add nsw i32 %k.07, %add
+  %idxprom = sext i32 %add2 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %input, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = tail call i32 asm sideeffect "add ${0:w}, ${1:w}\0A", "=r,r,~{cc}"(i32 %0)
+  %inc = add nuw nsw i32 %k.07, 1
+  %cmp1 = icmp ult i32 %inc, 5
+  br i1 %cmp1, label %for.body, label %while.cond.loopexit, !llvm.loop !4 ; inner-loop back edge, loop ID !4
+
+while.end:                                        ; preds = %while.cond
+  %sum.0.lcssa = phi i32 [ %sum.0, %while.cond ]
+  ret i32 %sum.0.lcssa
+}
+
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.distribute.enable", i1 true}
+!2 = distinct !{!2, !3} ; loop ID used by the outer loop in @test2
+!3 = !{!"llvm.loop.mustprogress"}
+!4 = distinct !{!4, !3, !5} ; loop ID used by the inner loop in @test2; carries the unroll hint !5
+!5 = !{!"llvm.loop.unroll.enable"}
 ; CHECK: !0 = distinct !{!0, !1}
 ; CHECK: !1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK-NOT: !{!"llvm.loop.unroll.enable"}