Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -2127,8 +2128,11 @@
       EnableMemAccessVersioning &&
       !TheLoop->getHeader()->getParent()->hasOptSize();
 
-  // For each block.
-  for (BasicBlock *BB : TheLoop->blocks()) {
+  // Traverse blocks in deterministic order, regardless of their storage in
+  // the loop info.
+  LoopBlocksRPO RPOT(TheLoop);
+  RPOT.perform(LI);
+  for (BasicBlock *BB : RPOT) {
     // Scan the BB and collect legal loads and stores. Also detect any
     // convergent instructions.
     for (Instruction &I : *BB) {
Index: llvm/test/Transforms/LoopVectorize/X86/pr56672.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/pr56672.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='loop(loop-rotate),loop-vectorize' -S -mcpu=skylake-avx512 -S %s | FileCheck %s
+; RUN: opt -passes='loop(loop-rotate),invalidate,loop-vectorize' -S -mcpu=skylake-avx512 -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that vectorizer's behavior based on LAA remains the same regardless
+; of the order of blocks in loop.
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP81:%.*]] = load i32, i32 addrspace(1)* getelementptr inbounds (i32, i32 addrspace(1)* null, i64 16), align 4
+; CHECK-NEXT:    [[TMP92:%.*]] = add i32 [[TMP81]], -5
+; CHECK-NEXT:    store i32 [[TMP92]], i32 addrspace(1)* getelementptr inbounds (i32, i32 addrspace(1)* null, i64 16), align 4
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP74:%.*]] = phi i32 addrspace(1)* [ getelementptr inbounds (i32, i32 addrspace(1)* null, i64 16), [[BB:%.*]] ], [ [[TMP7:%.*]], [[BB4:%.*]] ]
+; CHECK-NEXT:    [[TMP53:%.*]] = phi i64 [ 16, [[BB]] ], [ [[TMP5:%.*]], [[BB4]] ]
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    store i32 1, i32 addrspace(1)* [[TMP74]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = add nuw i64 [[TMP53]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP]], undef
+; CHECK-NEXT:    br i1 [[TMP3]], label [[BB4]], label [[BB11:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[TMP5]] = phi i64 [ [[TMP]], [[BB2]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i64 [ [[TMP53]], [[BB2]] ]
+; CHECK-NEXT:    [[TMP7]] = getelementptr inbounds i32, i32 addrspace(1)* null, i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32 addrspace(1)* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP8]], -5
+; CHECK-NEXT:    store i32 [[TMP9]], i32 addrspace(1)* [[TMP7]], align 4
+; CHECK-NEXT:    br i1 false, label [[BB10:%.*]], label [[BB1]]
+; CHECK:       bb10:
+; CHECK-NEXT:    unreachable
+; CHECK:       bb11:
+; CHECK-NEXT:    ret void
+;
+bb:
+  br label %bb4
+
+bb1:                                              ; preds = %bb4
+  br label %bb2
+
+bb2:                                              ; preds = %bb1
+  store i32 1, i32 addrspace(1)* %tmp7, align 4
+  %tmp = add nuw i64 %tmp5, 1
+  %tmp3 = icmp ult i64 %tmp, undef
+  br i1 %tmp3, label %bb4, label %bb11
+
+bb4:                                              ; preds = %bb2, %bb
+  %tmp5 = phi i64 [ %tmp, %bb2 ], [ 16, %bb ]
+  %tmp6 = phi i64 [ %tmp5, %bb2 ], [ 15, %bb ]
+  %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* null, i64 %tmp5
+  %tmp8 = load i32, i32 addrspace(1)* %tmp7, align 4
+  %tmp9 = add i32 %tmp8, -5
+  store i32 %tmp9, i32 addrspace(1)* %tmp7, align 4
+  br i1 false, label %bb10, label %bb1
+
+bb10:                                             ; preds = %bb4
+  unreachable
+
+bb11:                                             ; preds = %bb2
+  ret void
+}