diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -712,11 +712,12 @@ // We assign a consecutive id to access from different alias sets. // Accesses between different groups doesn't need to be checked. - unsigned ASId = 1; + unsigned ASId = 0; for (auto &AS : AST) { int NumReadPtrChecks = 0; int NumWritePtrChecks = 0; bool CanDoAliasSetRT = true; + ++ASId; // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. @@ -752,9 +753,25 @@ // we have a pointer for which we couldn't find the bounds but we don't // actually need to emit any checks so it does not matter. bool NeedsAliasSetRTCheck = false; - if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) + if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) { NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1)); + // For alias sets without at least 2 writes or 1 write and 1 read, there + // is no need to generate RT checks and CanDoAliasSetRT for this alias set + // does not impact whether runtime checks can be generated. + if (!NeedsAliasSetRTCheck) { + assert((AS.size() <= 1 || + all_of(AS, + [this](auto AC) { + MemAccessInfo AccessWrite(AC.getValue(), true); + return DepCands.findValue(AccessWrite) == + DepCands.end(); + })) && + "Can only skip updating CanDoRT below, if all entries in AS " + "are reads or there is at most 1 entry"); + continue; + } + } // We need to perform run-time alias checks, but some pointers had bounds // that couldn't be checked. diff --git a/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll b/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/alias-set-with-uncomputable-bounds.ll @@ -0,0 +1,150 @@ +; RUN: opt -loop-vectorize -force-vector-width=2 -S %s | FileCheck %s + +; Tests with alias sets that contain points with uncomputable bounds because +; they include %offset.1, which is loaded in each loop iteration. + +; Alias set with uncomputable bounds contains a single load. We do not need +; runtime checks for that group and it should not block vectorization. +define void @test1_uncomputable_bounds_single_load(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) { +; CHECK-LABEL: define void @test1_uncomputable_bounds_single_load +; CHECK: vector.body +; CHECK: ret void + +entry: + %cond = icmp sgt i64 %N, 0 + br i1 %cond, label %ph, label %exit + +ph: + br label %loop + +loop: + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ] + %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv + %offset.1 = load i32, i32* %gep.1, align 4 + %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1 + %lv = load i32, i32* %gep.2, align 4 + %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv + store i32 %lv , i32* %gep.3, align 4 + %offset.2 = add nsw i64 %iv, %X + %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2 + store i32 %lv, i32* %gep.4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %loop.exit, label %loop + +loop.exit: + br label %exit + +exit: + ret void +} + +; Alias set with uncomputable bounds contains a single store. We do not need +; runtime checks for that group and it should not block vectorization. +define void @test2_uncomputable_bounds_single_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) { +; CHECK-LABEL: define void @test2_uncomputable_bounds_single_store +; CHECK: vector.body +; CHECK: ret void + +entry: + %cond = icmp sgt i64 %N, 0 + br i1 %cond, label %ph, label %exit + +ph: + br label %loop + +loop: + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ] + %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv + %offset.1 = load i32, i32* %gep.1, align 4 + %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1 + store i32 20, i32* %gep.2, align 4 + %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv + store i32 0 , i32* %gep.3, align 4 + %offset.2 = add nsw i64 %iv, %X + %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2 + store i32 10, i32* %gep.4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %loop.exit, label %loop + +loop.exit: + br label %exit + +exit: + ret void +} + +; Alias set with uncomputable bounds contains a load and a store. This blocks +; vectorization, as we cannot generate runtime-checks for the set. +define void @test3_uncomputable_bounds_load_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) { +; CHECK-LABEL: define void @test3_uncomputable_bounds_load_store +; CHECK-NOT: vector.body + +entry: + %cond = icmp sgt i64 %N, 0 + br i1 %cond, label %ph, label %exit + +ph: + br label %loop + +loop: + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ] + %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv + %offset.1 = load i32, i32* %gep.1, align 4 + %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1 + store i32 20, i32* %gep.2, align 4 + %gep.22 = getelementptr inbounds i32, i32* %ptr.2, i64 %iv + %lv = load i32, i32* %gep.22, align 4 + %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv + store i32 %lv , i32* %gep.3, align 4 + %offset.2 = add nsw i64 %iv, %X + %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2 + store i32 %lv, i32* %gep.4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %loop.exit, label %loop + +loop.exit: + br label %exit + +exit: + ret void +} + +; Alias set with uncomputable bounds contains a load and a store. This blocks +; vectorization, as we cannot generate runtime-checks for the set. +define void @test4_uncomputable_bounds_store_store(i32* noalias %ptr.1, i32* noalias %ptr.2, i32* noalias %ptr.3, i64 %N, i64 %X) { +; CHECK-LABEL: define void @test4_uncomputable_bounds_store_store +; CHECK-NOT: vector.body + +entry: + %cond = icmp sgt i64 %N, 0 + br i1 %cond, label %ph, label %exit + +ph: + br label %loop + +loop: + %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ] + %gep.1 = getelementptr inbounds i32, i32* %ptr.3, i64 %iv + %offset.1 = load i32, i32* %gep.1, align 4 + %gep.2 = getelementptr inbounds i32, i32* %ptr.2, i32 %offset.1 + store i32 20, i32* %gep.2, align 4 + %gep.22 = getelementptr inbounds i32, i32* %ptr.2, i64 %iv + store i32 30, i32* %gep.22, align 4 + %gep.3 = getelementptr inbounds i32, i32* %ptr.1, i64 %iv + store i32 0 , i32* %gep.3, align 4 + %offset.2 = add nsw i64 %iv, %X + %gep.4 = getelementptr inbounds i32, i32* %ptr.1, i64 %offset.2 + store i32 10, i32* %gep.4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %loop.exit, label %loop + +loop.exit: + br label %exit + +exit: + ret void +}