Handle both the read and the write access of a pointer when collecting
dependence candidates. The old loop built a single MemAccessInfo per alias-set
pointer (the write if one existed, otherwise the read), so a pointer that is
both loaded and stored only had its write considered. The new inner loop
visits every entry of the access set that refers to the pointer.

Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4268,57 +4268,66 @@
       bool UseDeferred = SetIteration > 0;
       PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
 
-      for (auto A : AS) {
-        Value *Ptr = A.getValue();
-        bool IsWrite = S.count(MemAccessInfo(Ptr, true));
+      for (auto AV : AS) {
+        Value *Ptr = AV.getValue();
 
-        // If we're using the deferred access set, then it contains only reads.
-        bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
-        if (UseDeferred && !IsReadOnlyPtr)
-          continue;
-        // Otherwise, the pointer must be in the PtrAccessSet, either as a read
-        // or a write.
-        assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
-                 S.count(MemAccessInfo(Ptr, false))) &&
-               "Alias-set pointer not in the access set?");
-
-        MemAccessInfo Access(Ptr, IsWrite);
-        DepCands.insert(Access);
-
-        // Memorize read-only pointers for later processing and skip them in the
-        // first round (they need to be checked after we have seen all write
-        // pointers). Note: we also mark pointer that are not consecutive as
-        // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need
-        // the second check for "!IsWrite".
-        if (!UseDeferred && IsReadOnlyPtr) {
-          DeferredAccesses.insert(Access);
-          continue;
-        }
+        // For a single memory access in AliasSetTracker, Accesses may contain
+        // both read and write, and they both need to be handled for CheckDeps.
+        for (auto AC : S) {
+          if (AC.getPointer() != Ptr)
+            continue;
 
-        // If this is a write - check other reads and writes for conflicts. If
-        // this is a read only check other writes for conflicts (but only if
-        // there is no other write to the ptr - this is an optimization to
-        // catch "a[i] = a[i] + " without having to do a dependence check).
-        if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
-          CheckDeps.insert(Access);
-          IsRTCheckNeeded = true;
-        }
+          bool IsWrite = AC.getInt();
+
+          // If we're using the deferred access set, then it contains only
+          // reads.
+          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+          if (UseDeferred && !IsReadOnlyPtr)
+            continue;
+          // Otherwise, the pointer must be in the PtrAccessSet, either as a
+          // read or a write.
+          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+                  S.count(MemAccessInfo(Ptr, false))) &&
+                 "Alias-set pointer not in the access set?");
+
+          MemAccessInfo Access(Ptr, IsWrite);
+          DepCands.insert(Access);
+
+          // Memorize read-only pointers for later processing and skip them in
+          // the first round (they need to be checked after we have seen all
+          // write pointers). Note: we also mark pointers that are not
+          // consecutive as "read-only" pointers (so that we check
+          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
+          if (!UseDeferred && IsReadOnlyPtr) {
+            DeferredAccesses.insert(Access);
+            continue;
+          }
 
-        if (IsWrite)
-          SetHasWrite = true;
-
-        // Create sets of pointers connected by a shared alias set and
-        // underlying object.
-        typedef SmallVector<Value*, 16> ValueVector;
-        ValueVector TempObjects;
-        GetUnderlyingObjects(Ptr, TempObjects, DL);
-        for (Value *UnderlyingObj : TempObjects) {
-          UnderlyingObjToAccessMap::iterator Prev =
-              ObjToLastAccess.find(UnderlyingObj);
-          if (Prev != ObjToLastAccess.end())
-            DepCands.unionSets(Access, Prev->second);
-
-          ObjToLastAccess[UnderlyingObj] = Access;
+          // If this is a write - check other reads and writes for conflicts. If
+          // this is a read only check other writes for conflicts (but only if
+          // there is no other write to the ptr - this is an optimization to
+          // catch "a[i] = a[i] + " without having to do a dependence check).
+          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+            CheckDeps.insert(Access);
+            IsRTCheckNeeded = true;
+          }
+
+          if (IsWrite)
+            SetHasWrite = true;
+
+          // Create sets of pointers connected by a shared alias set and
+          // underlying object.
+          typedef SmallVector<Value*, 16> ValueVector;
+          ValueVector TempObjects;
+          GetUnderlyingObjects(Ptr, TempObjects, DL);
+          for (Value *UnderlyingObj : TempObjects) {
+            UnderlyingObjToAccessMap::iterator Prev =
+                ObjToLastAccess.find(UnderlyingObj);
+            if (Prev != ObjToLastAccess.end())
+              DepCands.unionSets(Access, Prev->second);
+
+            ObjToLastAccess[UnderlyingObj] = Access;
+          }
         }
       }
     }
Index: test/Transforms/LoopVectorize/loop-vect-memdep.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/loop-vect-memdep.ll
@@ -0,0 +1,31 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; RUN: opt < %s -S -loop-vectorize -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Each iteration loads a[i] and a[i+1] and stores them back swapped, so both
+; pointers are read and written, and the value stored to a[i+1] is reloaded
+; as a[i] by the next iteration. The loop vectorizer has to reject this loop
+; because of that loop-carried dependence.
+; CHECK: LV: Can't vectorize due to memory conflicts
+
+define void @test_loop_novect(double** %arr, i64 %n) {
+for.body.lr.ph:
+  %t = load double** %arr, align 8
+  br label %for.body
+
+for.body:                                      ; preds = %for.body, %for.body.lr.ph
+  %i = phi i64 [ 0, %for.body.lr.ph ], [ %i.next, %for.body ]
+  %a = getelementptr inbounds double* %t, i64 %i
+  %i.next = add nuw nsw i64 %i, 1
+  %a.next = getelementptr inbounds double* %t, i64 %i.next
+  %t1 = load double* %a, align 8
+  %t2 = load double* %a.next, align 8
+  store double %t1, double* %a.next, align 8
+  store double %t2, double* %a, align 8
+  %c = icmp eq i64 %i, %n
+  br i1 %c, label %final, label %for.body
+
+final:                                         ; preds = %for.body
+  ret void
+}