Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4290,57 +4290,66 @@
       bool UseDeferred = SetIteration > 0;
       PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
 
-      for (auto A : AS) {
-        Value *Ptr = A.getValue();
-        bool IsWrite = S.count(MemAccessInfo(Ptr, true));
+      for (auto AV : AS) {
+        Value *Ptr = AV.getValue();
 
-        // If we're using the deferred access set, then it contains only reads.
-        bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
-        if (UseDeferred && !IsReadOnlyPtr)
-          continue;
-        // Otherwise, the pointer must be in the PtrAccessSet, either as a read
-        // or a write.
-        assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
-                S.count(MemAccessInfo(Ptr, false))) &&
-               "Alias-set pointer not in the access set?");
-
-        MemAccessInfo Access(Ptr, IsWrite);
-        DepCands.insert(Access);
-
-        // Memorize read-only pointers for later processing and skip them in the
-        // first round (they need to be checked after we have seen all write
-        // pointers). Note: we also mark pointer that are not consecutive as
-        // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need
-        // the second check for "!IsWrite".
-        if (!UseDeferred && IsReadOnlyPtr) {
-          DeferredAccesses.insert(Access);
-          continue;
-        }
+        // For a single memory access in AliasSetTracker, Accesses may contain
+        // both read and write, and they both need to be handled for CheckDeps.
+        for (auto AC : S) {
+          if (AC.getPointer() != Ptr)
+            continue;
 
-        // If this is a write - check other reads and writes for conflicts. If
-        // this is a read only check other writes for conflicts (but only if
-        // there is no other write to the ptr - this is an optimization to
-        // catch "a[i] = a[i] + " without having to do a dependence check).
-        if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
-          CheckDeps.insert(Access);
-          IsRTCheckNeeded = true;
-        }
+          bool IsWrite = AC.getInt();
+
+          // If we're using the deferred access set, then it contains only
+          // reads.
+          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+          if (UseDeferred && !IsReadOnlyPtr)
+            continue;
+          // Otherwise, the pointer must be in the PtrAccessSet, either as a
+          // read or a write.
+          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+                  S.count(MemAccessInfo(Ptr, false))) &&
+                 "Alias-set pointer not in the access set?");
+
+          MemAccessInfo Access(Ptr, IsWrite);
+          DepCands.insert(Access);
+
+          // Memorize read-only pointers for later processing and skip them in
+          // the first round (they need to be checked after we have seen all
+          // write pointers). Note: we also mark pointers that are not
+          // consecutive as "read-only" pointers (so that we check
+          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
+          if (!UseDeferred && IsReadOnlyPtr) {
+            DeferredAccesses.insert(Access);
+            continue;
+          }
 
-        if (IsWrite)
-          SetHasWrite = true;
-
-        // Create sets of pointers connected by a shared alias set and
-        // underlying object.
-        typedef SmallVector<Value*, 16> ValueVector;
-        ValueVector TempObjects;
-        GetUnderlyingObjects(Ptr, TempObjects, DL);
-        for (Value *UnderlyingObj : TempObjects) {
-          UnderlyingObjToAccessMap::iterator Prev =
-              ObjToLastAccess.find(UnderlyingObj);
-          if (Prev != ObjToLastAccess.end())
-            DepCands.unionSets(Access, Prev->second);
-
-          ObjToLastAccess[UnderlyingObj] = Access;
+          // If this is a write - check other reads and writes for conflicts. If
+          // this is a read only check other writes for conflicts (but only if
+          // there is no other write to the ptr - this is an optimization to
+          // catch "a[i] = a[i] + " without having to do a dependence check).
+          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+            CheckDeps.insert(Access);
+            IsRTCheckNeeded = true;
+          }
+
+          if (IsWrite)
+            SetHasWrite = true;
+
+          // Create sets of pointers connected by a shared alias set and
+          // underlying object.
+          typedef SmallVector<Value*, 16> ValueVector;
+          ValueVector TempObjects;
+          GetUnderlyingObjects(Ptr, TempObjects, DL);
+          for (Value *UnderlyingObj : TempObjects) {
+            UnderlyingObjToAccessMap::iterator Prev =
+                ObjToLastAccess.find(UnderlyingObj);
+            if (Prev != ObjToLastAccess.end())
+              DepCands.unionSets(Access, Prev->second);
+
+            ObjToLastAccess[UnderlyingObj] = Access;
+          }
         }
       }
     }
Index: test/Transforms/LoopVectorize/loop-vect-memdep.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/loop-vect-memdep.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -S -loop-vectorize -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; The loop loads and stores both %t275[i] and %t275[i+1] in every iteration, so
+; the value stored to %t275[i+1] is read back as %t275[i] by the next iteration.
+; The vectorizer must detect this dependence and refuse to vectorize.
+; CHECK: LV: Can't vectorize due to memory conflicts
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test_loop_novect(double ** %Array_.i, i32 %indvars.iv1438, double *%t9, double *%t10) {
+for.body209.lr.ph:                                ; preds = %for.cond207.preheader
+  %t275 = load double** %Array_.i, align 8
+  br label %for.body209
+
+for.body209:                                      ; preds = %for.body209, %for.body209.lr.ph
+  %indvars.iv1436 = phi i64 [ 0, %for.body209.lr.ph ], [ %indvars.iv.next1437, %for.body209 ]
+  %add.i952 = add i64 0, %indvars.iv1436
+  %arrayidx.i954 = getelementptr inbounds double* %t275, i64 %add.i952
+  %indvars.iv.next1437 = add nuw nsw i64 %indvars.iv1436, 1
+  %add.i995 = add i64 0, %indvars.iv.next1437
+  %arrayidx.i997 = getelementptr inbounds double* %t275, i64 %add.i995
+  %arrayidx.i1012 = getelementptr inbounds double* %t9, i64 %indvars.iv1436
+  %arrayidx.i1058 = getelementptr inbounds double* %t10, i64 %indvars.iv1436
+  %t281 = load double* %arrayidx.i1012, align 8
+  %t282 = load double* %arrayidx.i954, align 8
+  %mul.i1071 = fmul double %t281, %t282
+  %t283 = load double* %arrayidx.i1058, align 8
+  %t284 = load double* %arrayidx.i997, align 8
+  %mul1.i = fmul double %t283, %t284
+  %add.i1072 = fadd double %mul.i1071, %mul1.i
+  %t285 = fmul double %t282, %t283
+  %mul3.i = fmul double %t281, %t284
+  %add4.i = fsub double %mul3.i, %t285
+  store double %add4.i, double* %arrayidx.i997, align 8
+  store double %add.i1072, double* %arrayidx.i954, align 8
+  %lftr.wideiv1440 = trunc i64 %indvars.iv1436 to i32
+  %exitcond1441 = icmp eq i32 %lftr.wideiv1440, %indvars.iv1438
+  br i1 %exitcond1441, label %invoke.cont239, label %for.body209
+
+invoke.cont239:
+  ret void
+}