Index: include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- include/llvm/Analysis/LoopAccessAnalysis.h
+++ include/llvm/Analysis/LoopAccessAnalysis.h
@@ -311,7 +311,7 @@
   /// This struct holds information about the memory runtime legality check that
   /// a group of pointers do not overlap.
   struct RuntimePointerCheck {
-    RuntimePointerCheck() : Need(false) {}
+    RuntimePointerCheck(ScalarEvolution *SE) : Need(false), SE(SE) {}
 
     /// Reset the state of the pointer runtime information.
     void reset() {
@@ -322,16 +322,55 @@
       IsWritePtr.clear();
       DependencySetId.clear();
       AliasSetId.clear();
+      Exprs.clear();
     }
 
     /// Insert a pointer and calculate the start and end SCEVs.
-    void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
-                unsigned DepSetId, unsigned ASId,
-                const ValueToValueMap &Strides);
+    void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
+                unsigned ASId, const ValueToValueMap &Strides);
 
     /// \brief No run-time memory checking is necessary.
     bool empty() const { return Pointers.empty(); }
 
+    /// A grouping of pointers. A single memcheck is required between
+    /// two groups.
+    struct CheckingPtrGroup {
+      /// \brief Create a new pointer checking group containing a single
+      /// pointer, with index \p Index in RtCheck.
+      CheckingPtrGroup(unsigned Index, RuntimePointerCheck &RtCheck)
+          : RtCheck(RtCheck), High(RtCheck.Ends[Index]),
+            Low(RtCheck.Starts[Index]) {
+        Members.push_back(Index);
+      }
+
+      /// \brief Tries to add the pointer recorded in RtCheck at index
+      /// \p Index to this pointer checking group. We can only add a pointer
+      /// to a checking group if we will still be able to get
+      /// the upper and lower bounds of the check. Returns true in case
+      /// of success, false otherwise.
+      bool addPointer(unsigned Index);
+
+      /// Constitutes the context of this pointer checking group. For each
+      /// pointer that is a member of this group we will retain the index
+      /// at which it appears in RtCheck.
+      RuntimePointerCheck &RtCheck;
+      /// The SCEV expression which represents the upper bound of all the
+      /// pointers in this group.
+      const SCEV *High;
+      /// The SCEV expression which represents the lower bound of all the
+      /// pointers in this group.
+      const SCEV *Low;
+      /// Indices of all the pointers that constitute this grouping.
+      SmallVector<unsigned, 2> Members;
+    };
+
+    /// \brief Groups pointers such that a single memcheck is required
+    /// between two different groups. This will clear the CheckingGroups vector
+    /// and re-compute it. We will only group dependencies if \p UseDependencies
+    /// is true, otherwise we will create a separate group for each pointer.
+    void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
+                     bool UseDependencies);
+
     /// \brief Decide whether we need to issue a run-time check for pointer at
     /// index \p I and \p J to prove their independence.
     ///
@@ -341,6 +380,12 @@
     bool needsChecking(unsigned I, unsigned J,
                        const SmallVectorImpl<int> *PtrPartition) const;
 
+    /// \brief Decide if we need to add a check between two groups of pointers,
+    /// according to needsChecking.
+    bool needsChecking(const CheckingPtrGroup &M,
+                       const CheckingPtrGroup &N,
+                       const SmallVectorImpl<int> *PtrPartition) const;
+
     /// \brief Return true if any pointer requires run-time checking according
     /// to needsChecking.
     bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const;
@@ -372,6 +417,12 @@
     SmallVector<unsigned, 2> DependencySetId;
     /// Holds the id of the disjoint alias set to which this pointer belongs.
     SmallVector<unsigned, 2> AliasSetId;
+    /// Holds at position i the SCEV for the access i.
+    SmallVector<const SCEV *, 2> Exprs;
+    /// Holds a partitioning of pointers into "check groups".
+    SmallVector<CheckingPtrGroup, 2> CheckingGroups;
+    /// Holds a pointer to the ScalarEvolution analysis.
+    ScalarEvolution *SE;
   };
 
   LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -48,6 +48,13 @@
     cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
 unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
 
+/// \brief The maximum number of comparisons to perform when merging
+/// runtime memory checks.
+static cl::opt<unsigned> MemoryCheckMergeThreshold(
+    "memory-check-merge-threshold", cl::Hidden,
+    cl::desc("Maximum number of comparisons done when trying to merge "
+             "runtime memory checks. (default = 100)"),
+    cl::init(100));
+
 /// Maximum SIMD width.
 const unsigned VectorizerParams::MaxVectorWidth = 64;
 
@@ -113,8 +120,8 @@
 }
 
 void LoopAccessInfo::RuntimePointerCheck::insert(
-    ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
-    unsigned ASId, const ValueToValueMap &Strides) {
+    Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId,
+    const ValueToValueMap &Strides) {
   // Get the stride replaced scev.
   const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
@@ -127,6 +134,136 @@
   IsWritePtr.push_back(WritePtr);
   DependencySetId.push_back(DepSetId);
   AliasSetId.push_back(ASId);
+  Exprs.push_back(Sc);
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
+    const CheckingPtrGroup &M, const CheckingPtrGroup &N,
+    const SmallVectorImpl<int> *PtrPartition) const {
+  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
+    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
+      if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+        return true;
+  return false;
+}
+
+/// Compare \p I and \p J and return the minimum.
+/// Return nullptr in case we couldn't find an answer.
+static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
+                                   ScalarEvolution *SE) {
+  const SCEV *Diff = SE->getMinusSCEV(J, I);
+  const SCEVConstant *C = dyn_cast<SCEVConstant>(Diff);
+
+  if (!C)
+    return nullptr;
+  if (C->getValue()->isNegative())
+    return J;
+  return I;
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::CheckingPtrGroup::addPointer(
+    unsigned Index) {
+  // Compare the starts and ends with the known minimum and maximum
+  // of this set. We need to know how we compare against the min/max
+  // of the set in order to be able to emit memchecks.
+  const SCEV *Min0 = getMinFromExprs(RtCheck.Starts[Index], Low, RtCheck.SE);
+  if (!Min0)
+    return false;
+
+  const SCEV *Min1 = getMinFromExprs(RtCheck.Ends[Index], High, RtCheck.SE);
+  if (!Min1)
+    return false;
+
+  // Update the low bound expression if we've found a new min value.
+  if (Min0 == RtCheck.Starts[Index])
+    Low = RtCheck.Starts[Index];
+
+  // Update the high bound expression if we've found a new max value.
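+  // (getMinFromExprs returned the smaller of the two end expressions; if
+  // that smaller value is the current High rather than the new pointer's
+  // end, the new end is the larger of the two and becomes the group's
+  // upper bound.)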
+  if (Min1 != RtCheck.Ends[Index])
+    High = RtCheck.Ends[Index];
+
+  Members.push_back(Index);
+  return true;
+}
+
+void LoopAccessInfo::RuntimePointerCheck::groupChecks(
+    MemoryDepChecker::DepCandidates &DepCands,
+    bool UseDependencies) {
+  // We build the groups from dependency candidates equivalence classes
+  // because:
+  //    - We know that pointers in the same equivalence class share
+  //      the same underlying object and therefore there is a chance
+  //      that we can compare pointers
+  //    - We wouldn't be able to merge two pointers for which we need
+  //      to emit a memcheck. The classes in DepCands are already
+  //      conveniently built such that no two pointers in the same
+  //      class need checking against each other.
+
+  // We use the following (greedy) algorithm to construct the groups:
+  // For every pointer in the equivalence class:
+  //   For each existing group:
+  //   - if the difference between this pointer and the min/max bounds
+  //     of the group is a constant, then make the pointer part of the
+  //     group and update the min/max bounds of that group as required.
+
+  CheckingGroups.clear();
+
+  // If we don't have the dependency partitions, construct a new
+  // checking pointer group for each pointer.
+  if (!UseDependencies) {
+    for (unsigned I = 0; I < Pointers.size(); ++I)
+      CheckingGroups.push_back(CheckingPtrGroup(I, *this));
+    return;
+  }
+
+  unsigned TotalComparisons = 0;
+
+  DenseMap<Value *, unsigned> PositionMap;
+  for (unsigned Pointer = 0; Pointer < Pointers.size(); ++Pointer)
+    PositionMap[Pointers[Pointer]] = Pointer;
+
+  // Go through all equivalence classes, get the "pointer check groups"
+  // and add them to the overall solution.
+  for (auto DI = DepCands.begin(), DE = DepCands.end(); DI != DE; ++DI) {
+    if (!DI->isLeader())
+      continue;
+
+    SmallVector<CheckingPtrGroup, 2> Groups;
+
+    for (auto MI = DepCands.member_begin(DI), ME = DepCands.member_end();
+         MI != ME; ++MI) {
+      unsigned Pointer = PositionMap[MI->getPointer()];
+      bool Merged = false;
+
+      // Go through all the existing sets and see if we can find one
+      // which can include this pointer.
+      for (CheckingPtrGroup &Group : Groups) {
+        // Don't perform more than a certain number of comparisons.
+        // This should limit the cost of grouping the pointers to something
+        // reasonable. If we do end up hitting this threshold, the algorithm
+        // will create separate groups for all remaining pointers.
+        if (TotalComparisons > MemoryCheckMergeThreshold)
+          break;
+
+        TotalComparisons++;
+
+        if (Group.addPointer(Pointer)) {
+          Merged = true;
+          break;
+        }
+      }
+
+      if (!Merged)
+        // We couldn't add this pointer to any existing set or the threshold
+        // for the number of comparisons has been reached. Create a new group
+        // to hold the current pointer.
+        Groups.push_back(CheckingPtrGroup(Pointer, *this));
+    }
+
+    // We've computed the grouped checks for this partition.
+    // Save the results and continue with the next one.
+    std::copy(Groups.begin(), Groups.end(),
+              std::back_inserter(CheckingGroups));
+  }
 }
 
 bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
@@ -156,42 +293,71 @@
 void LoopAccessInfo::RuntimePointerCheck::print(
     raw_ostream &OS, unsigned Depth,
     const SmallVectorImpl<int> *PtrPartition) const {
-  unsigned NumPointers = Pointers.size();
-  if (NumPointers == 0)
-    return;
   OS.indent(Depth) << "Run-time memory checks:\n";
+
   unsigned N = 0;
-  for (unsigned I = 0; I < NumPointers; ++I)
-    for (unsigned J = I + 1; J < NumPointers; ++J)
-      if (needsChecking(I, J, PtrPartition)) {
-        OS.indent(Depth) << N++ << ":\n";
-        OS.indent(Depth + 2) << *Pointers[I];
-        if (PtrPartition)
-          OS << " (Partition: " << (*PtrPartition)[I] << ")";
-        OS << "\n";
-        OS.indent(Depth + 2) << *Pointers[J];
-        if (PtrPartition)
-          OS << " (Partition: " << (*PtrPartition)[J] << ")";
-        OS << "\n";
+  for (unsigned I = 0; I < CheckingGroups.size(); ++I)
+    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
+      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
+        OS.indent(Depth) << "Check " << N++ << ":\n";
+        OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
+
+        for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
+          OS.indent(Depth + 2) << *Pointers[CheckingGroups[I].Members[K]]
+                               << "\n";
+          if (PtrPartition)
+            OS << " (Partition: "
+               << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
+               << "\n";
+        }
+
+        OS.indent(Depth + 2) << "Against group " << J << ":\n";
+
+        for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
+          OS.indent(Depth + 2) << *Pointers[CheckingGroups[J].Members[K]]
+                               << "\n";
+          if (PtrPartition)
+            OS << " (Partition: "
+               << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
+               << "\n";
+        }
       }
+
+  OS.indent(Depth) << "Grouped accesses:\n";
+  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+    OS.indent(Depth + 2) << "Group " << I << ":\n";
+    OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
+                         << " High: " << *CheckingGroups[I].High << ")\n";
+    for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
+      OS.indent(Depth + 6) << "Member: " << *Exprs[CheckingGroups[I].Members[J]]
+                           << "\n";
+    }
+  }
 }
 
 unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
     const SmallVectorImpl<int> *PtrPartition) const {
-  unsigned NumPointers = Pointers.size();
+
+  unsigned NumPartitions = CheckingGroups.size();
   unsigned CheckCount = 0;
 
-  for (unsigned I = 0; I < NumPointers; ++I)
-    for (unsigned J = I + 1; J < NumPointers; ++J)
-      if (needsChecking(I, J, PtrPartition))
+  for (unsigned I = 0; I < NumPartitions; ++I)
+    for (unsigned J = I + 1; J < NumPartitions; ++J)
+      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
         CheckCount++;
   return CheckCount;
 }
 
 bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
     const SmallVectorImpl<int> *PtrPartition) const {
-  return getNumberOfChecks(PtrPartition) != 0;
+  unsigned NumPointers = Pointers.size();
+
+  for (unsigned I = 0; I < NumPointers; ++I)
+    for (unsigned J = I + 1; J < NumPointers; ++J)
+      if (needsChecking(I, J, PtrPartition))
+        return true;
+  return false;
 }
 
 namespace {
@@ -341,7 +507,7 @@
         // Each access has its own dependence set.
         DepId = RunningDepId++;
 
-      RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+      RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
 
       DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
     } else {
@@ -387,6 +553,9 @@
     }
   }
 
+  if (NeedRTCheck && CanDoRT)
+    RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+
   return CanDoRT;
 }
 
@@ -1360,32 +1529,35 @@
   if (!PtrRtCheck.Need)
     return std::make_pair(nullptr, nullptr);
 
-  unsigned NumPointers = PtrRtCheck.Pointers.size();
-  SmallVector<TrackingVH<Value> , 2> Starts;
-  SmallVector<TrackingVH<Value> , 2> Ends;
+  SmallVector<TrackingVH<Value>, 2> Starts;
+  SmallVector<TrackingVH<Value>, 2> Ends;
 
   LLVMContext &Ctx = Loc->getContext();
   SCEVExpander Exp(*SE, DL, "induction");
   Instruction *FirstInst = nullptr;
 
-  for (unsigned i = 0; i < NumPointers; ++i) {
-    Value *Ptr = PtrRtCheck.Pointers[i];
+  for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
+    const RuntimePointerCheck::CheckingPtrGroup &CG =
+        PtrRtCheck.CheckingGroups[i];
+    Value *Ptr = PtrRtCheck.Pointers[CG.Members[0]];
     const SCEV *Sc = SE->getSCEV(Ptr);
 
     if (SE->isLoopInvariant(Sc, TheLoop)) {
-      DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
-            *Ptr <<"\n");
+      DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+                   << "\n");
       Starts.push_back(Ptr);
       Ends.push_back(Ptr);
     } else {
-      DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
       unsigned AS = Ptr->getType()->getPointerAddressSpace();
 
       // Use this type for pointer arithmetic.
       Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+      Value *Start = nullptr, *End = nullptr;
 
-      Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
-      Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+      DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+      Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
+      End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
+      DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
       Starts.push_back(Start);
       Ends.push_back(End);
     }
@@ -1394,9 +1566,14 @@
   IRBuilder<> ChkBuilder(Loc);
   // Our instructions might fold to a constant.
   Value *MemoryRuntimeCheck = nullptr;
-  for (unsigned i = 0; i < NumPointers; ++i) {
-    for (unsigned j = i+1; j < NumPointers; ++j) {
-      if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+  for (unsigned i = 0; i < PtrRtCheck.CheckingGroups.size(); ++i) {
+    for (unsigned j = i + 1; j < PtrRtCheck.CheckingGroups.size(); ++j) {
+      const RuntimePointerCheck::CheckingPtrGroup &CGI =
+          PtrRtCheck.CheckingGroups[i];
+      const RuntimePointerCheck::CheckingPtrGroup &CGJ =
+          PtrRtCheck.CheckingGroups[j];
+
+      if (!PtrRtCheck.needsChecking(CGI, CGJ, PtrPartition))
         continue;
 
       unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
@@ -1447,8 +1624,8 @@
                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
                                DominatorTree *DT, LoopInfo *LI,
                                const ValueToValueMap &Strides)
-    : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
-      TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+    : PtrRtCheck(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI),
+      AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
       MaxSafeDepDistBytes(-1U), CanVecMem(false),
       StoreToLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
Index: test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
===================================================================
--- test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
+++ test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
@@ -1,19 +1,20 @@
 ; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
 
-; 3 reads and 3 writes should need 12 memchecks
-
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnueabi"
 
+; 3 reads and 3 writes should need 12 memchecks
+; CHECK: function 'testf':
 ; CHECK: Memory dependences are safe with run-time checks
-; Memory dependecies have labels starting from 0, so in
+
+; Memory dependencies have labels starting from 0, so in
 ; order to verify that we have n checks, we look for
 ; (n-1): and not n:.
 
 ; CHECK: Run-time memory checks:
-; CHECK-NEXT: 0:
-; CHECK: 11:
-; CHECK-NOT: 12:
+; CHECK-NEXT: Check 0:
+; CHECK: Check 11:
+; CHECK-NOT: Check 12:
 
 define void @testf(i16* %a,
                    i16* %b,
@@ -56,3 +57,162 @@
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+; The following (testg and testh) check that we can group
+; memory checks of accesses which differ by a constant value.
+; Both tests are based on the following C code:
+;
+;   void testh(short *a, short *b, short *c) {
+;     for (unsigned long ind = 0; ind < 20; ++ind) {
+;       c[2 * ind] = a[ind] * a[ind + 1];
+;       c[2 * ind + 1] = a[ind] * a[ind + 1] * b[ind];
+;     }
+;   }
+;
+; It is sufficient to check the intervals
+; [a, a + 21], [b, b + 20] against [c, c + 41].
+
+; 3 reads and 2 writes - two of the reads can be merged,
+; and the writes can be merged as well. This gives us a
+; total of 2 memory checks.
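+;
+; Note that no check is emitted between groups 0 and 1, the two
+; read-only groups: a memcheck is only required for pairs where at
+; least one of the pointers is written.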
+
+; CHECK: function 'testg':
+
+; CHECK: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT:   Comparing group 0:
+; CHECK-NEXT:     %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
+; CHECK-NEXT:     %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
+; CHECK-NEXT:   Against group 2:
+; CHECK-NEXT:     %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+; CHECK-NEXT:     %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT:   Comparing group 1:
+; CHECK-NEXT:     %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
+; CHECK-NEXT:   Against group 2:
+; CHECK-NEXT:     %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+; CHECK-NEXT:     %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT:   Group 0:
+; CHECK-NEXT:     (Low: %a High: (40 + %a))
+; CHECK-NEXT:       Member: {(2 + %a),+,2}
+; CHECK-NEXT:       Member: {%a,+,2}
+; CHECK-NEXT:   Group 1:
+; CHECK-NEXT:     (Low: %b High: (38 + %b))
+; CHECK-NEXT:       Member: {%b,+,2}
+; CHECK-NEXT:   Group 2:
+; CHECK-NEXT:     (Low: %c High: (78 + %c))
+; CHECK-NEXT:       Member: {(2 + %c),+,4}
+; CHECK-NEXT:       Member: {%c,+,4}
+
+define void @testg(i16* %a,
+                   i16* %b,
+                   i16* %c) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
+
+  %add = add nuw nsw i64 %ind, 1
+  %store_ind_inc = add nuw nsw i64 %store_ind, 1
+  %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
+
+  %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
+  %loadA = load i16, i16* %arrayidxA, align 2
+
+  %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
+  %loadA1 = load i16, i16* %arrayidxA1, align 2
+
+  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
+  %loadB = load i16, i16* %arrayidxB, align 2
+
+  %mul = mul i16 %loadA, %loadA1
+  %mul1 = mul i16 %mul, %loadB
+
+  %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+  store i16 %mul1, i16* %arrayidxC, align 2
+
+  %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+  store i16 %mul, i16* %arrayidxC1, align 2
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; 3 reads and 2 writes - the writes can be merged into a single
+; group, but the GEPs used for the reads are not marked as inbounds.
+; We can still merge them because the accesses use a unit stride,
+; so the GEPs cannot overflow.
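+; The resulting groups and bounds are identical to the ones in testg;
+; only the inbounds keyword on the %a and %b GEPs differs.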
+
+; CHECK: function 'testh':
+; CHECK: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT:   Comparing group 0:
+; CHECK-NEXT:     %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
+; CHECK-NEXT:     %arrayidxA = getelementptr i16, i16* %a, i64 %ind
+; CHECK-NEXT:   Against group 2:
+; CHECK-NEXT:     %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+; CHECK-NEXT:     %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT:   Comparing group 1:
+; CHECK-NEXT:     %arrayidxB = getelementptr i16, i16* %b, i64 %ind
+; CHECK-NEXT:   Against group 2:
+; CHECK-NEXT:     %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+; CHECK-NEXT:     %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT:   Group 0:
+; CHECK-NEXT:     (Low: %a High: (40 + %a))
+; CHECK-NEXT:       Member: {(2 + %a),+,2}
+; CHECK-NEXT:       Member: {%a,+,2}
+; CHECK-NEXT:   Group 1:
+; CHECK-NEXT:     (Low: %b High: (38 + %b))
+; CHECK-NEXT:       Member: {%b,+,2}
+; CHECK-NEXT:   Group 2:
+; CHECK-NEXT:     (Low: %c High: (78 + %c))
+; CHECK-NEXT:       Member: {(2 + %c),+,4}
+; CHECK-NEXT:       Member: {%c,+,4}
+
+define void @testh(i16* %a,
+                   i16* %b,
+                   i16* %c) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
+
+  %add = add nuw nsw i64 %ind, 1
+  %store_ind_inc = add nuw nsw i64 %store_ind, 1
+  %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
+
+  %arrayidxA = getelementptr i16, i16* %a, i64 %ind
+  %loadA = load i16, i16* %arrayidxA, align 2
+
+  %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
+  %loadA1 = load i16, i16* %arrayidxA1, align 2
+
+  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
+  %loadB = load i16, i16* %arrayidxB, align 2
+
+  %mul = mul i16 %loadA, %loadA1
+  %mul1 = mul i16 %mul, %loadB
+
+  %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
+  store i16 %mul1, i16* %arrayidxC, align 2
+
+  %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
+  store i16 %mul, i16* %arrayidxC1, align 2
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
Index: test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
===================================================================
--- test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
+++ test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
@@ -15,7 +15,9 @@
 ; CHECK-NEXT: Interesting Dependences:
 ; CHECK-NEXT: Run-time memory checks:
 ; CHECK-NEXT: 0:
+; CHECK-NEXT: Comparing group
 ; CHECK-NEXT:   %arrayidxA2 = getelementptr inbounds i16, i16* %a, i64 %idx
+; CHECK-NEXT: Against group
 ; CHECK-NEXT:   %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %indvar
 
 @B = common global i16* null, align 8
Index: test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
===================================================================
--- test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
+++ test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
@@ -14,10 +14,16 @@
 ; CHECK-NEXT:     store i16 %mul1, i16* %arrayidxA_plus_2, align 2
 ; CHECK: Run-time memory checks:
 ; CHECK-NEXT: 0:
+; CHECK-NEXT: Comparing group
+; CHECK-NEXT:   %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %storemerge3
 ; CHECK-NEXT:   %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add
+; CHECK-NEXT: Against group
 ; CHECK-NEXT:   %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %storemerge3
 ; CHECK-NEXT: 1:
+; CHECK-NEXT: Comparing group
+; CHECK-NEXT:   %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %storemerge3
 ; CHECK-NEXT:   %arrayidxA_plus_2 = getelementptr inbounds i16, i16* %a, i64 %add
+; CHECK-NEXT: Against group
 ; CHECK-NEXT:   %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %storemerge3
 
 @B = common global i16* null, align 8
Index: test/Transforms/LoopDistribute/basic-with-memchecks.ll
===================================================================
--- test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -32,8 +32,9 @@
   %e = load i32*, i32** @E, align 8
   br label %for.body
 
-; We have two compares for each array overlap check which is a total of 10
-; compares.
+; We have two compares for each array overlap check.
+; Since the checks to A and A + 4 get merged, this will give us a
+; total of 8 compares.
 ;
 ; CHECK: for.body.lver.memcheck:
 ; CHECK:     = icmp
@@ -48,9 +49,6 @@
 ; CHECK:     = icmp
 ; CHECK:     = icmp
 
-; CHECK:     = icmp
-; CHECK:     = icmp
-
 ; CHECK-NOT: = icmp
 ; CHECK:  br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph.ldist1
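
Reviewer note, not part of the patch: below is a minimal, self-contained C++
sketch of the greedy merging heuristic that groupChecks() and addPointer()
implement, useful for experimenting with the behaviour outside of LLVM. It is
a sketch under simplifying assumptions: plain (Base, Offset) bounds stand in
for the SCEV start/end expressions, a matching base models the case where
getMinFromExprs() succeeds (the difference of the two SCEVs folds to a
constant), and the per-equivalence-class iteration over DepCands is omitted.
All names in it (Bound, Access, Group, addToGroup, groupAccesses) are invented
for the example.

// grouping-sketch.cpp -- illustrative only, not part of the patch.
#include <algorithm>
#include <iostream>
#include <vector>

// A symbolic bound: Base is an opaque pointer symbol (think %a, %b, %c) and
// Offset a constant number of bytes past it. Two bounds can only be compared
// when their bases match -- the integer analogue of getMinFromExprs()
// succeeding only when the difference of the two SCEVs is a constant.
struct Bound {
  unsigned Base;
  long Offset;
};

// The [Start, End) byte range one pointer sweeps over the whole loop.
struct Access {
  Bound Start, End;
};

// The analogue of CheckingPtrGroup: a running [Low, High) hull plus the
// indices of the accesses it covers.
struct Group {
  Bound Low, High;
  std::vector<unsigned> Members;
};

// The analogue of CheckingPtrGroup::addPointer(): grow G to cover A, or fail
// when A's bounds cannot be compared against the group's current hull.
static bool addToGroup(Group &G, const Access &A, unsigned Index) {
  if (A.Start.Base != G.Low.Base || A.End.Base != G.High.Base)
    return false;
  G.Low.Offset = std::min(G.Low.Offset, A.Start.Offset);
  G.High.Offset = std::max(G.High.Offset, A.End.Offset);
  G.Members.push_back(Index);
  return true;
}

// The greedy loop from groupChecks(), with the same comparison budget. Once
// the budget is exhausted, every remaining access gets a singleton group.
static std::vector<Group> groupAccesses(const std::vector<Access> &Accesses,
                                        unsigned MergeThreshold) {
  std::vector<Group> Groups;
  unsigned TotalComparisons = 0;

  for (unsigned I = 0; I < Accesses.size(); ++I) {
    bool Merged = false;
    for (Group &G : Groups) {
      if (TotalComparisons > MergeThreshold)
        break;
      ++TotalComparisons;
      if (addToGroup(G, Accesses[I], I)) {
        Merged = true;
        break;
      }
    }
    if (!Merged)
      Groups.push_back(Group{Accesses[I].Start, Accesses[I].End, {I}});
  }
  return Groups;
}

int main() {
  // Rough byte extents for testg's five accesses (i16 elements, 20
  // iterations); the offsets are illustrative, not LAA's exact bounds.
  enum { A, B, C };
  std::vector<Access> Accs = {
      {{A, 2}, {A, 42}}, // a[ind + 1], i.e. {(2 + %a),+,2}
      {{A, 0}, {A, 40}}, // a[ind],     i.e. {%a,+,2}
      {{B, 0}, {B, 40}}, // b[ind],     i.e. {%b,+,2}
      {{C, 2}, {C, 82}}, // c[2*ind+1], i.e. {(2 + %c),+,4}
      {{C, 0}, {C, 80}}, // c[2*ind],   i.e. {%c,+,4}
  };

  // Expect three groups: {0,1} over %a, {2} over %b and {3,4} over %c,
  // matching the two-check outcome the test above verifies.
  for (const Group &G : groupAccesses(Accs, 100))
    std::cout << "group over base " << G.Low.Base << ": bytes ["
              << G.Low.Offset << ", " << G.High.Offset << "), "
              << G.Members.size() << " member(s)\n";
}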