Index: llvm/lib/Analysis/DependenceAnalysis.cpp =================================================================== --- llvm/lib/Analysis/DependenceAnalysis.cpp +++ llvm/lib/Analysis/DependenceAnalysis.cpp @@ -1430,8 +1430,6 @@ if (R != 0) return true; // gcd doesn't divide Delta, no dependence Q = Delta.sdiv(G); - X *= Q; - Y *= Q; return false; } @@ -1465,17 +1463,21 @@ // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], // where i is an induction variable, c1 and c2 are loop invariant, and a1 // and a2 are constant, we can solve it exactly using an algorithm developed -// by Banerjee and Wolfe. See Section 2.5.3 in +// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in: // -// Optimizing Supercompilers for Supercomputers -// Michael Wolfe -// MIT Press, 1989 +// Dependence Analysis for Supercomputing +// Utpal Banerjee +// Kluwer Academic Publishers, 1988 // // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), // so use them if possible. They're also a bit better with symbolics and, // in the case of the strong SIV test, can compute Distances. // // Return true if dependence disproved. +// +// This is a modified version of the original Banerjee algorithm. The original +// only tested whether Dst depends on Src. This algorithm extends that and +// returns all the dependencies that exist between Dst and Src. bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, const SCEV *SrcConst, const SCEV *DstConst, const Loop *CurLoop, unsigned Level, @@ -1492,8 +1494,8 @@ Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); - NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), - Delta, CurLoop); + NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, + CurLoop); const SCEVConstant *ConstDelta = dyn_cast(Delta); const SCEVConstant *ConstSrcCoeff = dyn_cast(SrcCoeff); const SCEVConstant *ConstDstCoeff = dyn_cast(DstCoeff); @@ -1504,8 +1506,9 @@ APInt G, X, Y; APInt AM = ConstSrcCoeff->getAPInt(); APInt BM = ConstDstCoeff->getAPInt(); + APInt CM = ConstDelta->getAPInt(); unsigned Bits = AM.getBitWidth(); - if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { + if (findGCD(Bits, AM, BM, CM, G, X, Y)) { // gcd doesn't divide Delta, no dependence ++ExactSIVindependence; ++ExactSIVsuccesses; @@ -1516,55 +1519,73 @@ // since SCEV construction normalizes, LM = 0 APInt UM(Bits, 1, true); - bool UMvalid = false; + bool UMValid = false; // UM is perhaps unavailable, let's check if (const SCEVConstant *CUB = - collectConstantUpperBound(CurLoop, Delta->getType())) { + collectConstantUpperBound(CurLoop, Delta->getType())) { UM = CUB->getAPInt(); LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); - UMvalid = true; + UMValid = true; } APInt TU(APInt::getSignedMaxValue(Bits)); APInt TL(APInt::getSignedMinValue(Bits)); + APInt TC = CM.sdiv(G); + APInt TX = X * TC; + APInt TY = Y * TC; + LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); + LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); + LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); + + SmallVector TLVec, TUVec; + APInt TB = BM.sdiv(G); + if (TB.sgt(0)) { + TLVec.push_back(ceilingOfQuotient(-TX, TB)); + LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); + // New bound check - modification to Banerjee's e3 check + if (UMValid) { + TUVec.push_back(floorOfQuotient(UM - TX, TB)); + LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); + } + } else { + TUVec.push_back(floorOfQuotient(-TX, TB)); + LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); + // New bound check - modification to Banerjee's e3 check + if (UMValid) { + TLVec.push_back(ceilingOfQuotient(UM - TX, TB)); + LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); + } + } + + APInt TA = AM.sdiv(G); + if (TA.sgt(0)) { + if (UMValid) { + TUVec.push_back(floorOfQuotient(UM - TY, TA)); + LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); + } + // New bound check - modification to Banerjee's e3 check + TLVec.push_back(ceilingOfQuotient(-TY, TA)); + LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); + } else { + if (UMValid) { + TLVec.push_back(ceilingOfQuotient(UM - TY, TA)); + LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); + } + // New bound check - modification to Banerjee's e3 check + TUVec.push_back(floorOfQuotient(-TY, TA)); + LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); + } + + LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); + LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); + + if (TLVec.empty() || TUVec.empty()) + return false; + TL = APIntOps::smax(TLVec.front(), TLVec.back()); + TU = APIntOps::smin(TUVec.front(), TUVec.back()); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); - // test(BM/G, LM-X) and test(-BM/G, X-UM) - APInt TMUL = BM.sdiv(G); - if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL)); - LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); - if (UMvalid) { - TU = APIntOps::smin(TU, floorOfQuotient(UM - X, TMUL)); - LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); - } - } - else { - TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL)); - LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); - if (UMvalid) { - TL = APIntOps::smax(TL, ceilingOfQuotient(UM - X, TMUL)); - LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); - } - } - - // test(AM/G, LM-Y) and test(-AM/G, Y-UM) - TMUL = AM.sdiv(G); - if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); - if (UMvalid) { - TU = APIntOps::smin(TU, floorOfQuotient(UM - Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); - } - } - else { - TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); - if (UMvalid) { - TL = APIntOps::smax(TL, ceilingOfQuotient(UM - Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); - } - } if (TL.sgt(TU)) { ++ExactSIVindependence; ++ExactSIVsuccesses; @@ -1573,77 +1594,42 @@ // explore directions unsigned NewDirection = Dependence::DVEntry::NONE; - - // less than - APInt SaveTU(TU); // save these - APInt SaveTL(TL); - LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); - TMUL = AM - BM; - if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y + 1, TMUL)); - LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); - } - else { - TU = APIntOps::smin(TU, floorOfQuotient(X - Y + 1, TMUL)); - LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); - } - if (TL.sle(TU)) { - NewDirection |= Dependence::DVEntry::LT; - ++ExactSIVsuccesses; + APInt LowerDistance, UpperDistance; + if (TA.sgt(TB)) { + LowerDistance = (TY - TX) + (TA - TB) * TL; + UpperDistance = (TY - TX) + (TA - TB) * TU; + } else { + LowerDistance = (TY - TX) + (TA - TB) * TU; + UpperDistance = (TY - TX) + (TA - TB) * TL; } - // equal - TU = SaveTU; // restore - TL = SaveTL; - LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); - if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(X - Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); - } - else { - TU = APIntOps::smin(TU, floorOfQuotient(X - Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); - } - TMUL = BM - AM; - if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X, TMUL)); - LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); - } - else { - TU = APIntOps::smin(TU, floorOfQuotient(Y - X, TMUL)); - LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); - } - if (TL.sle(TU)) { + LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n"); + LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n"); + + APInt Zero(Bits, 0, true); + if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) { NewDirection |= Dependence::DVEntry::EQ; ++ExactSIVsuccesses; } - - // greater than - TU = SaveTU; // restore - TL = SaveTL; - LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); - if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(Y - X + 1, TMUL)); - LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); - } - else { - TU = APIntOps::smin(TU, floorOfQuotient(Y - X + 1, TMUL)); - LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); - } - if (TL.sle(TU)) { + if (LowerDistance.slt(0)) { NewDirection |= Dependence::DVEntry::GT; ++ExactSIVsuccesses; } + if (UpperDistance.sgt(0)) { + NewDirection |= Dependence::DVEntry::LT; + ++ExactSIVsuccesses; + } // finished Result.DV[Level].Direction &= NewDirection; if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) ++ExactSIVindependence; + LLVM_DEBUG(dbgs() << "\t Result = "); + LLVM_DEBUG(Result.dump(dbgs())); return Result.DV[Level].Direction == Dependence::DVEntry::NONE; } - // Return true if the divisor evenly divides the dividend. static bool isRemainderZero(const SCEVConstant *Dividend, @@ -1903,8 +1889,9 @@ APInt G, X, Y; APInt AM = ConstSrcCoeff->getAPInt(); APInt BM = ConstDstCoeff->getAPInt(); + APInt CM = ConstDelta->getAPInt(); unsigned Bits = AM.getBitWidth(); - if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { + if (findGCD(Bits, AM, BM, CM, G, X, Y)) { // gcd doesn't divide Delta, no dependence ++ExactRDIVindependence; return true; @@ -1917,7 +1904,7 @@ bool SrcUMvalid = false; // SrcUM is perhaps unavailable, let's check if (const SCEVConstant *UpperBound = - collectConstantUpperBound(SrcLoop, Delta->getType())) { + collectConstantUpperBound(SrcLoop, Delta->getType())) { SrcUM = UpperBound->getAPInt(); LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); SrcUMvalid = true; @@ -1927,7 +1914,7 @@ bool DstUMvalid = false; // UM is perhaps unavailable, let's check if (const SCEVConstant *UpperBound = - collectConstantUpperBound(DstLoop, Delta->getType())) { + collectConstantUpperBound(DstLoop, Delta->getType())) { DstUM = UpperBound->getAPInt(); LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); DstUMvalid = true; @@ -1935,44 +1922,59 @@ APInt TU(APInt::getSignedMaxValue(Bits)); APInt TL(APInt::getSignedMinValue(Bits)); - - // test(BM/G, LM-X) and test(-BM/G, X-UM) - APInt TMUL = BM.sdiv(G); - if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(-X, TMUL)); - LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); + APInt TC = CM.sdiv(G); + APInt TX = X * TC; + APInt TY = Y * TC; + LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); + LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); + LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); + + SmallVector TLVec, TUVec; + APInt TB = BM.sdiv(G); + if (TB.sgt(0)) { + TLVec.push_back(ceilingOfQuotient(-TX, TB)); + LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); if (SrcUMvalid) { - TU = APIntOps::smin(TU, floorOfQuotient(SrcUM - X, TMUL)); - LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); + TUVec.push_back(floorOfQuotient(SrcUM - TX, TB)); + LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); } - } - else { - TU = APIntOps::smin(TU, floorOfQuotient(-X, TMUL)); - LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } else { + TUVec.push_back(floorOfQuotient(-TX, TB)); + LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); if (SrcUMvalid) { - TL = APIntOps::smax(TL, ceilingOfQuotient(SrcUM - X, TMUL)); - LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); + TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB)); + LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); } } - // test(AM/G, LM-Y) and test(-AM/G, Y-UM) - TMUL = AM.sdiv(G); - if (TMUL.sgt(0)) { - TL = APIntOps::smax(TL, ceilingOfQuotient(-Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); + APInt TA = AM.sdiv(G); + if (TA.sgt(0)) { + TLVec.push_back(ceilingOfQuotient(-TY, TA)); + LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); if (DstUMvalid) { - TU = APIntOps::smin(TU, floorOfQuotient(DstUM - Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); + TUVec.push_back(floorOfQuotient(DstUM - TY, TA)); + LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); } - } - else { - TU = APIntOps::smin(TU, floorOfQuotient(-Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); + } else { + TUVec.push_back(floorOfQuotient(-TY, TA)); + LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); if (DstUMvalid) { - TL = APIntOps::smax(TL, ceilingOfQuotient(DstUM - Y, TMUL)); - LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); + TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA)); + LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); } } + + if (TLVec.empty() || TUVec.empty()) + return false; + + LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); + LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); + + TL = APIntOps::smax(TLVec.front(), TLVec.back()); + TU = APIntOps::smin(TUVec.front(), TUVec.back()); + LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); + LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); + if (TL.sgt(TU)) ++ExactRDIVindependence; return TL.sgt(TU); Index: llvm/test/Analysis/DependenceAnalysis/Coupled.ll =================================================================== --- llvm/test/Analysis/DependenceAnalysis/Coupled.ll +++ llvm/test/Analysis/DependenceAnalysis/Coupled.ll @@ -90,7 +90,7 @@ ; CHECK-LABEL: couple2 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [*|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -250,7 +250,7 @@ ; CHECK-LABEL: couple6 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [=|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -438,7 +438,7 @@ ; CHECK-LABEL: couple11 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [>]! +; CHECK: da analyze - flow [=|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -476,7 +476,7 @@ ; CHECK-LABEL: couple12 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [>]! +; CHECK: da analyze - flow [<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -551,7 +551,7 @@ ; CHECK-LABEL: couple14 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [=|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! Index: llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll =================================================================== --- llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll +++ llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll @@ -16,7 +16,7 @@ ; CHECK-LABEL: exact0 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [>]! +; CHECK: da analyze - flow [<=|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -130,7 +130,7 @@ ; CHECK-LABEL: exact3 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [>]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -167,7 +167,7 @@ ; CHECK-LABEL: exact4 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [>]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -204,7 +204,7 @@ ; CHECK-LABEL: exact5 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [=>|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -241,7 +241,7 @@ ; CHECK-LABEL: exact6 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [=>|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -278,7 +278,7 @@ ; CHECK-LABEL: exact7 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [*|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -352,7 +352,7 @@ ; CHECK-LABEL: exact9 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [>]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -389,7 +389,7 @@ ; CHECK-LABEL: exact10 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [>]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -426,7 +426,7 @@ ; CHECK-LABEL: exact11 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [=>|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -463,7 +463,7 @@ ; CHECK-LABEL: exact12 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [=>|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -500,7 +500,7 @@ ; CHECK-LABEL: exact13 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [<]! +; CHECK: da analyze - flow [*|<]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused!