diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1701,6 +1701,10 @@ return std::unique(adl_begin(R), adl_end(R), P); } +template <typename Range> auto unique(Range &&R) { + return std::unique(adl_begin(R), adl_end(R)); +} + /// Wrapper function around std::equal to detect if pair-wise elements between /// two ranges are the same. template <typename L, typename R> bool equal(L &&LRange, R &&RRange) { diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h --- a/llvm/include/llvm/Analysis/Delinearization.h +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -24,6 +24,7 @@ class GetElementPtrInst; class ScalarEvolution; class SCEV; +class Loop; /// Compute the array dimensions Sizes from the set of Terms extracted from /// the memory access function of this SCEVAddRecExpr (second step of @@ -42,7 +43,8 @@ /// (third step of delinearization). void computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes); + ArrayRef<const SCEV *> Sizes); + /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the /// subscripts and sizes of an array access. /// @@ -68,7 +70,7 @@ /// /// Example: /// -/// A[][n][m] +/// float A[][n][m] /// /// for i /// for j @@ -78,6 +80,8 @@ /// The initial SCEV: /// /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] +/// (Note: The SCEV of a pointer is in bytes, here we use the argument +/// of the GetElementPtr for illustration) /// /// 1. Find the different terms in the step functions: /// -> [2*m, 5, n*m, n*m] @@ -106,11 +110,17 @@ /// /// The subscript of the outermost dimension is the Quotient: [j+k]. /// -/// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. 
+/// Overall, for the address of the above memory access we have: A[][n][m][4], +/// and the access function: A[j+k][2i][5i][0] where [4] is the array element +/// size in bytes and [0] is the byte offset into this element. void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize); +bool delinearizeAccess(ScalarEvolution &SE, Value *PtrVal, Type *AccessTy, + Loop *Scope, SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes); + /// Gathers the individual index expressions from a GEP instruction. /// /// This function optimistically assumes the GEP references into a fixed size diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp --- a/llvm/lib/Analysis/Delinearization.cpp +++ b/llvm/lib/Analysis/Delinearization.cpp @@ -344,7 +344,7 @@ void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes) { + ArrayRef<const SCEV *> Sizes) { // Early exit in case this SCEV is not an affine multivariate function. if (Sizes.empty()) return; @@ -359,6 +359,13 @@ const SCEV *Q, *R; SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R); + // if (i == Last && !isa<SCEVConstant>(R)) { + // R = Res; + // Q = SE.getZero(R->getType()); + // Q = SE.getUDivExpr(Res, Sizes[i]); + // R = SE.getURemExpr(Res, Sizes[i]); + // } + + LLVM_DEBUG({ dbgs() << "Res: " << *Res << "\n"; dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; @@ -369,19 +376,6 @@ Res = Q; - // Do not record the last subscript corresponding to the size of elements in - // the array. - if (i == Last) { - - // Bail out if the byte offset is non-zero. - if (!R->isZero()) { - Subscripts.clear(); - Sizes.clear(); - return; - } - - continue; - } // Record the access function for the current subscript. 
Subscripts.push_back(R); @@ -431,8 +425,7 @@ /// and then SCEV->delinearize determines the size of some of the dimensions of /// the array as these are the multiples by which the strides are happening: /// -/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) -/// bytes. +/// CHECK: ArrayDecl[UnknownSize][%m][%o][sizeof(double)] /// /// Note that the outermost dimension remains of UnknownSize because there are /// no strides that would help identifying the size of the last dimension: when @@ -443,7 +436,7 @@ /// Finally delinearize provides the access functions for the array reference /// that does correspond to A[i][j][k] of the above C testcase: /// -/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] /// /// The testcases are checking the output of a function pass: /// DelinearizationPass that walks through all loads and stores of a function @@ -485,6 +478,21 @@ }); } +class SCEVEliminateParams : public SCEVRewriteVisitor<SCEVEliminateParams> { +public: + static const SCEV *rewrite(ScalarEvolution &SE, const SCEV *E) { + SCEVEliminateParams Rewriter(SE); + return Rewriter.visit(E); + } + + SCEVEliminateParams(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVEliminateParams>(SE) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + auto Ty = Expr->getType(); + return SE.getZero(Ty); + } +}; + bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE, const GetElementPtrInst *GEP, SmallVectorImpl<const SCEV *> &Subscripts, @@ -523,6 +531,136 @@ return !Subscripts.empty(); } +bool llvm::delinearizeAccess(ScalarEvolution &SE, Value *PtrVal, Type *AccessTy, + Loop *Scope, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) { + + const SCEV *PtrExpr = SE.getSCEVAtScope(PtrVal, Scope); + + const SCEVUnknown *BasePointer = + dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrExpr)); + // Do not delinearize if we cannot find the base pointer. 
+ if (!BasePointer) + return false; + + const SCEV *AccessFn = SE.getMinusSCEV(PtrExpr, BasePointer); + + + + Type *PtrItTy = SE.getEffectiveSCEVType(PtrVal->getType()); + const SCEV *AccessSize = SE.getSizeOfExpr(PtrItTy, AccessTy); + + + SmallVector<APInt> CandidateElementSize; + + // 1. For non-structure-of-arrays, the access size is the element size. + if (const SCEVConstant *C = dyn_cast_or_null<SCEVConstant>(AccessSize)) + CandidateElementSize.push_back(C->getAPInt()); + + + // 2. Try to infer the element size from the GEP. + SmallVector<GEPOperator *> GEPChain; + Value *GEPStart = PtrVal; + while (GEPOperator *GEP = dyn_cast<GEPOperator>(GEPStart)) { + GEPChain.push_back(GEP); + GEPStart = GEP->getPointerOperand(); + } + std::reverse(GEPChain.begin(), GEPChain.end()); + + auto AddCandidateStructTy = [&](Type *Ty) { + const SCEV* SizeExpr = SE.getSizeOfExpr(PtrItTy, Ty); + if (!SizeExpr || SizeExpr->isZero()) + return; + if (const SCEVConstant *C = dyn_cast_or_null<SCEVConstant>(SizeExpr)) + CandidateElementSize.push_back(C->getAPInt()); + }; + + for (GEPOperator *GEP : GEPChain) { + Type *SrcType = GEP->getSourceElementType(); + AddCandidateStructTy(SrcType); + // Value* PtrIndex = *GEP->indices().begin(); + SmallVector<Value *> Indices; + llvm::append_range(Indices, llvm::drop_begin(GEP->indices(), 1)); + for (auto Idx : Indices) { + SrcType = GetElementPtrInst::getTypeAtIndex(SrcType, Idx); + AddCandidateStructTy(SrcType); + } + GEPStart = GEP; + } + assert(GEPStart == PtrVal); + + // 3. Infer the array element size from the SCEV stride. + SCEVEliminateParams Eliminator(SE); + SmallVector<const SCEV *> Strides; + SCEVCollectStrides StrideCollector(SE, Strides); + visitAll(AccessFn, StrideCollector); + for (const SCEV *Stride : StrideCollector.Strides) { + auto ReducedStride = Eliminator.visit(Stride); + if (const SCEVConstant *C = + dyn_cast_or_null<SCEVConstant>(ReducedStride)) { + CandidateElementSize.push_back(C->getAPInt()); + } + } + + + // Use largest ElementSize that does give a benefit, fall back to AccessSize + // if none does. 
+ + const SCEV *ElementSize = AccessSize; + llvm::sort(CandidateElementSize, + [](const APInt &LHS, const APInt &RHS) { return LHS.ult(RHS); }); + CandidateElementSize.erase(llvm::unique(CandidateElementSize), + CandidateElementSize.end()); + for (auto S : llvm::reverse(CandidateElementSize)) { + if (S.isZero()) + continue; + + const SCEV *Q, *R; + SCEVDivision::divide(SE, AccessFn, SE.getConstant(S), &Q, &R); + + // Only consider element sizes where the subscript will be constant. This must be the case for structure accesses. + if (isa<SCEVConstant>(R)) { + ElementSize = SE.getConstant(S); + break; + } + } + + + // First step: collect parametric terms. + SmallVector<const SCEV *> Terms; + collectParametricTerms(SE, AccessFn, Terms); + + if (Terms.empty()) + return false; + + + // Second step: find subscript sizes. + findArrayDimensions(SE, Terms, Sizes, ElementSize); + + if (Sizes.empty()) + return false; + + // Third step: compute the access functions for each subscript. + computeAccessFunctions(SE, AccessFn, Subscripts, Sizes); + + if (Subscripts.empty()) + return false; + + LLVM_DEBUG({ + dbgs() << "succeeded to delinearize " << *AccessFn << "\n"; + dbgs() << "ArrayDecl[UnknownSize]"; + for (const SCEV *S : Sizes) + dbgs() << "[" << *S << "]"; + + dbgs() << "\nArrayRef"; + for (const SCEV *S : Subscripts) + dbgs() << "[" << *S << "]"; + dbgs() << "\n"; + }); + return true; +} + namespace { class Delinearization : public FunctionPass { @@ -548,8 +686,7 @@ O << "Delinearization on function " << F->getName() << ":\n"; for (Instruction &Inst : instructions(F)) { // Only analyze loads and stores. 
- if (!isa<StoreInst>(&Inst) && !isa<LoadInst>(&Inst) && - !isa<GetElementPtrInst>(&Inst)) + if (!isa<StoreInst>(&Inst) && !isa<LoadInst>(&Inst)) continue; const BasicBlock *BB = Inst.getParent(); @@ -558,6 +695,9 @@ for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(&Inst), L); + auto PtrVal = getPointerOperand(&Inst); + auto AccessTy = getLoadStoreType(&Inst); + const SCEVUnknown *BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn)); // Do not delinearize if we cannot find the base pointer. @@ -571,23 +711,22 @@ O << "AccessFunction: " << *AccessFn << "\n"; SmallVector<const SCEV *, 3> Subscripts, Sizes; - delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst)); + delinearizeAccess(*SE, PtrVal, AccessTy, L, Subscripts, Sizes); if (Subscripts.size() == 0 || Sizes.size() == 0 || - Subscripts.size() != Sizes.size()) { + Subscripts.size() != Sizes.size() + 1) { O << "failed to delinearize\n"; continue; } O << "Base offset: " << *BasePointer << "\n"; O << "ArrayDecl[UnknownSize]"; - int Size = Subscripts.size(); - for (int i = 0; i < Size - 1; i++) - O << "[" << *Sizes[i] << "]"; - O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; + for (const SCEV *Size : Sizes) + O << "[" << *Size << "]"; + O << "\n"; O << "ArrayRef"; - for (int i = 0; i < Size; i++) - O << "[" << *Subscripts[i] << "]"; + for (const SCEV *Subscript : Subscripts) + O << "[" << *Subscript << "]"; O << "\n"; } } diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3451,6 +3451,22 @@ computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); + // computeAccessFunctions also returns the byte offset into the array element, + // which DependenceAnalysis does not handle. Pop that last subscript and bail + // out if it is not zero. 
+ // TODO: Support memory accesses that fall into the array element's byte + // range. + if (!SrcSubscripts.empty()) { + const SCEV *EltOffset = SrcSubscripts.pop_back_val(); + if (!EltOffset->isZero()) + return false; + } + if (!DstSubscripts.empty()) { + const SCEV *EltOffset = DstSubscripts.pop_back_val(); + if (!EltOffset->isZero()) + return false; + } + // Fail when there is only a subscript: that's a linearized access function. if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || SrcSubscripts.size() != DstSubscripts.size()) diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -348,15 +348,21 @@ llvm::delinearize(SE, AccessFn, Subscripts, Sizes, SE.getElementSize(&StoreOrLoadInst)); + // Eliminate the innermost subscript (for the byte offset to the array + // element). LoopCacheAnalysis assumes it is zero. + if (!Subscripts.empty() && Subscripts.back()->isZero()) + Subscripts.pop_back(); + if (Subscripts.empty() || Sizes.empty() || Subscripts.size() != Sizes.size()) { + Subscripts.clear(); + Sizes.clear(); + // Attempt to determine whether we have a single dimensional array access. // before giving up. if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { LLVM_DEBUG(dbgs().indent(2) << "ERROR: failed to delinearize reference\n"); - Subscripts.clear(); - Sizes.clear(); return false; } diff --git a/llvm/test/Analysis/Delinearization/a.ll b/llvm/test/Analysis/Delinearization/a.ll --- a/llvm/test/Analysis/Delinearization/a.ll +++ b/llvm/test/Analysis/Delinearization/a.ll @@ -9,8 +9,8 @@ ; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes. 
-; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][4] +; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 { entry: diff --git a/llvm/test/Analysis/Delinearization/array_of_struct.ll b/llvm/test/Analysis/Delinearization/array_of_struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/Delinearization/array_of_struct.ll @@ -0,0 +1,70 @@ +; RUN: opt < %s -analyze -enable-new-pm=0 -delinearize | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; CHECK: Inst: store i64 0, i64* %y, align 1, !tbaa !3 +; CHECK: In Loop with Header: for.cond +; CHECK: AccessFunction: {4,+,(12 + (12 * %N))}<%for.cond> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%N][12] +; CHECK: ArrayRef[{0,+,1}<%for.cond>][{0,+,1}<%for.cond>][4] + +; struct __attribute__((packed)) Pair { int x; long y; }; +; void foo(unsigned long N, struct Pair A[][N]) { +; for (unsigned long i = 0; i < N; i+=1) +; A[i][i].y = 0; +; } + + + +source_filename = "byte_offset.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.Pair = type <{ i32, i64 }> + +; Function Attrs: nounwind uwtable +define dso_local void @foo(i64 %N, %struct.Pair* %A) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ] + %cmp = icmp ult i64 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %0 = mul nsw i64 %i.0, %N + %arrayidx = getelementptr inbounds %struct.Pair, %struct.Pair* %A, i64 %0 + %arrayidx1 = getelementptr inbounds %struct.Pair, %struct.Pair* %arrayidx, i64 %i.0 + %y = getelementptr inbounds %struct.Pair, %struct.Pair* %arrayidx1, i64 0, i32 1 + store i64 0, i64* %y, align 1, !tbaa !3 + %add = add i64 %i.0, 1 + br label 
%for.cond, !llvm.loop !9 + +for.end: ; preds = %for.cond + ret void +} + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { argmemonly nofree nosync nounwind willreturn } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 1} +!2 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 164631ce80ecbcc0af069cb6ddd29fef726f222a)"} +!3 = !{!4, !8, i64 4} +!4 = !{!"Pair", !5, i64 0, !8, i64 4} +!5 = !{!"int", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!"long", !6, i64 0} +!9 = distinct !{!9, !10} +!10 = !{!"llvm.loop.mustprogress"} \ No newline at end of file diff --git a/llvm/test/Analysis/Delinearization/array_of_struct3.ll b/llvm/test/Analysis/Delinearization/array_of_struct3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/Delinearization/array_of_struct3.ll @@ -0,0 +1,125 @@ +; RUN: opt < %s -analyze -enable-new-pm=0 -delinearize | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; CHECK: Inst: %3 = load double, double* %b, align 1, !tbaa !3 +; CHECK-NEXT: In Loop with Header: for.cond6 +; CHECK-NEXT: AccessFunction: {{\{\{\{}}(4 + (-13 * %n * %m)),+,(13 * %n * %m)}<%for.cond>,+,(13 * %m)}<%for.cond1>,+,13}<%for.cond6> +; CHECK-NEXT: Base offset: %f1 +; CHECK-NEXT: ArrayDecl[UnknownSize][%n][%m][13] +; CHECK-NEXT: ArrayRef[{-1,+,1}<%for.cond>][{0,+,1}<%for.cond1>][{0,+,1}<%for.cond6>][4] + +; CHECK: AccessFunction: 
{{\{\{\{}}4,+,(13 * %n * %m)}<%for.cond>,+,(13 * %m)}<%for.cond1>,+,13}<%for.cond6> +; CHECK-NEXT: Base offset: %f1 +; CHECK-NEXT: ArrayDecl[UnknownSize][%n][%m][13] +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.cond>][{0,+,1}<%for.cond1>][{0,+,1}<%for.cond6>][4] + +; struct __attribute__((packed)) MyS { +; float a; +; double b; +; char c; +; }; +; void foo(long long n, long long m, struct MyS f1[][n][m]) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < n; j++) +; for (int k = 0; k < m; k++) +; f1[i][j][k].b = f1[i-1][j][k].b; +; } + +source_filename = "array_of_struct2.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.MyS = type <{ float, double, i8 }> + +; Function Attrs: nounwind uwtable +define dso_local void @foo(i64 %n, i64 %m, %struct.MyS* %f1) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc26, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc27, %for.inc26 ] + %cmp = icmp ult i32 %i.0, 1024 + br i1 %cmp, label %for.cond1.preheader, label %for.end28 + +for.cond1.preheader: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.cond1.preheader, %for.inc23 + %j.0 = phi i32 [ %inc24, %for.inc23 ], [ 0, %for.cond1.preheader ] + %conv = zext i32 %j.0 to i64 + %cmp2 = icmp slt i64 %conv, %n + br i1 %cmp2, label %for.cond6.preheader, label %for.inc26 + +for.cond6.preheader: ; preds = %for.cond1 + br label %for.cond6 + +for.cond6: ; preds = %for.cond6.preheader, %for.body11 + %k.0 = phi i32 [ %inc, %for.body11 ], [ 0, %for.cond6.preheader ] + %conv7 = zext i32 %k.0 to i64 + %cmp8 = icmp slt i64 %conv7, %m + br i1 %cmp8, label %for.body11, label %for.inc23 + +for.body11: ; preds = %for.cond6 + %sub = add nsw i32 %i.0, -1 + %idxprom = sext i32 %sub to i64 + %0 = mul nuw i64 %n, %m + %1 = mul nsw i64 %0, %idxprom + %arrayidx = getelementptr inbounds %struct.MyS, %struct.MyS* %f1, i64 %1 + %idxprom12 = zext i32 %j.0 to i64 + %2 = mul nsw 
i64 %idxprom12, %m + %arrayidx13 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx, i64 %2 + %idxprom14 = zext i32 %k.0 to i64 + %arrayidx15 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx13, i64 %idxprom14 + %b = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx15, i64 0, i32 1 + %3 = load double, double* %b, align 1, !tbaa !3 + %idxprom16 = zext i32 %i.0 to i64 + %4 = mul nuw i64 %n, %m + %5 = mul nsw i64 %4, %idxprom16 + %arrayidx17 = getelementptr inbounds %struct.MyS, %struct.MyS* %f1, i64 %5 + %idxprom18 = zext i32 %j.0 to i64 + %6 = mul nsw i64 %idxprom18, %m + %arrayidx19 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx17, i64 %6 + %idxprom20 = zext i32 %k.0 to i64 + %arrayidx21 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx19, i64 %idxprom20 + %b22 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx21, i64 0, i32 1 + store double %3, double* %b22, align 1, !tbaa !3 + %inc = add nuw nsw i32 %k.0, 1 + br label %for.cond6, !llvm.loop !9 + +for.inc23: ; preds = %for.cond6 + %inc24 = add nuw nsw i32 %j.0, 1 + br label %for.cond1, !llvm.loop !11 + +for.inc26: ; preds = %for.cond1 + %inc27 = add nuw nsw i32 %i.0, 1 + br label %for.cond, !llvm.loop !12 + +for.end28: ; preds = %for.cond + ret void +} + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { argmemonly nofree nosync nounwind willreturn } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 1} 
+!2 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 8f792707c4e5208bba08d837504c93de28b1f209)"} +!3 = !{!4, !8, i64 4} +!4 = !{!"MyS", !5, i64 0, !8, i64 4, !6, i64 12} +!5 = !{!"float", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!"double", !6, i64 0} +!9 = distinct !{!9, !10} +!10 = !{!"llvm.loop.mustprogress"} +!11 = distinct !{!11, !10} +!12 = distinct !{!12, !10} diff --git a/llvm/test/Analysis/Delinearization/byte_offset.ll b/llvm/test/Analysis/Delinearization/byte_offset.ll --- a/llvm/test/Analysis/Delinearization/byte_offset.ll +++ b/llvm/test/Analysis/Delinearization/byte_offset.ll @@ -1,8 +1,12 @@ ; RUN: opt < %s -analyze -enable-new-pm=0 -delinearize | FileCheck %s ; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s +; CHECK: Inst: store float 0.000000e+00, float* %ptr, align 4 +; CHECK: In Loop with Header: inner.loop ; CHECK: AccessFunction: ({0,+,%i2}<%outer.loop> + %unknown) -; CHECK-NEXT: failed to delinearize +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%i2][4] +; CHECK: ArrayRef[0][0][({0,+,%i2}<%outer.loop> + %unknown)] ; void foo(char A[], long i2, bool c) { ; for (long i = 0; ; ++i) { diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll --- a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll +++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll @@ -6,15 +6,15 @@ ; CHECK-NEXT: In Loop with Header: for.inc ; CHECK-NEXT: AccessFunction: {(4 * %N * %call),+,4}<%for.inc> ; CHECK-NEXT: Base offset: %A -; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. 
-; CHECK-NEXT: ArrayRef[%call][{0,+,1}<%for.inc>] +; CHECK-NEXT: ArrayDecl[UnknownSize][%N][4] +; CHECK-NEXT: ArrayRef[%call][{0,+,1}<%for.inc>][0] ; CHECK: Inst: %tmp5 = load float, float* %arrayidx4, align 4 ; CHECK-NEXT: In Loop with Header: for.inc ; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc> ; CHECK-NEXT: Base offset: %B -; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. -; CHECK-NEXT: ArrayRef[{0,+,1}<%for.inc>][%call1] +; CHECK-NEXT: ArrayDecl[UnknownSize][%N][4] +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.inc>][%call1][0] ; Function Attrs: noinline nounwind uwtable define void @mat_mul(float* %C, float* %A, float* %B, i64 %N) #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { diff --git a/llvm/test/Analysis/Delinearization/divide_by_one.ll b/llvm/test/Analysis/Delinearization/divide_by_one.ll --- a/llvm/test/Analysis/Delinearization/divide_by_one.ll +++ b/llvm/test/Analysis/Delinearization/divide_by_one.ll @@ -13,14 +13,14 @@ ; AddRec: {{(-1 + ((1 + %bs) * %stride)),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<%for.body3> ; CHECK: Inst: %0 = load i8, i8* %arrayidx, align 1 ; CHECK: Base offset: %dst -; CHECK: ArrayDecl[UnknownSize][%stride] with elements of 1 bytes. -; CHECK: ArrayRef[{(1 + %bs),+,-1}<%for.cond1.preheader>][{-1,+,1}<%for.body3>] +; CHECK: ArrayDecl[UnknownSize][%stride][1] +; CHECK: ArrayRef[{(1 + %bs),+,-1}<%for.cond1.preheader>][{-1,+,1}<%for.body3>][0] ; AddRec: {{(%stride * %bs),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<%for.body3> ; CHECK: Inst: store i8 %0, i8* %arrayidx7, align 1 ; CHECK: Base offset: %dst -; CHECK: ArrayDecl[UnknownSize][%stride] with elements of 1 bytes. 
-; CHECK: ArrayRef[{%bs,+,-1}<%for.cond1.preheader>][{0,+,1}<%for.body3>] +; CHECK: ArrayDecl[UnknownSize][%stride][1] +; CHECK: ArrayRef[{%bs,+,-1}<%for.cond1.preheader>][{0,+,1}<%for.body3>][0] define void @test(i8* nocapture %dst, i32 %stride, i32 %bs) { entry: diff --git a/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll b/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll deleted file mode 100644 --- a/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll +++ /dev/null @@ -1,153 +0,0 @@ -; RUN: opt < %s -aa-pipeline=basic-aa -passes='require,print' -disable-output -; -; a, b, c, d, g, h; -; char *f; -; static fn1(p1) { -; char *e = p1; -; for (; d;) { -; a = 0; -; for (;; ++a) -; for (; b; ++b) -; c = e[b + a]; -; } -; } -; -; fn2() { -; for (;;) -; fn1(&f[g * h]); -; } - -@g = common global i32 0, align 4 -@h = common global i32 0, align 4 -@f = common global i8* null, align 4 -@a = common global i32 0, align 4 -@b = common global i32 0, align 4 -@c = common global i32 0, align 4 -@d = common global i32 0, align 4 - -define i32 @fn2() { -entry: - %.pr = load i32, i32* @d, align 4 - %phitmp = icmp eq i32 %.pr, 0 - br label %for.cond - -for.cond: - %0 = phi i1 [ true, %for.cond ], [ %phitmp, %entry ] - br i1 %0, label %for.cond, label %for.cond2thread-pre-split.preheader.i - -for.cond2thread-pre-split.preheader.i: - %1 = load i32, i32* @g, align 4 - %2 = load i32, i32* @h, align 4 - %mul = mul nsw i32 %2, %1 - %3 = load i8*, i8** @f, align 4 - %.pr.pre.i = load i32, i32* @b, align 4 - br label %for.cond2thread-pre-split.i - -for.cond2thread-pre-split.i: - %.pr.i = phi i32 [ 0, %for.inc5.i ], [ %.pr.pre.i, %for.cond2thread-pre-split.preheader.i ] - %storemerge.i = phi i32 [ %inc6.i, %for.inc5.i ], [ 0, %for.cond2thread-pre-split.preheader.i ] - store i32 %storemerge.i, i32* @a, align 4 - %tobool31.i = icmp eq i32 %.pr.i, 0 - br i1 %tobool31.i, label %for.inc5.i, label %for.body4.preheader.i - -for.body4.preheader.i: - %4 = icmp slt i32 %.pr.i, -7 - 
%add.i = add i32 %storemerge.i, %mul - br i1 %4, label %for.body4.i.preheader, label %for.body4.ur.i.preheader - -for.body4.i.preheader: - %5 = sub i32 -8, %.pr.i - %6 = lshr i32 %5, 3 - %7 = mul i32 %6, 8 - br label %for.body4.i - -for.body4.i: - %8 = phi i32 [ %inc.7.i, %for.body4.i ], [ %.pr.i, %for.body4.i.preheader ] - %arrayidx.sum1 = add i32 %add.i, %8 - %arrayidx.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum1 - %9 = load i8, i8* %arrayidx.i, align 1 - %conv.i = sext i8 %9 to i32 - store i32 %conv.i, i32* @c, align 4 - %inc.i = add nsw i32 %8, 1 - store i32 %inc.i, i32* @b, align 4 - %arrayidx.sum2 = add i32 %add.i, %inc.i - %arrayidx.1.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum2 - %10 = load i8, i8* %arrayidx.1.i, align 1 - %conv.1.i = sext i8 %10 to i32 - store i32 %conv.1.i, i32* @c, align 4 - %inc.1.i = add nsw i32 %8, 2 - store i32 %inc.1.i, i32* @b, align 4 - %arrayidx.sum3 = add i32 %add.i, %inc.1.i - %arrayidx.2.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum3 - %11 = load i8, i8* %arrayidx.2.i, align 1 - %conv.2.i = sext i8 %11 to i32 - store i32 %conv.2.i, i32* @c, align 4 - %inc.2.i = add nsw i32 %8, 3 - store i32 %inc.2.i, i32* @b, align 4 - %arrayidx.sum4 = add i32 %add.i, %inc.2.i - %arrayidx.3.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum4 - %12 = load i8, i8* %arrayidx.3.i, align 1 - %conv.3.i = sext i8 %12 to i32 - store i32 %conv.3.i, i32* @c, align 4 - %inc.3.i = add nsw i32 %8, 4 - store i32 %inc.3.i, i32* @b, align 4 - %arrayidx.sum5 = add i32 %add.i, %inc.3.i - %arrayidx.4.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum5 - %13 = load i8, i8* %arrayidx.4.i, align 1 - %conv.4.i = sext i8 %13 to i32 - store i32 %conv.4.i, i32* @c, align 4 - %inc.4.i = add nsw i32 %8, 5 - store i32 %inc.4.i, i32* @b, align 4 - %arrayidx.sum6 = add i32 %add.i, %inc.4.i - %arrayidx.5.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum6 - %14 = load i8, i8* %arrayidx.5.i, align 1 - %conv.5.i = sext i8 
%14 to i32 - store i32 %conv.5.i, i32* @c, align 4 - %inc.5.i = add nsw i32 %8, 6 - store i32 %inc.5.i, i32* @b, align 4 - %arrayidx.sum7 = add i32 %add.i, %inc.5.i - %arrayidx.6.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum7 - %15 = load i8, i8* %arrayidx.6.i, align 1 - %conv.6.i = sext i8 %15 to i32 - store i32 %conv.6.i, i32* @c, align 4 - %inc.6.i = add nsw i32 %8, 7 - store i32 %inc.6.i, i32* @b, align 4 - %arrayidx.sum8 = add i32 %add.i, %inc.6.i - %arrayidx.7.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum8 - %16 = load i8, i8* %arrayidx.7.i, align 1 - %conv.7.i = sext i8 %16 to i32 - store i32 %conv.7.i, i32* @c, align 4 - %inc.7.i = add nsw i32 %8, 8 - store i32 %inc.7.i, i32* @b, align 4 - %tobool3.7.i = icmp sgt i32 %inc.7.i, -8 - br i1 %tobool3.7.i, label %for.inc5.loopexit.ur-lcssa.i, label %for.body4.i - -for.inc5.loopexit.ur-lcssa.i: - %17 = add i32 %.pr.i, 8 - %18 = add i32 %17, %7 - %19 = icmp eq i32 %18, 0 - br i1 %19, label %for.inc5.i, label %for.body4.ur.i.preheader - -for.body4.ur.i.preheader: - %.ph = phi i32 [ %18, %for.inc5.loopexit.ur-lcssa.i ], [ %.pr.i, %for.body4.preheader.i ] - br label %for.body4.ur.i - -for.body4.ur.i: - %20 = phi i32 [ %inc.ur.i, %for.body4.ur.i ], [ %.ph, %for.body4.ur.i.preheader ] - %arrayidx.sum = add i32 %add.i, %20 - %arrayidx.ur.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum - %21 = load i8, i8* %arrayidx.ur.i, align 1 - %conv.ur.i = sext i8 %21 to i32 - store i32 %conv.ur.i, i32* @c, align 4 - %inc.ur.i = add nsw i32 %20, 1 - store i32 %inc.ur.i, i32* @b, align 4 - %tobool3.ur.i = icmp eq i32 %inc.ur.i, 0 - br i1 %tobool3.ur.i, label %for.inc5.i.loopexit, label %for.body4.ur.i - -for.inc5.i.loopexit: - br label %for.inc5.i - -for.inc5.i: - %inc6.i = add nsw i32 %storemerge.i, 1 - br label %for.cond2thread-pre-split.i -} diff --git a/llvm/test/Analysis/Delinearization/himeno_1.ll b/llvm/test/Analysis/Delinearization/himeno_1.ll --- a/llvm/test/Analysis/Delinearization/himeno_1.ll 
+++ b/llvm/test/Analysis/Delinearization/himeno_1.ll @@ -28,8 +28,8 @@ ; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> ; CHECK: Base offset: %a.base -; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes. -; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)][4] +; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>][0] %struct.Mat = type { float*, i32, i32, i32, i32 } diff --git a/llvm/test/Analysis/Delinearization/himeno_2.ll b/llvm/test/Analysis/Delinearization/himeno_2.ll --- a/llvm/test/Analysis/Delinearization/himeno_2.ll +++ b/llvm/test/Analysis/Delinearization/himeno_2.ll @@ -28,8 +28,8 @@ ; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> ; CHECK: Base offset: %a.base -; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes. 
-; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)][4] +; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>][0] %struct.Mat = type { float*, i32, i32, i32, i32 } diff --git a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll --- a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll +++ b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll @@ -10,10 +10,15 @@ ; AddRec: {{((%m * %b * 8) + %A),+,(2 * %m * 8)}<%for.i>,+,(2 * 8)}<%for.j> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>] +; CHECK: ArrayDecl[UnknownSize][%m][8] +; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>][0] +; A[2i+b][2j][0] +; A[(2*i+b)*m*8+2*j*8+0] +; A[2*i*m*8+b*m*8+2*j*8+0] +; A[i*m*16+b*m*8+j*16] + define void @foo(i64 %n, i64 %m, i64 %b, double* %A) { entry: br label %for.i diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll @@ -10,8 +10,8 @@ ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll @@ -10,8 +10,8 @@ ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of 8 bytes. -; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>] +; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)][8] +; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>][0] define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll @@ -10,8 +10,8 @@ ; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll @@ -12,15 +12,15 @@ ; In Loop with Header: for.j ; AddRec: {{0,+,(%m * 8)}<%for.i>,+,8}<%for.j> ; Base offset: %A -; ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] +; ArrayDecl[UnknownSize][%m][8] +; ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][0] ; Inst: store double %val, double* %arrayidx ; In Loop with Header: for.j ; AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] +; CHECK: ArrayDecl[UnknownSize][%m][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][0] define void @foo(i64 %n, i64 %m, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll @@ -10,8 +10,8 @@ ; AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll @@ -9,8 +9,8 @@ ; AddRec: {{{%A,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes. -; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll b/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll --- a/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll +++ b/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll @@ -6,8 +6,8 @@ ; A[i * (*p) + j] += i + j; ; } ; -; CHECK: ArrayDecl[UnknownSize][%pval] with elements of 4 bytes. 
-; CHECK: ArrayRef[{0,+,1}<%bb2>][{0,+,1}<%bb4>] +; CHECK: ArrayDecl[UnknownSize][%pval][4] +; CHECK: ArrayRef[{0,+,1}<%bb2>][{0,+,1}<%bb4>][0] ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -983,6 +983,11 @@ return true; } +static bool isKnownLessOrEqual(ScalarEvolution &SE, const SCEV *LHS, + const SCEV *RHS) { + return SE.isKnownPredicate(ICmpInst::ICMP_SLE, LHS, RHS); +} + // We first store the resulting memory accesses in TempMemoryAccesses. Only // if the access functions for all memory accesses have been successfully // delinearized we continue. Otherwise, we either report a failure or, if @@ -1015,8 +1020,24 @@ } else { llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts, Shape->DelinearizedSizes); - if (Acc->DelinearizedSubscripts.size() == 0) + if (Acc->DelinearizedSubscripts.size() == 0) { IsNonAffine = true; + } else { + // TODO: Do not drop the last DelinearizedSubscripts. If non-zero it + // must be preserved for code-generation such that it represents the + // same pointer. 
+ const SCEV *AccOffset = Acc->DelinearizedSubscripts.pop_back_val(); + const SCEV *ArrSize = Shape->DelinearizedSizes.back(); + const SCEV *AccSize = Context.ElementSize[BasePointer]; + if (!AccOffset->isZero() || + !isKnownLessOrEqual(SE, AccSize, ArrSize)) { + LLVM_DEBUG(dbgs() + << "Accessed element bytes [" << *AccOffset << ",+" + << *AccSize << ") possibly outside element [0," + << *ArrSize << ")\n"); + IsNonAffine = true; + } + } } for (const SCEV *S : Acc->DelinearizedSubscripts) if (!isAffine(S, Scope, Context)) diff --git a/polly/test/ScopDetect/array_elt_byte_offset.ll b/polly/test/ScopDetect/array_elt_byte_offset.ll new file mode 100644 --- /dev/null +++ b/polly/test/ScopDetect/array_elt_byte_offset.ll @@ -0,0 +1,78 @@ +; RUN: opt %loadPolly -polly-detect -disable-output -debug < %s 2>&1 | FileCheck %s + +; CHECK: Accessed element bytes [{{.+}},+4) possibly outside element [0,4) +; REQUIRES: asserts + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.CCGFace = type { %struct.CCGFace*, i8*, i16, i16, i16, i16 } +%struct.CCGVert = type { %struct.CCGVert*, i8*, i16, i16, i16, i16, %struct.CCGEdge**, %struct.CCGFace** } +%struct.CCGEdge = type { %struct.CCGEdge*, i8*, i16, i16, float, %struct.CCGVert*, %struct.CCGVert*, %struct.CCGFace** } + +define void @ccgSubSurf__calcSubdivLevel() { +entry: + %shl.i1795 = shl nuw i32 1, undef + %i = load i32, i32* undef, align 8 + %shl.i.i = shl nuw i32 1, undef + %add.i.i = add nuw nsw i32 %shl.i.i, 1 + %mul.i = mul nsw i32 %add.i.i, %add.i.i + %add.i1797 = add nsw i32 %mul.i, %add.i.i + %smax = call i32 @llvm.smax.i32(i32 %shl.i1795, i32 1) + %i1 = sext i32 %add.i1797 to i64 + %i2 = sext i32 %i to i64 + %i3 = load %struct.CCGFace*, %struct.CCGFace** undef, align 8 + %i4 = load i16, i16* undef, align 8 + %arrayidx.i.i.i.i = getelementptr inbounds %struct.CCGFace, %struct.CCGFace* %i3, i64 1 + %i5 = bitcast %struct.CCGFace* %arrayidx.i.i.i.i to %struct.CCGVert** + 
%idxprom.i.i.i = sext i16 %i4 to i64 + %arrayidx.i.i.i = getelementptr inbounds %struct.CCGVert*, %struct.CCGVert** %i5, i64 %idxprom.i.i.i + %arrayidx2.i.i = getelementptr inbounds %struct.CCGVert*, %struct.CCGVert** %arrayidx.i.i.i, i64 %idxprom.i.i.i + %i6 = bitcast %struct.CCGVert** %arrayidx2.i.i to i8* + %i7 = load i32, i32* undef, align 4 + %wide.trip.count.i.us.us.us = zext i32 %i7 to i64 + br label %for.cond10.preheader.us.us.us + +for.cond10.preheader.us.us.us: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us ] + %i8 = mul nsw i64 %indvars.iv, %i1 + %i9 = add nsw i64 %i8, 1 + %i10 = mul nsw i64 %i9, %i2 + %add.ptr.i.us.us.us = getelementptr inbounds i8, i8* %i6, i64 %i10 + br label %for.cond14.preheader.us.us.us.us.us + +for.cond14.preheader.us.us.us.us.us: + br label %for.body18.us.us.us.us.us.us + +for.body18.us.us.us.us.us.us: + %x.04761.us.us.us.us.us.us = phi i32 [ 0, %for.cond14.preheader.us.us.us.us.us ], [ %add25.us.us.us.us.us.us, %VertDataAvg4.exit.loopexit.us.us.us.us.us.us ] + %mul8.i.us.us.us.us.us.us = shl i32 undef, undef + %add9.i.us.us.us.us.us.us = add nsw i32 %mul8.i.us.us.us.us.us.us, %add.i.i + %mul10.i.us.us.us.us.us.us = mul nsw i32 %add9.i.us.us.us.us.us.us, %i + %idxprom.i.us.us.us.us.us.us = sext i32 %mul10.i.us.us.us.us.us.us to i64 + %arrayidx.i.us.us.us.us.us.us = getelementptr inbounds i8, i8* %add.ptr.i.us.us.us, i64 %idxprom.i.us.us.us.us.us.us + %i11 = bitcast i8* %arrayidx.i.us.us.us.us.us.us to float* + %add25.us.us.us.us.us.us = add nuw nsw i32 %x.04761.us.us.us.us.us.us, 1 + br label %for.body.i.us.us.us.us.us.us + +for.body.i.us.us.us.us.us.us: + %indvars.iv.i.us.us.us.us.us.us = phi i64 [ 0, %for.body18.us.us.us.us.us.us ], [ %indvars.iv.next.i.us.us.us.us.us.us, %for.body.i.us.us.us.us.us.us ] + %arrayidx.i1890.us.us.us.us.us.us = getelementptr inbounds float, float* %i11, i64 %indvars.iv.i.us.us.us.us.us.us + %i12 = load float, float* 
%arrayidx.i1890.us.us.us.us.us.us, align 4 + %indvars.iv.next.i.us.us.us.us.us.us = add nuw nsw i64 %indvars.iv.i.us.us.us.us.us.us, 1 + %exitcond.not.i.us.us.us.us.us.us = icmp eq i64 %indvars.iv.next.i.us.us.us.us.us.us, %wide.trip.count.i.us.us.us + br i1 %exitcond.not.i.us.us.us.us.us.us, label %VertDataAvg4.exit.loopexit.us.us.us.us.us.us, label %for.body.i.us.us.us.us.us.us + +VertDataAvg4.exit.loopexit.us.us.us.us.us.us: + %exitcond.not = icmp eq i32 %add25.us.us.us.us.us.us, %smax + br i1 %exitcond.not, label %for.cond14.for.inc35_crit_edge.split.us.us.us.us.us.us, label %for.body18.us.us.us.us.us.us + +for.cond14.for.inc35_crit_edge.split.us.us.us.us.us.us: + br i1 undef, label %for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us, label %for.cond14.preheader.us.us.us.us.us + +for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond10.preheader.us.us.us +} + +declare i32 @llvm.smax.i32(i32, i32) +