diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -600,9 +600,17 @@
     return getConstant(Ty, -1, /*isSigned=*/true);
   }

-  /// Return an expression for sizeof AllocTy that is type IntTy
+  /// Return an expression for sizeof ScalableTy that is type IntTy, where
+  /// ScalableTy is a scalable vector type.
+  const SCEV *getSizeOfScalableVectorExpr(Type *IntTy,
+                                          ScalableVectorType *ScalableTy);
+
+  /// Return an expression for the alloc size of AllocTy that is type IntTy
   const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy);

+  /// Return an expression for the store size of StoreTy that is type IntTy
+  const SCEV *getStoreSizeOfExpr(Type *IntTy, Type *StoreTy);
+
   /// Return an expression for offsetof on the given field with type IntTy
   const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo);

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -224,9 +224,9 @@
   }
   // Add the size of the pointed element to ScEnd.
   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
-  unsigned EltSize =
-      DL.getTypeStoreSizeInBits(Ptr->getType()->getPointerElementType()) / 8;
-  const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
+  Type *IdxTy = DL.getIndexType(Ptr->getType());
+  const SCEV *EltSizeSCEV =
+      SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 }
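To make the LoopAccessAnalysis change above concrete: the old code forced the element's store size through an unsigned, which is lossy for scalable vectors (and is what trips the TypeSize warning). A rough before/after sketch, with helper names invented for illustration:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

// Before: the TypeSize from getTypeStoreSizeInBits is squeezed into an
// unsigned, silently dropping the vscale factor of a scalable vector.
const SCEV *eltSizeOld(ScalarEvolution *SE, const DataLayout &DL, Type *EltTy,
                       const SCEV *ScEnd) {
  unsigned EltSize = DL.getTypeStoreSizeInBits(EltTy) / 8;
  return SE->getConstant(ScEnd->getType(), EltSize);
}

// After: ScalarEvolution builds the size expression itself. It is still a
// plain ConstantInt for fixed-size types, but becomes a symbolic SCEVUnknown
// for scalable ones, so no information is lost.
const SCEV *eltSizeNew(ScalarEvolution *SE, Type *IdxTy, Type *EltTy) {
  return SE->getStoreSizeOfExpr(IdxTy, EltTy);
}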
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3660,28 +3660,42 @@
   return getMinMaxExpr(scUMinExpr, Ops);
 }

+const SCEV *
+ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy,
+                                             ScalableVectorType *ScalableTy) {
+  Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo());
+  Constant *One = ConstantInt::get(IntTy, 1);
+  Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One);
+  // Note that the expression we created is the final expression; we don't
+  // want to simplify it any further. Also, if we call a normal getSCEV(),
+  // we'll end up in an endless recursion. So just create an SCEVUnknown.
+  return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
+}
+
 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
-  if (isa<ScalableVectorType>(AllocTy)) {
-    Constant *NullPtr = Constant::getNullValue(AllocTy->getPointerTo());
-    Constant *One = ConstantInt::get(IntTy, 1);
-    Constant *GEP = ConstantExpr::getGetElementPtr(AllocTy, NullPtr, One);
-    // Note that the expression we created is the final expression, we don't
-    // want to simplify it any further Also, if we call a normal getSCEV(),
-    // we'll end up in an endless recursion. So just create an SCEVUnknown.
-    return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
-  }
-  // We can bypass creating a target-independent
-  // constant expression and then folding it back into a ConstantInt.
-  // This is just a compile-time optimization.
+  if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy))
+    return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy);
+  // We can bypass creating a target-independent constant expression and then
+  // folding it back into a ConstantInt. This is just a compile-time
+  // optimization.
   return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
 }

+const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) {
+  if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy))
+    return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy);
+  // We can bypass creating a target-independent constant expression and then
+  // folding it back into a ConstantInt. This is just a compile-time
+  // optimization.
+  return getConstant(IntTy, getDataLayout().getTypeStoreSize(StoreTy));
+}
+
 const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, StructType *STy,
                                              unsigned FieldNo) {
-  // We can bypass creating a target-independent
-  // constant expression and then folding it back into a ConstantInt.
-  // This is just a compile-time optimization.
+  // We can bypass creating a target-independent constant expression and then
+  // folding it back into a ConstantInt. This is just a compile-time
+  // optimization.
   return getConstant(
       IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
 }

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-store-vs-alloc-size.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-store-vs-alloc-size.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-store-vs-alloc-size.ll
@@ -0,0 +1,39 @@
+; RUN: opt -analyze --loop-accesses %s -enable-new-pm=0 | FileCheck %s
+; RUN: opt -passes=print-access-info %s -disable-output 2>&1 | FileCheck %s
+
+; This test defends against accidentally using alloc size instead of store size when performing run-time
+; boundary checks of memory accesses. The IR in this file is based on
+; llvm/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll.
+; Here, we use i19 instead of i64 because its alloc size differs from its store size.
+
+;CHECK: function 'fastCopy':
+;CHECK: (Low: %op High: (27 + %op))
+;CHECK: (Low: %src High: (27 + %src))
+
+define void @fastCopy(i8* nocapture readonly %src, i8* nocapture %op) {
+entry:
+  br label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %len.addr.07 = phi i32 [ %sub, %while.body ], [ 32, %while.body.preheader ]
+  %op.addr.06 = phi i8* [ %add.ptr1, %while.body ], [ %op, %while.body.preheader ]
+  %src.addr.05 = phi i8* [ %add.ptr, %while.body ], [ %src, %while.body.preheader ]
+  %0 = bitcast i8* %src.addr.05 to i19*
+  %1 = load i19, i19* %0, align 8
+  %2 = bitcast i8* %op.addr.06 to i19*
+  store i19 %1, i19* %2, align 8
+  %add.ptr = getelementptr inbounds i8, i8* %src.addr.05, i19 8
+  %add.ptr1 = getelementptr inbounds i8, i8* %op.addr.06, i19 8
+  %sub = add nsw i32 %len.addr.07, -8
+  %cmp = icmp sgt i32 %len.addr.07, 8
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  ret void
+}
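A quick sanity check of the CHECK lines above (the arithmetic is editorial, not from the patch): the loop runs four iterations (len = 32, 24, 16, 8), so the last i19 access starts at byte offset 24 and the exclusive upper bound is 24 + 3 = 27, using the 3-byte store size of i19; an alloc-size-based bound would have been 24 + 4 = 28. A minimal sketch of how DataLayout reports the two sizes, assuming the default (empty) data layout string:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// i19 stores 3 bytes of data but is allocated in 4 (store size rounded up to
// the 4-byte ABI alignment of i32), which is why it makes a good probe type.
void i19Sizes() {
  LLVMContext Ctx;
  DataLayout DL("");
  Type *I19 = Type::getIntNTy(Ctx, 19);
  uint64_t StoreBytes = DL.getTypeStoreSize(I19); // 3 == ceil(19 / 8)
  uint64_t AllocBytes = DL.getTypeAllocSize(I19); // 4 == alignTo(3, 4)
  (void)StoreBytes;
  (void)AllocBytes;
}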
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/runtime-pointer-checking-insert-typesize.ll b/llvm/test/Analysis/LoopAccessAnalysis/runtime-pointer-checking-insert-typesize.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/runtime-pointer-checking-insert-typesize.ll
@@ -0,0 +1,27 @@
+; RUN: opt -loop-accesses -analyze < %s >/dev/null 2>%t
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; This regression test defends against a TypeSize warning 'assumption that
+; TypeSize is not scalable'. The warning cropped up in
+; RuntimePointerChecking::insert during loop load elimination because
+; that function was previously unaware of scalable types.
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning: {{.*}}TypeSize is not scalable
+
+define void @runtime_pointer_checking_insert_typesize(<vscale x 4 x i32>* %a,
+                                                      <vscale x 4 x i32>* %b) {
+entry:
+  br label %loop.body
+loop.body:
+  %0 = phi i64 [ 0, %entry ], [%1, %loop.body]
+  %idx_a = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i64 %0
+  %idx_b = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %b, i64 %0
+  %tmp = load <vscale x 4 x i32>, <vscale x 4 x i32>* %idx_a
+  store <vscale x 4 x i32> %tmp, <vscale x 4 x i32>* %idx_b
+  %1 = add i64 %0, 2
+  %2 = icmp eq i64 %1, 1024
+  br i1 %2, label %loop.end, label %loop.body
+loop.end:
+  ret void
+}
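Finally, a sketch of what the new API yields for a scalable type (the wrapper function here is invented for illustration): the size of <vscale x 4 x i32> is unknown at compile time, so getStoreSizeOfExpr defers to getSizeOfScalableVectorExpr, which wraps the classic GEP-off-null sizeof idiom in a SCEVUnknown.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// The returned SCEVUnknown wraps the constant expression
//   ptrtoint (<vscale x 4 x i32>* getelementptr (<vscale x 4 x i32>,
//             <vscale x 4 x i32>* null, i64 1) to i64)
// which lowers to vscale * 16 bytes once vscale is known.
const SCEV *scalableStoreSize(ScalarEvolution &SE, LLVMContext &Ctx) {
  auto *VTy = ScalableVectorType::get(Type::getInt32Ty(Ctx), /*MinNumElts=*/4);
  return SE.getStoreSizeOfExpr(Type::getInt64Ty(Ctx), VTy);
}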