Index: polly/trunk/include/polly/CodeGen/IslExprBuilder.h =================================================================== --- polly/trunk/include/polly/CodeGen/IslExprBuilder.h +++ polly/trunk/include/polly/CodeGen/IslExprBuilder.h @@ -74,14 +74,12 @@ /// in the wild. Signed computations are needed, as loop bounds may become /// negative. /// -/// FIXME: Hardcoding sizes can cause issues: +/// It is possible to track overflows that occured in the generated IR. See the +/// description of @see OverflowState for more information. /// -/// a) Certain run-time checks that we may want to generate can involve the -/// size of the data types the computation is performed on. When code -/// generating these run-time checks to isl_ast_expr[essions], the -/// resulting computation may require more than 64 bit. +/// FIXME: Hardcoding sizes can cause issues: /// -/// b) On embedded systems and especially for high-level-synthesis 64 bit +/// - On embedded systems and especially for high-level-synthesis 64 bit /// computations are very costly. /// /// The right approach is to compute the minimal necessary bitwidth and @@ -89,11 +87,6 @@ /// to use this information in our IslAstGenerator. Preliminary patches are /// available, but have not been committed yet. /// -/// 2) We always flag computations with 'nsw' -/// -/// As isl_ast_expr[essions] assume arbitrary precision, no wrapping should -/// ever occur in the generated LLVM-IR (assuming the data type chosen is large -/// enough). class IslExprBuilder { public: /// @brief A map from isl_ids to llvm::Values. @@ -112,9 +105,7 @@ IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue, ValueMapT &GlobalMap, const llvm::DataLayout &DL, llvm::ScalarEvolution &SE, llvm::DominatorTree &DT, - llvm::LoopInfo &LI) - : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), - DL(DL), SE(SE), DT(DT), LI(LI) {} + llvm::LoopInfo &LI); /// @brief Create LLVM-IR for an isl_ast_expr[ession]. /// @@ -140,9 +131,37 @@ /// @return The type with which the expression should be computed. llvm::IntegerType *getType(__isl_keep isl_ast_expr *Expr); + /// @brief Change if runtime overflows are tracked or not. + /// + /// @param Enable Flag to enable/disable the tracking. + /// + /// Note that this will reset the tracking state and that tracking is only + /// allowed if the last tracked expression dominates the current insert point. + void setTrackOverflow(bool Enable); + + /// @brief Return the current overflow status or nullptr if it is not tracked. + /// + /// @return A nullptr if tracking is disabled or otherwise an i1 that has the + /// value of "0" if and only if no overflow happened since tracking + /// was enabled. + llvm::Value *getOverflowState() const; + private: Scop &S; + /// @brief Flag that will be set if an overflow occurred at runtime. + /// + /// Note that this flag is by default a nullptr and if it is a nullptr + /// we will not record overflows but simply perform the computations. + /// The intended usage is as follows: + /// - If overflows in [an] expression[s] should be tracked, call + /// the setTrackOverflow(true) function. + /// - Use create(...) for all expressions that should be checked. + /// - Call getOverflowState() to get the value representing the current + /// state of the overflow flag. + /// - To stop tracking call setTrackOverflow(false). + llvm::Value *OverflowState; + PollyIRBuilder &Builder; IDToValueTy &IDToValue; ValueMapT &GlobalMap; @@ -165,6 +184,48 @@ llvm::Value *createInt(__isl_take isl_ast_expr *Expr); llvm::Value *createOpAddressOf(__isl_take isl_ast_expr *Expr); llvm::Value *createAccessAddress(__isl_take isl_ast_expr *Expr); + + /// @brief Create a binary operation @p Opc and track overflows if requested. + /// + /// @param OpC The binary operation that should be performed [Add/Sub/Mul]. + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the binary operation. + llvm::Value *createBinOp(llvm::BinaryOperator::BinaryOps Opc, + llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name); + + /// @brief Create an addition and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the addition. + llvm::Value *createAdd(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); + + /// @brief Create a subtraction and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the subtraction. + llvm::Value *createSub(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); + + /// @brief Create a multiplication and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the multiplication. + llvm::Value *createMul(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); }; } Index: polly/trunk/lib/CodeGen/CodeGeneration.cpp =================================================================== --- polly/trunk/lib/CodeGen/CodeGeneration.cpp +++ polly/trunk/lib/CodeGen/CodeGeneration.cpp @@ -148,6 +148,7 @@ PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S); + IslExprBuilder &ExprBuilder = NodeBuilder.getExprBuilder(); // Only build the run-time condition and parameters _after_ having // introduced the conditional branch. This is important as the conditional @@ -191,7 +192,13 @@ NodeBuilder.addParameters(S.getContext()); - Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); + ExprBuilder.setTrackOverflow(true); + Value *RTC = buildRTC(Builder, ExprBuilder); + Value *OverflowHappened = Builder.CreateNot( + ExprBuilder.getOverflowState(), "polly.rtc.overflown"); + RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result"); + ExprBuilder.setTrackOverflow(false); + Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); Builder.SetInsertPoint(&StartBlock->front()); Index: polly/trunk/lib/CodeGen/IslExprBuilder.cpp =================================================================== --- polly/trunk/lib/CodeGen/IslExprBuilder.cpp +++ polly/trunk/lib/CodeGen/IslExprBuilder.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "polly/CodeGen/IslExprBuilder.h" +#include "polly/Options.h" #include "polly/ScopInfo.h" #include "polly/Support/GICHelper.h" #include "polly/Support/ScopHelper.h" @@ -19,6 +20,123 @@ using namespace llvm; using namespace polly; +/// @brief Different overflow tracking modes. +enum OverflowTrackingChoice { + OT_NEVER, ///< Never tack potential overflows. + OT_REQUEST, ///< Track potential overflows if requested. + OT_ALWAYS ///< Always track potential overflows. +}; + +static cl::opt OTMode( + "polly-overflow-tracking", + cl::desc("Define where potential integer overflows in generated " + "expressions should be tracked."), + cl::values(clEnumValN(OT_NEVER, "never", "Never track the overflow bit."), + clEnumValN(OT_REQUEST, "request", + "Track the overflow bit if requested."), + clEnumValN(OT_ALWAYS, "always", + "Always track the overflow bit."), + clEnumValEnd), + cl::Hidden, cl::init(OT_REQUEST), cl::ZeroOrMore, cl::cat(PollyCategory)); + +IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder, + IDToValueTy &IDToValue, ValueMapT &GlobalMap, + const DataLayout &DL, ScalarEvolution &SE, + DominatorTree &DT, LoopInfo &LI) + : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), + DL(DL), SE(SE), DT(DT), LI(LI) { + OverflowState = (OTMode == OT_ALWAYS) ? Builder.getFalse() : nullptr; +} + +void IslExprBuilder::setTrackOverflow(bool Enable) { + // If potential overflows are tracked always or never we ignore requests + // to change the behaviour. + if (OTMode != OT_REQUEST) + return; + + if (Enable) { + // If tracking should be enabled initialize the OverflowState. + OverflowState = Builder.getFalse(); + } else { + // If tracking should be disabled just unset the OverflowState. + OverflowState = nullptr; + } +} + +Value *IslExprBuilder::getOverflowState() const { + // If the overflow tracking was requested but it is disabled we avoid the + // additional nullptr checks at the call sides but instead provide a + // meaningful result. + if (OTMode == OT_NEVER) + return Builder.getFalse(); + return OverflowState; +} + +Value *IslExprBuilder::createBinOp(BinaryOperator::BinaryOps Opc, Value *LHS, + Value *RHS, const Twine &Name) { + // Handle the plain operation (without overflow tracking) first. + if (!OverflowState) { + switch (Opc) { + case Instruction::Add: + return Builder.CreateNSWAdd(LHS, RHS, Name); + case Instruction::Sub: + return Builder.CreateNSWSub(LHS, RHS, Name); + case Instruction::Mul: + return Builder.CreateNSWMul(LHS, RHS, Name); + default: + llvm_unreachable("Unknown binary operator!"); + } + } + + Function *F = nullptr; + Module *M = Builder.GetInsertBlock()->getModule(); + switch (Opc) { + case Instruction::Add: + F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, + {LHS->getType()}); + break; + case Instruction::Sub: + F = Intrinsic::getDeclaration(M, Intrinsic::ssub_with_overflow, + {LHS->getType()}); + break; + case Instruction::Mul: + F = Intrinsic::getDeclaration(M, Intrinsic::smul_with_overflow, + {LHS->getType()}); + break; + default: + llvm_unreachable("No overflow intrinsic for binary operator found!"); + } + + auto *ResultStruct = Builder.CreateCall(F, {LHS, RHS}, Name); + assert(ResultStruct->getType()->isStructTy()); + + auto *OverflowFlag = + Builder.CreateExtractValue(ResultStruct, 1, Name + ".obit"); + + // If all overflows are tracked we do not combine the results as this could + // cause dominance problems. Instead we will always keep the last overflow + // flag as current state. + if (OTMode == OT_ALWAYS) + OverflowState = OverflowFlag; + else + OverflowState = + Builder.CreateOr(OverflowState, OverflowFlag, "polly.overflow.state"); + + return Builder.CreateExtractValue(ResultStruct, 0, Name + ".res"); +} + +Value *IslExprBuilder::createAdd(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Add, LHS, RHS, Name); +} + +Value *IslExprBuilder::createSub(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Sub, LHS, RHS, Name); +} + +Value *IslExprBuilder::createMul(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Mul, LHS, RHS, Name); +} + Type *IslExprBuilder::getWidestType(Type *T1, Type *T2) { assert(isa(T1) && isa(T2)); @@ -142,8 +260,7 @@ if (Ty != IndexOp->getType()) IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); - IndexOp = - Builder.CreateAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); + IndexOp = createAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); } // For every but the last dimension multiply the size, for the last @@ -167,8 +284,7 @@ if (Ty != DimSize->getType()) DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, "polly.access.sext." + BaseName); - IndexOp = - Builder.CreateMul(IndexOp, DimSize, "polly.access.mul." + BaseName); + IndexOp = createMul(IndexOp, DimSize, "polly.access.mul." + BaseName); } Access = Builder.CreateGEP(Base, IndexOp, "polly.access." + BaseName); @@ -237,13 +353,13 @@ default: llvm_unreachable("This is no binary isl ast expression"); case isl_ast_op_add: - Res = Builder.CreateNSWAdd(LHS, RHS); + Res = createAdd(LHS, RHS); break; case isl_ast_op_sub: - Res = Builder.CreateNSWSub(LHS, RHS); + Res = createSub(LHS, RHS); break; case isl_ast_op_mul: - Res = Builder.CreateNSWMul(LHS, RHS); + Res = createMul(LHS, RHS); break; case isl_ast_op_div: Res = Builder.CreateSDiv(LHS, RHS, "pexp.div", true); @@ -265,8 +381,8 @@ // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d Value *One = ConstantInt::get(MaxType, 1); Value *Zero = ConstantInt::get(MaxType, 0); - Value *Sum1 = Builder.CreateSub(LHS, RHS, "pexp.fdiv_q.0"); - Value *Sum2 = Builder.CreateAdd(Sum1, One, "pexp.fdiv_q.1"); + Value *Sum1 = createSub(LHS, RHS, "pexp.fdiv_q.0"); + Value *Sum2 = createAdd(Sum1, One, "pexp.fdiv_q.1"); Value *isNegative = Builder.CreateICmpSLT(LHS, Zero, "pexp.fdiv_q.2"); Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS, "pexp.fdiv_q.3"); Index: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp +++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp @@ -1001,7 +1001,13 @@ isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); Domain = nullptr; + ExprBuilder.setTrackOverflow(true); Value *Cond = ExprBuilder.create(DomainCond); + Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), + "polly.preload.cond.overflown"); + Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); + ExprBuilder.setTrackOverflow(false); + if (!Cond->getType()->isIntegerTy(1)) Cond = Builder.CreateIsNotNull(Cond); Index: polly/trunk/test/Isl/CodeGen/OpenMP/new_multidim_access.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/OpenMP/new_multidim_access.ll +++ polly/trunk/test/Isl/CodeGen/OpenMP/new_multidim_access.ll @@ -19,15 +19,15 @@ ; CHECK: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 2i1] }; ; CHECK: new: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 43 + i1] }; -; IR: %polly.access.mul.polly.subfunc.arg.A = mul i64 %polly.indvar, %polly.subfunc.arg.m +; IR: %polly.access.mul.polly.subfunc.arg.A = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m ; IR: %6 = add nsw i64 %polly.indvar5, 13 -; IR: %polly.access.add.polly.subfunc.arg.A = add i64 %polly.access.mul.polly.subfunc.arg.A, %6 +; IR: %polly.access.add.polly.subfunc.arg.A = add nsw i64 %polly.access.mul.polly.subfunc.arg.A, %6 ; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A ; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3 -; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul i64 %polly.indvar, %polly.subfunc.arg.m +; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m ; IR: %7 = add nsw i64 %polly.indvar5, 43 -; IR: %polly.access.add.polly.subfunc.arg.A9 = add i64 %polly.access.mul.polly.subfunc.arg.A8, %7 +; IR: %polly.access.add.polly.subfunc.arg.A9 = add nsw i64 %polly.access.mul.polly.subfunc.arg.A8, %7 ; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9 ; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/Isl/CodeGen/aliasing_multidimensional_access.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/aliasing_multidimensional_access.ll +++ polly/trunk/test/Isl/CodeGen/aliasing_multidimensional_access.ll @@ -1,12 +1,28 @@ ; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s ; -; Check that we calculate the maximal access into array A correctly. +; Check that we calculate the maximal access into array A correctly and track the overflow state. ; -; CHECK: %[[TMP0:[._0-9a-zA-Z]*]] = mul i64 99, %m -; CHECK: %[[TMP1:[._0-9a-zA-Z]*]] = add i64 %[[TMP0]], 149 -; CHECK: %[[TMP2:[._0-9a-zA-Z]*]] = mul i64 %[[TMP1]], %p -; CHECK: %[[TMP3:[._0-9a-zA-Z]*]] = add i64 %[[TMP2]], 150 -; CHECK: %polly.access.A{{[0-9]*}} = getelementptr double, double* %A, i64 %[[TMP3]] +; CHECK: %[[TMP0:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 99, i64 %m) +; CHECK: %[[TMP0O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP0]], 1 +; CHECK: %[[OS0:[._0-9a-zA-Z]*]] = or i1 {{[^,]*}}, %[[TMP0O]] +; CHECK: %[[TMP0R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP0]], 0 +; CHECK: %[[TMP1:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[TMP0R]], i64 149) +; CHECK: %[[TMP1O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP1]], 1 +; CHECK: %[[OS1:[._0-9a-zA-Z]*]] = or i1 %[[OS0]], %[[TMP1O]] +; CHECK: %[[TMP1R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP1]], 0 +; CHECK: %[[TMP2:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %[[TMP1R]], i64 %p) +; CHECK: %[[TMP2O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP2]], 1 +; CHECK: %[[OS2:[._0-9a-zA-Z]*]] = or i1 %[[OS1]], %[[TMP2O]] +; CHECK: %[[TMP2R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP2]], 0 +; CHECK: %[[TMP3:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[TMP2R]], i64 150) +; CHECK: %[[TMP3O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP3]], 1 +; CHECK: %[[OS3:[._0-9a-zA-Z]*]] = or i1 %[[OS2]], %[[TMP3O]] +; CHECK: %[[TMP3R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP3]], 0 +; CHECK: %polly.access.A{{[0-9]*}} = getelementptr double, double* %A, i64 %[[TMP3R]] +; +; CHECK: %polly.rtc.overflown = xor i1 %[[OS3]], true +; CHECK: %polly.rtc.result = and i1 %{{[^,]*}}, %polly.rtc.overflown +; CHECK: br i1 %polly.rtc.result, ; ; void foo(long n, long m, long p, double A[n][m][p], int *B) { ; for (long i = 0; i < 100; i++) Index: polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_1.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_1.ll +++ polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_1.ll @@ -6,8 +6,11 @@ ; } ; ; CHECK: %[[Cext:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK: %[[Cp1:[._a-zA-Z0-9]*]] = add nsw i64 %[[Cext]], 1 -; CHECK: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[Cp1]] +; CHECK: %[[Cp1:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[Cext]], i64 1) +; CHECK: %[[Cp1O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[Cp1]], 1 +; CHECK: %[[OS:[._a-zA-Z0-9]*]] = or i1 false, %[[Cp1O]] +; CHECK: %[[Cp1R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[Cp1]], 0 +; CHECK: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[Cp1R]] ; CHECK: %[[AMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 0 ; CHECK: %[[BMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMax]] to i64 ; CHECK: %[[AMinI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[AMin]] to i64 @@ -19,7 +22,9 @@ ; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i64 %[[AMaxI]], %[[BMinI]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] ; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 true, %[[NoAlias]] -; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond +; CHECK: %[[NOV:[._a-zA-Z0-9]*]] = xor i1 %[[OS]], true +; CHECK: %[[RTCV:[._a-zA-Z0-9]*]] = and i1 %[[RTC]], %[[NOV]] +; CHECK: br i1 %[[RTCV]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_2.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_2.ll +++ polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_2.ll @@ -7,9 +7,12 @@ ; ; CHECK: %[[Ctx:[._a-zA-Z0-9]*]] = and i1 true ; CHECK-NEXT: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK-NEXT: %[[M3:[._a-zA-Z0-9]*]] = sub nsw i64 %[[M0]], 9 -; CHECK-NEXT: %[[M1:[._a-zA-Z0-9]*]] = icmp sgt i64 6, %[[M3]] -; CHECK-NEXT: %[[M4:[._a-zA-Z0-9]*]] = select i1 %[[M1]], i64 6, i64 %[[M3]] +; CHECK-NEXT: %[[M3:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %[[M0]], i64 9) +; CHECK-NEXT: %[[M3O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[M3]], 1 +; CHECK-NEXT: %[[OS0:[._a-zA-Z0-9]*]] = or i1 false, %[[M3O]] +; CHECK-NEXT: %[[M3R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[M3]], 0 +; CHECK-NEXT: %[[M1:[._a-zA-Z0-9]*]] = icmp sgt i64 6, %[[M3R]] +; CHECK-NEXT: %[[M4:[._a-zA-Z0-9]*]] = select i1 %[[M1]], i64 6, i64 %[[M3R]] ; CHECK-NEXT: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[M4]] ; CHECK-NEXT: %[[AMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 0 ; CHECK-NEXT: %[[BMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMax]] to i64 @@ -17,16 +20,21 @@ ; CHECK-NEXT: %[[BltA:[._a-zA-Z0-9]*]] = icmp ule i64 %[[BMaxI]], %[[AMinI]] ; CHECK-NEXT: %[[AMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 1024 ; CHECK-NEXT: %[[m0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK-NEXT: %[[m3:[._a-zA-Z0-9]*]] = sub nsw i64 %[[m0]], 10 -; CHECK-NEXT: %[[m1:[._a-zA-Z0-9]*]] = icmp slt i64 5, %[[m3]] -; CHECK-NEXT: %[[m4:[._a-zA-Z0-9]*]] = select i1 %[[m1]], i64 5, i64 %[[m3]] +; CHECK-NEXT: %[[m3:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %[[m0]], i64 10) +; CHECK-NEXT: %[[m3O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[m3]], 1 +; CHECK-NEXT: %[[OS1:[._a-zA-Z0-9]*]] = or i1 %[[OS0]], %[[m3O]] +; CHECK-NEXT: %[[m3R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[m3]], 0 +; CHECK-NEXT: %[[m1:[._a-zA-Z0-9]*]] = icmp slt i64 5, %[[m3R]] +; CHECK-NEXT: %[[m4:[._a-zA-Z0-9]*]] = select i1 %[[m1]], i64 5, i64 %[[m3R]] ; CHECK-NEXT: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]] ; CHECK-NEXT: %[[AMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[AMax]] to i64 ; CHECK-NEXT: %[[BMinI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMin]] to i64 ; CHECK-NEXT: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i64 %[[AMaxI]], %[[BMinI]] ; CHECK-NEXT: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] ; CHECK-NEXT: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %[[Ctx]], %[[NoAlias]] -; CHECK-NEXT: br i1 %[[RTC]], label %polly.start, label %for.cond +; CHECK-NEXT: %[[NOV:[._a-zA-Z0-9]*]] = xor i1 %[[OS1]], true +; CHECK-NEXT: %[[RTCV:[._a-zA-Z0-9]*]] = and i1 %[[RTC]], %[[NOV]] +; CHECK-NEXT: br i1 %[[RTCV]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/Isl/CodeGen/exprModDiv.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/exprModDiv.ll +++ polly/trunk/test/Isl/CodeGen/exprModDiv.ll @@ -18,7 +18,7 @@ ; A[i % 127] ; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127 -; CHECK: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r +; CHECK: %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r ; A[floor(i / 127)] ; @@ -28,41 +28,41 @@ ; each value of i to indeed be mapped to a value. ; ; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127 -; CHECK: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q +; CHECK: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d ; A[p + 127 * floord(-p - 1, 127) + 127] -; CHECK: %pexp.fdiv_q.0 = sub i64 %p, 127 -; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1 +; CHECK: %pexp.fdiv_q.0 = sub nsw i64 %p, 127 +; CHECK: %pexp.fdiv_q.1 = add nsw i64 %pexp.fdiv_q.0, 1 ; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %p, 0 ; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %p ; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127 ; CHECK: %[[r1:[0-9]*]] = mul nsw i64 127, %pexp.fdiv_q.4 ; CHECK: %[[r2:[0-9]*]] = sub nsw i64 %p, %[[r1]] -; CHECK: %polly.access.A8 = getelementptr float, float* %A, i64 %[[r2]] +; CHECK: %polly.access.A11 = getelementptr float, float* %A, i64 %[[r2]] ; A[p / 127] ; CHECK: %pexp.div = sdiv exact i64 %p, 127 -; CHECK: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div +; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div ; A[i % 128] ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 -; POW2: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r +; POW2: %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r ; A[floor(i / 128)] ; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128 -; POW2: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q +; POW2: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d ; A[p + 128 * floord(-p - 1, 128) + 128] ; POW2: %polly.fdiv_q.shr = ashr i64 %p, 7 ; POW2: %[[r1:[0-9]*]] = mul nsw i64 128, %polly.fdiv_q.shr ; POW2: %[[r2:[0-9]*]] = sub nsw i64 %p, %[[r1]] -; POW2: %polly.access.A8 = getelementptr float, float* %A, i64 %[[r2]] +; POW2: %polly.access.A11 = getelementptr float, float* %A, i64 %[[r2]] ; A[p / 128] ; POW2: %pexp.div = sdiv exact i64 %p, 128 -; POW2: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div +; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll +++ polly/trunk/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll @@ -1,5 +1,6 @@ ; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s ; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s --check-prefix=IR +; RUN: opt %loadPolly -S -polly-codegen --polly-overflow-tracking=always < %s | FileCheck %s --check-prefix=IRA ; ; As (p + q) can overflow we have to check that we load from ; I[p + q] only if it does not. @@ -20,23 +21,61 @@ ; IR-NEXT: %13 = icmp sge i64 %12, 1 ; IR-NEXT: %14 = sext i32 %q to i64 ; IR-NEXT: %15 = sext i32 %p to i64 -; IR-NEXT: %16 = add nsw i64 %15, %14 -; IR-NEXT: %17 = icmp sle i64 %16, 2147483647 +; IR-NEXT: %16 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %15, i64 %14) +; IR-NEXT: %.obit4 = extractvalue { i64, i1 } %16, 1 +; IR-NEXT: %polly.overflow.state5 = or i1 false, %.obit4 +; IR-NEXT: %.res6 = extractvalue { i64, i1 } %16, 0 +; IR-NEXT: %17 = icmp sle i64 %.res6, 2147483647 ; IR-NEXT: %18 = and i1 %13, %17 ; IR-NEXT: %19 = sext i32 %q to i64 ; IR-NEXT: %20 = sext i32 %p to i64 -; IR-NEXT: %21 = add nsw i64 %20, %19 -; IR-NEXT: %22 = icmp sge i64 %21, -2147483648 +; IR-NEXT: %21 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %20, i64 %19) +; IR-NEXT: %.obit7 = extractvalue { i64, i1 } %21, 1 +; IR-NEXT: %polly.overflow.state8 = or i1 %polly.overflow.state5, %.obit7 +; IR-NEXT: %.res9 = extractvalue { i64, i1 } %21, 0 +; IR-NEXT: %22 = icmp sge i64 %.res9, -2147483648 ; IR-NEXT: %23 = and i1 %18, %22 -; IR-NEXT: br label %polly.preload.cond1 +; IR-NEXT: %polly.preload.cond.overflown10 = xor i1 %polly.overflow.state8, true +; IR-NEXT: %polly.preload.cond.result11 = and i1 %23, %polly.preload.cond.overflown10 +; IR-NEXT: br label %polly.preload.cond12 ; -; IR: polly.preload.cond1: -; IR-NEXT: br i1 %23 +; IR: polly.preload.cond12: +; IR-NEXT: br i1 %polly.preload.cond.result11 ; -; IR: polly.preload.exec3: +; IR: polly.preload.exec14: ; IR-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0 ; IR-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4 ; +; IRA: polly.preload.merge: +; IRA-NEXT: %polly.preload.tmp1.merge = phi i32* [ %polly.access.I.load, %polly.preload.exec ], [ null, %polly.preload.cond ] +; IRA-NEXT: store i32* %polly.preload.tmp1.merge, i32** %tmp1.preload.s2a +; IRA-NEXT: %12 = sext i32 %N to i64 +; IRA-NEXT: %13 = icmp sge i64 %12, 1 +; IRA-NEXT: %14 = sext i32 %q to i64 +; IRA-NEXT: %15 = sext i32 %p to i64 +; IRA-NEXT: %16 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %15, i64 %14) +; IRA-NEXT: %.obit5 = extractvalue { i64, i1 } %16, 1 +; IRA-NEXT: %.res6 = extractvalue { i64, i1 } %16, 0 +; IRA-NEXT: %17 = icmp sle i64 %.res6, 2147483647 +; IRA-NEXT: %18 = and i1 %13, %17 +; IRA-NEXT: %19 = sext i32 %q to i64 +; IRA-NEXT: %20 = sext i32 %p to i64 +; IRA-NEXT: %21 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %20, i64 %19) +; IRA-NEXT: %.obit7 = extractvalue { i64, i1 } %21, 1 +; IRA-NEXT: %.res8 = extractvalue { i64, i1 } %21, 0 +; IRA-NEXT: %22 = icmp sge i64 %.res8, -2147483648 +; IRA-NEXT: %23 = and i1 %18, %22 +; IRA-NEXT: %polly.preload.cond.overflown9 = xor i1 %.obit7, true +; IRA-NEXT: %polly.preload.cond.result10 = and i1 %23, %polly.preload.cond.overflown9 +; IRA-NEXT: br label %polly.preload.cond11 +; +; IRA: polly.preload.cond11: +; IRA-NEXT: br i1 %polly.preload.cond.result10 +; +; IRA: polly.preload.exec13: +; IRA-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0 +; IRA-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4 +; ; void f(int **I, int *A, int N, int p, int q) { ; for (int i = 0; i < N; i++) ; A[i] = *(I[p + q]); Index: polly/trunk/test/Isl/CodeGen/no-overflow-tracking.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/no-overflow-tracking.ll +++ polly/trunk/test/Isl/CodeGen/no-overflow-tracking.ll @@ -0,0 +1,73 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s +; RUN: opt %loadPolly -S -polly-codegen -polly-overflow-tracking=never < %s | FileCheck %s --check-prefix=IR +; +; As (p + q) can overflow we have to check that we load from +; I[p + q] only if it does not. +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, p, q] -> { Stmt_for_body[i0] -> MemRef_I[p + q] }; +; CHECK-NEXT: Execution Context: [N, p, q] -> { : N > 0 and -2147483648 - p <= q <= 2147483647 - p } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, p, q] -> { Stmt_for_body[i0] -> MemRef_tmp1[0] }; +; CHECK-NEXT: Execution Context: [N, p, q] -> { : N > 0 } +; CHECK-NEXT: } +; +; IR: polly.preload.merge: +; IR-NEXT: %polly.preload.tmp1.merge = phi i32* [ %polly.access.I.load, %polly.preload.exec ], [ null, %polly.preload.cond ] +; IR-NEXT: store i32* %polly.preload.tmp1.merge, i32** %tmp1.preload.s2a +; IR-NEXT: %12 = sext i32 %N to i64 +; IR-NEXT: %13 = icmp sge i64 %12, 1 +; IR-NEXT: %14 = sext i32 %q to i64 +; IR-NEXT: %15 = sext i32 %p to i64 +; IR-NEXT: %16 = add nsw i64 %15, %14 +; IR-NEXT: %17 = icmp sle i64 %16, 2147483647 +; IR-NEXT: %18 = and i1 %13, %17 +; IR-NEXT: %19 = sext i32 %q to i64 +; IR-NEXT: %20 = sext i32 %p to i64 +; IR-NEXT: %21 = add nsw i64 %20, %19 +; IR-NEXT: %22 = icmp sge i64 %21, -2147483648 +; IR-NEXT: %23 = and i1 %18, %22 +; IR-NEXT: br label %polly.preload.cond1 +; +; IR: polly.preload.cond1: +; IR-NEXT: br i1 %23 +; +; IR: polly.preload.exec3: +; IR-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0 +; IR-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4 +; +; void f(int **I, int *A, int N, int p, int q) { +; for (int i = 0; i < N; i++) +; A[i] = *(I[p + q]); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32** %I, i32* %A, i32 %N, i32 %p, i32 %q) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add = add i32 %p, %q + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32*, i32** %I, i64 %idxprom + %tmp1 = load i32*, i32** %arrayidx, align 8 + %tmp2 = load i32, i32* %tmp1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp2, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}