Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -74,14 +74,12 @@ /// in the wild. Signed computations are needed, as loop bounds may become /// negative. /// -/// FIXME: Hardcoding sizes can cause issues: +/// It is possible to track overflows that occured in the generated IR. See the +/// description of @see OverflowState for more information. /// -/// a) Certain run-time checks that we may want to generate can involve the -/// size of the data types the computation is performed on. When code -/// generating these run-time checks to isl_ast_expr[essions], the -/// resulting computation may require more than 64 bit. +/// FIXME: Hardcoding sizes can cause issues: /// -/// b) On embedded systems and especially for high-level-synthesis 64 bit +/// - On embedded systems and especially for high-level-synthesis 64 bit /// computations are very costly. /// /// The right approach is to compute the minimal necessary bitwidth and @@ -89,11 +87,6 @@ /// to use this information in our IslAstGenerator. Preliminary patches are /// available, but have not been committed yet. /// -/// 2) We always flag computations with 'nsw' -/// -/// As isl_ast_expr[essions] assume arbitrary precision, no wrapping should -/// ever occur in the generated LLVM-IR (assuming the data type chosen is large -/// enough). class IslExprBuilder { public: /// @brief A map from isl_ids to llvm::Values. @@ -113,8 +106,8 @@ ValueMapT &GlobalMap, const llvm::DataLayout &DL, llvm::ScalarEvolution &SE, llvm::DominatorTree &DT, llvm::LoopInfo &LI) - : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), - DL(DL), SE(SE), DT(DT), LI(LI) {} + : S(S), OverflowState(nullptr), Builder(Builder), IDToValue(IDToValue), + GlobalMap(GlobalMap), DL(DL), SE(SE), DT(DT), LI(LI) {} /// @brief Create LLVM-IR for an isl_ast_expr[ession]. /// @@ -140,9 +133,37 @@ /// @return The type with which the expression should be computed. llvm::IntegerType *getType(__isl_keep isl_ast_expr *Expr); + /// @brief Change if runtime overflows are tracked or not. + /// + /// @param Enable Flag to enable/disable the tacking. + /// + /// If the tracking was enabled [disabled] and @p Enable is true [false] the + /// call to this function is basically a no-op. + void setTrackOverflow(bool Enable); + + /// @brief Return the current overflow status or nullptr if it is not tracked. + /// + /// @return A nullptr if tracking is disabled or otherwise an i1 that has the + /// value of "0" if and only if no overflow happend since tracking was + /// enabled. + llvm::Value *getOverflowState() const { return OverflowState; } + private: Scop &S; + /// @brief Flag that will be set if an overflow occured at runtime. + /// + /// Note that this flag is by default a nullptr and if it is a nullptr + /// we will not record overflows but simply perform the computations. + /// The intended usage is as follows: + /// - If overflows in [an] expression[s] should be tracked, call + /// the setTrackOverflow(true) function. + /// - Use create(...) for all expressions that should be checked. + /// - Call getOverflowState() to get the value representing the current + /// state of the overflow flag. + /// - To stop tracking call setTrackOverflow(false). + llvm::Value *OverflowState; + PollyIRBuilder &Builder; IDToValueTy &IDToValue; ValueMapT &GlobalMap; @@ -165,6 +186,48 @@ llvm::Value *createInt(__isl_take isl_ast_expr *Expr); llvm::Value *createOpAddressOf(__isl_take isl_ast_expr *Expr); llvm::Value *createAccessAddress(__isl_take isl_ast_expr *Expr); + + /// @brief Create a binary operation @p Opc and track overflows if requested. + /// + /// @param OpC The binary operation that should be performed [Add/Sub/Mul]. + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the binary operation. + llvm::Value *createBinOp(llvm::BinaryOperator::BinaryOps Opc, + llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name); + + /// @brief Create an addition and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the addition. + llvm::Value *createAdd(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); + + /// @brief Create a subtraction and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the subtraction. + llvm::Value *createSub(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); + + /// @brief Create a multiplication and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the multiplication. + llvm::Value *createMul(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); }; } Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -148,6 +148,7 @@ PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S); + IslExprBuilder &ExprBuilder = NodeBuilder.getExprBuilder(); // Only build the run-time condition and parameters _after_ having // introduced the conditional branch. This is important as the conditional @@ -191,7 +192,13 @@ NodeBuilder.addParameters(S.getContext()); - Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); + ExprBuilder.setTrackOverflow(true); + Value *RTC = buildRTC(Builder, ExprBuilder); + Value *OverflowHappened = Builder.CreateNot( + ExprBuilder.getOverflowState(), "polly.rtc.overflown"); + RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result"); + ExprBuilder.setTrackOverflow(false); + Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); Builder.SetInsertPoint(&StartBlock->front()); Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -19,6 +19,79 @@ using namespace llvm; using namespace polly; +void IslExprBuilder::setTrackOverflow(bool Enable) { + // Ignore calls that do not change the current state. + if (Enable == (OverflowState != nullptr)) + return; + + if (Enable) { + // If trackign should be enabled initialize the OverflowState. + assert(!OverflowState); + OverflowState = Builder.getFalse(); + } else { + // If tracking should be disabled just unset the OverflowState. + assert(OverflowState); + OverflowState = nullptr; + } +} + +Value *IslExprBuilder::createBinOp(BinaryOperator::BinaryOps Opc, Value *LHS, + Value *RHS, const Twine &Name) { + // Handle the plain operation (without overflow tracking) first. + if (!OverflowState) { + switch (Opc) { + case Instruction::Add: + return Builder.CreateNSWAdd(LHS, RHS, Name); + case Instruction::Sub: + return Builder.CreateNSWSub(LHS, RHS, Name); + case Instruction::Mul: + return Builder.CreateNSWMul(LHS, RHS, Name); + default: + llvm_unreachable("No overflow intrinsic for binary operator found!"); + } + } + + Function *F = nullptr; + Module *M = Builder.GetInsertBlock()->getModule(); + switch (Opc) { + case Instruction::Add: + F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, + {LHS->getType()}); + break; + case Instruction::Sub: + F = Intrinsic::getDeclaration(M, Intrinsic::ssub_with_overflow, + {LHS->getType()}); + break; + case Instruction::Mul: + F = Intrinsic::getDeclaration(M, Intrinsic::smul_with_overflow, + {LHS->getType()}); + break; + default: + llvm_unreachable("No overflow intrinsic for binary operator found!"); + } + + auto *ResultStruct = Builder.CreateCall(F, {LHS, RHS}, Name); + assert(ResultStruct->getType()->isStructTy()); + + auto *OverflowFlag = + Builder.CreateExtractValue(ResultStruct, 1, Name + ".obit"); + OverflowState = + Builder.CreateOr(OverflowState, OverflowFlag, "polly.overflow.state"); + return Builder.CreateExtractValue(ResultStruct, 0, Name + ".res"); +} + +Value *IslExprBuilder::createAdd(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Add, LHS, RHS, Name); +} + +Value *IslExprBuilder::createSub(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Sub, LHS, RHS, Name); +} + +Value *IslExprBuilder::createMul(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Mul, LHS, RHS, Name); +} + Type *IslExprBuilder::getWidestType(Type *T1, Type *T2) { assert(isa(T1) && isa(T2)); @@ -142,8 +215,7 @@ if (Ty != IndexOp->getType()) IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); - IndexOp = - Builder.CreateAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); + IndexOp = createAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); } // For every but the last dimension multiply the size, for the last @@ -167,8 +239,7 @@ if (Ty != DimSize->getType()) DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, "polly.access.sext." + BaseName); - IndexOp = - Builder.CreateMul(IndexOp, DimSize, "polly.access.mul." + BaseName); + IndexOp = createMul(IndexOp, DimSize, "polly.access.mul." + BaseName); } Access = Builder.CreateGEP(Base, IndexOp, "polly.access." + BaseName); @@ -237,13 +308,13 @@ default: llvm_unreachable("This is no binary isl ast expression"); case isl_ast_op_add: - Res = Builder.CreateNSWAdd(LHS, RHS); + Res = createAdd(LHS, RHS); break; case isl_ast_op_sub: - Res = Builder.CreateNSWSub(LHS, RHS); + Res = createSub(LHS, RHS); break; case isl_ast_op_mul: - Res = Builder.CreateNSWMul(LHS, RHS); + Res = createMul(LHS, RHS); break; case isl_ast_op_div: Res = Builder.CreateSDiv(LHS, RHS, "pexp.div", true); @@ -265,8 +336,8 @@ // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d Value *One = ConstantInt::get(MaxType, 1); Value *Zero = ConstantInt::get(MaxType, 0); - Value *Sum1 = Builder.CreateSub(LHS, RHS, "pexp.fdiv_q.0"); - Value *Sum2 = Builder.CreateAdd(Sum1, One, "pexp.fdiv_q.1"); + Value *Sum1 = createSub(LHS, RHS, "pexp.fdiv_q.0"); + Value *Sum2 = createAdd(Sum1, One, "pexp.fdiv_q.1"); Value *isNegative = Builder.CreateICmpSLT(LHS, Zero, "pexp.fdiv_q.2"); Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS, "pexp.fdiv_q.3"); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -1001,7 +1001,13 @@ isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); Domain = nullptr; + ExprBuilder.setTrackOverflow(true); Value *Cond = ExprBuilder.create(DomainCond); + Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), + "polly.preload.cond.overflown"); + Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); + ExprBuilder.setTrackOverflow(false); + if (!Cond->getType()->isIntegerTy(1)) Cond = Builder.CreateIsNotNull(Cond); Index: test/Isl/CodeGen/OpenMP/new_multidim_access.ll =================================================================== --- test/Isl/CodeGen/OpenMP/new_multidim_access.ll +++ test/Isl/CodeGen/OpenMP/new_multidim_access.ll @@ -19,15 +19,15 @@ ; CHECK: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 2i1] }; ; CHECK: new: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 43 + i1] }; -; IR: %polly.access.mul.polly.subfunc.arg.A = mul i64 %polly.indvar, %polly.subfunc.arg.m +; IR: %polly.access.mul.polly.subfunc.arg.A = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m ; IR: %6 = add nsw i64 %polly.indvar5, 13 -; IR: %polly.access.add.polly.subfunc.arg.A = add i64 %polly.access.mul.polly.subfunc.arg.A, %6 +; IR: %polly.access.add.polly.subfunc.arg.A = add nsw i64 %polly.access.mul.polly.subfunc.arg.A, %6 ; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A ; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3 -; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul i64 %polly.indvar, %polly.subfunc.arg.m +; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m ; IR: %7 = add nsw i64 %polly.indvar5, 43 -; IR: %polly.access.add.polly.subfunc.arg.A9 = add i64 %polly.access.mul.polly.subfunc.arg.A8, %7 +; IR: %polly.access.add.polly.subfunc.arg.A9 = add nsw i64 %polly.access.mul.polly.subfunc.arg.A8, %7 ; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9 ; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/aliasing_multidimensional_access.ll =================================================================== --- test/Isl/CodeGen/aliasing_multidimensional_access.ll +++ test/Isl/CodeGen/aliasing_multidimensional_access.ll @@ -1,12 +1,28 @@ ; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s ; -; Check that we calculate the maximal access into array A correctly. +; Check that we calculate the maximal access into array A correctly and track the overflow state. ; -; CHECK: %[[TMP0:[._0-9a-zA-Z]*]] = mul i64 99, %m -; CHECK: %[[TMP1:[._0-9a-zA-Z]*]] = add i64 %[[TMP0]], 149 -; CHECK: %[[TMP2:[._0-9a-zA-Z]*]] = mul i64 %[[TMP1]], %p -; CHECK: %[[TMP3:[._0-9a-zA-Z]*]] = add i64 %[[TMP2]], 150 -; CHECK: %polly.access.A{{[0-9]*}} = getelementptr double, double* %A, i64 %[[TMP3]] +; CHECK: %[[TMP0:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 99, i64 %m) +; CHECK: %[[TMP0O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP0]], 1 +; CHECK: %[[OS0:[._0-9a-zA-Z]*]] = or i1 {{[^,]*}}, %[[TMP0O]] +; CHECK: %[[TMP0R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP0]], 0 +; CHECK: %[[TMP1:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[TMP0R]], i64 149) +; CHECK: %[[TMP1O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP1]], 1 +; CHECK: %[[OS1:[._0-9a-zA-Z]*]] = or i1 %[[OS0]], %[[TMP1O]] +; CHECK: %[[TMP1R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP1]], 0 +; CHECK: %[[TMP2:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %[[TMP1R]], i64 %p) +; CHECK: %[[TMP2O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP2]], 1 +; CHECK: %[[OS2:[._0-9a-zA-Z]*]] = or i1 %[[OS1]], %[[TMP2O]] +; CHECK: %[[TMP2R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP2]], 0 +; CHECK: %[[TMP3:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[TMP2R]], i64 150) +; CHECK: %[[TMP3O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP3]], 1 +; CHECK: %[[OS3:[._0-9a-zA-Z]*]] = or i1 %[[OS2]], %[[TMP3O]] +; CHECK: %[[TMP3R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP3]], 0 +; CHECK: %polly.access.A{{[0-9]*}} = getelementptr double, double* %A, i64 %[[TMP3R]] +; +; CHECK: %polly.rtc.overflown = xor i1 %[[OS3]], true +; CHECK: %polly.rtc.result = and i1 %{{[^,]*}}, %polly.rtc.overflown +; CHECK: br i1 %polly.rtc.result, ; ; void foo(long n, long m, long p, double A[n][m][p], int *B) { ; for (long i = 0; i < 100; i++) Index: test/Isl/CodeGen/aliasing_parametric_simple_1.ll =================================================================== --- test/Isl/CodeGen/aliasing_parametric_simple_1.ll +++ test/Isl/CodeGen/aliasing_parametric_simple_1.ll @@ -6,8 +6,11 @@ ; } ; ; CHECK: %[[Cext:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK: %[[Cp1:[._a-zA-Z0-9]*]] = add nsw i64 %[[Cext]], 1 -; CHECK: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[Cp1]] +; CHECK: %[[Cp1:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[Cext]], i64 1) +; CHECK: %[[Cp1O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[Cp1]], 1 +; CHECK: %[[OS:[._a-zA-Z0-9]*]] = or i1 false, %[[Cp1O]] +; CHECK: %[[Cp1R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[Cp1]], 0 +; CHECK: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[Cp1R]] ; CHECK: %[[AMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 0 ; CHECK: %[[BMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMax]] to i64 ; CHECK: %[[AMinI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[AMin]] to i64 @@ -19,7 +22,9 @@ ; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i64 %[[AMaxI]], %[[BMinI]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] ; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 true, %[[NoAlias]] -; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond +; CHECK: %[[NOV:[._a-zA-Z0-9]*]] = xor i1 %[[OS]], true +; CHECK: %[[RTCV:[._a-zA-Z0-9]*]] = and i1 %[[RTC]], %[[NOV]] +; CHECK: br i1 %[[RTCV]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/aliasing_parametric_simple_2.ll =================================================================== --- test/Isl/CodeGen/aliasing_parametric_simple_2.ll +++ test/Isl/CodeGen/aliasing_parametric_simple_2.ll @@ -7,9 +7,12 @@ ; ; CHECK: %[[Ctx:[._a-zA-Z0-9]*]] = and i1 true ; CHECK-NEXT: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK-NEXT: %[[M3:[._a-zA-Z0-9]*]] = sub nsw i64 %[[M0]], 9 -; CHECK-NEXT: %[[M1:[._a-zA-Z0-9]*]] = icmp sgt i64 6, %[[M3]] -; CHECK-NEXT: %[[M4:[._a-zA-Z0-9]*]] = select i1 %[[M1]], i64 6, i64 %[[M3]] +; CHECK-NEXT: %[[M3:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %[[M0]], i64 9) +; CHECK-NEXT: %[[M3O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[M3]], 1 +; CHECK-NEXT: %[[OS0:[._a-zA-Z0-9]*]] = or i1 false, %[[M3O]] +; CHECK-NEXT: %[[M3R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[M3]], 0 +; CHECK-NEXT: %[[M1:[._a-zA-Z0-9]*]] = icmp sgt i64 6, %[[M3R]] +; CHECK-NEXT: %[[M4:[._a-zA-Z0-9]*]] = select i1 %[[M1]], i64 6, i64 %[[M3R]] ; CHECK-NEXT: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[M4]] ; CHECK-NEXT: %[[AMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 0 ; CHECK-NEXT: %[[BMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMax]] to i64 @@ -17,16 +20,21 @@ ; CHECK-NEXT: %[[BltA:[._a-zA-Z0-9]*]] = icmp ule i64 %[[BMaxI]], %[[AMinI]] ; CHECK-NEXT: %[[AMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 1024 ; CHECK-NEXT: %[[m0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK-NEXT: %[[m3:[._a-zA-Z0-9]*]] = sub nsw i64 %[[m0]], 10 -; CHECK-NEXT: %[[m1:[._a-zA-Z0-9]*]] = icmp slt i64 5, %[[m3]] -; CHECK-NEXT: %[[m4:[._a-zA-Z0-9]*]] = select i1 %[[m1]], i64 5, i64 %[[m3]] +; CHECK-NEXT: %[[m3:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %[[m0]], i64 10) +; CHECK-NEXT: %[[m3O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[m3]], 1 +; CHECK-NEXT: %[[OS1:[._a-zA-Z0-9]*]] = or i1 %[[OS0]], %[[m3O]] +; CHECK-NEXT: %[[m3R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[m3]], 0 +; CHECK-NEXT: %[[m1:[._a-zA-Z0-9]*]] = icmp slt i64 5, %[[m3R]] +; CHECK-NEXT: %[[m4:[._a-zA-Z0-9]*]] = select i1 %[[m1]], i64 5, i64 %[[m3R]] ; CHECK-NEXT: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]] ; CHECK-NEXT: %[[AMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[AMax]] to i64 ; CHECK-NEXT: %[[BMinI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMin]] to i64 ; CHECK-NEXT: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i64 %[[AMaxI]], %[[BMinI]] ; CHECK-NEXT: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] ; CHECK-NEXT: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %[[Ctx]], %[[NoAlias]] -; CHECK-NEXT: br i1 %[[RTC]], label %polly.start, label %for.cond +; CHECK-NEXT: %[[NOV:[._a-zA-Z0-9]*]] = xor i1 %[[OS1]], true +; CHECK-NEXT: %[[RTCV:[._a-zA-Z0-9]*]] = and i1 %[[RTC]], %[[NOV]] +; CHECK-NEXT: br i1 %[[RTCV]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/exprModDiv.ll =================================================================== --- test/Isl/CodeGen/exprModDiv.ll +++ test/Isl/CodeGen/exprModDiv.ll @@ -18,7 +18,7 @@ ; A[i % 127] ; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127 -; CHECK: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r +; CHECK: %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r ; A[floor(i / 127)] ; @@ -28,41 +28,41 @@ ; each value of i to indeed be mapped to a value. ; ; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127 -; CHECK: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q +; CHECK: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d ; A[p + 127 * floord(-p - 1, 127) + 127] -; CHECK: %pexp.fdiv_q.0 = sub i64 %p, 127 -; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1 +; CHECK: %pexp.fdiv_q.0 = sub nsw i64 %p, 127 +; CHECK: %pexp.fdiv_q.1 = add nsw i64 %pexp.fdiv_q.0, 1 ; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %p, 0 ; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %p ; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127 ; CHECK: %[[r1:[0-9]*]] = mul nsw i64 127, %pexp.fdiv_q.4 ; CHECK: %[[r2:[0-9]*]] = sub nsw i64 %p, %[[r1]] -; CHECK: %polly.access.A8 = getelementptr float, float* %A, i64 %[[r2]] +; CHECK: %polly.access.A11 = getelementptr float, float* %A, i64 %[[r2]] ; A[p / 127] ; CHECK: %pexp.div = sdiv exact i64 %p, 127 -; CHECK: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div +; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div ; A[i % 128] ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 -; POW2: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r +; POW2: %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r ; A[floor(i / 128)] ; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128 -; POW2: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q +; POW2: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d ; A[p + 128 * floord(-p - 1, 128) + 128] ; POW2: %polly.fdiv_q.shr = ashr i64 %p, 7 ; POW2: %[[r1:[0-9]*]] = mul nsw i64 128, %polly.fdiv_q.shr ; POW2: %[[r2:[0-9]*]] = sub nsw i64 %p, %[[r1]] -; POW2: %polly.access.A8 = getelementptr float, float* %A, i64 %[[r2]] +; POW2: %polly.access.A11 = getelementptr float, float* %A, i64 %[[r2]] ; A[p / 128] ; POW2: %pexp.div = sdiv exact i64 %p, 128 -; POW2: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div +; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll =================================================================== --- test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll +++ test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll @@ -20,20 +20,28 @@ ; IR-NEXT: %13 = icmp sge i64 %12, 1 ; IR-NEXT: %14 = sext i32 %q to i64 ; IR-NEXT: %15 = sext i32 %p to i64 -; IR-NEXT: %16 = add nsw i64 %15, %14 -; IR-NEXT: %17 = icmp sle i64 %16, 2147483647 +; IR-NEXT: %16 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %15, i64 %14) +; IR-NEXT: %.obit4 = extractvalue { i64, i1 } %16, 1 +; IR-NEXT: %polly.overflow.state5 = or i1 false, %.obit4 +; IR-NEXT: %.res6 = extractvalue { i64, i1 } %16, 0 +; IR-NEXT: %17 = icmp sle i64 %.res6, 2147483647 ; IR-NEXT: %18 = and i1 %13, %17 ; IR-NEXT: %19 = sext i32 %q to i64 ; IR-NEXT: %20 = sext i32 %p to i64 -; IR-NEXT: %21 = add nsw i64 %20, %19 -; IR-NEXT: %22 = icmp sge i64 %21, -2147483648 +; IR-NEXT: %21 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %20, i64 %19) +; IR-NEXT: %.obit7 = extractvalue { i64, i1 } %21, 1 +; IR-NEXT: %polly.overflow.state8 = or i1 %polly.overflow.state5, %.obit7 +; IR-NEXT: %.res9 = extractvalue { i64, i1 } %21, 0 +; IR-NEXT: %22 = icmp sge i64 %.res9, -2147483648 ; IR-NEXT: %23 = and i1 %18, %22 -; IR-NEXT: br label %polly.preload.cond1 +; IR-NEXT: %polly.preload.cond.overflown10 = xor i1 %polly.overflow.state8, true +; IR-NEXT: %polly.preload.cond.result11 = and i1 %23, %polly.preload.cond.overflown10 +; IR-NEXT: br label %polly.preload.cond12 ; -; IR: polly.preload.cond1: -; IR-NEXT: br i1 %23 +; IR: polly.preload.cond12: +; IR-NEXT: br i1 %polly.preload.cond.result11 ; -; IR: polly.preload.exec3: +; IR: polly.preload.exec14: ; IR-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0 ; IR-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4 ;