Index: lib/CodeGen/LoopGenerators.cpp =================================================================== --- lib/CodeGen/LoopGenerators.cpp +++ lib/CodeGen/LoopGenerators.cpp @@ -17,11 +17,13 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace polly; +using namespace PatternMatch; static cl::opt<int> PollyNumThreads("polly-num-threads", @@ -49,6 +51,9 @@ // contains the loop iv 'polly.indvar', the incremented loop iv // 'polly.indvar_next' as well as the condition to check if we execute another // iteration of the loop. After the loop has finished, we branch to ExitBB. +// We expect the type of UB, LB, UB+Stride to be large enough for values that +// UB may take throughout the execution of the loop, including the computation +// of indvar + Stride before the final abort. Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT, BasicBlock *&ExitBB, @@ -123,10 +128,8 @@ IV->addIncoming(LB, PreHeaderBB); Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); - Value *LoopCondition; - UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub"); - LoopCondition = Builder.CreateICmp(Predicate, IV, UB); - LoopCondition->setName("polly.loop_cond"); + Value *LoopCondition = + Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); // Create the loop latch and annotate it as such. 
BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); Index: test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll =================================================================== --- test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll +++ test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll @@ -2,10 +2,10 @@ ; ; Check that we mark multiple parallel loops correctly including the memory instructions. ; -; CHECK-DAG: %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 1022 +; CHECK-DAG: %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 1023 ; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDOuter:[0-9]*]] ; -; CHECK-DAG: %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 510 +; CHECK-DAG: %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 511 ; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]] ; ; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4 Index: test/Isl/CodeGen/MemAccess/generate-all.ll =================================================================== --- test/Isl/CodeGen/MemAccess/generate-all.ll +++ test/Isl/CodeGen/MemAccess/generate-all.ll @@ -15,7 +15,7 @@ ; SCEV-NEXT: %p_tmp5 = fadd float %tmp4_p_scalar_, 1.000000e+01 ; SCEV-NEXT: store float %p_tmp5, float* %p_tmp3, align 4, !alias.scope !0, !noalias !2 ; SCEV-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1 -; SCEV-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98 +; SCEV-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99 ; SCEV-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit ; ASTEXPR: polly.stmt.bb2: ; preds = %polly.loop_header @@ -27,7 +27,7 @@ ; ASTEXPR-NEXT: 
%polly.access.A2 = getelementptr float, float* %A, i64 %pexp.pdiv_r1 ; ASTEXPR-NEXT: store float %p_tmp5, float* %polly.access.A2, align 4, !alias.scope !0, !noalias !2 ; ASTEXPR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1 -; ASTEXPR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 98 +; ASTEXPR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99 ; ASTEXPR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/MemAccess/update_access_functions.ll =================================================================== --- test/Isl/CodeGen/MemAccess/update_access_functions.ll +++ test/Isl/CodeGen/MemAccess/update_access_functions.ll @@ -3,21 +3,21 @@ ; RUN: < %s -S | FileCheck %s ; CHECK-LABEL: polly.stmt.loop1: -; CHECK-NEXT: %3 = mul nsw i64 5, %polly.indvar +; CHECK-NEXT: %3 = mul nsw i64 5, %polly.indvar{{[0-9]*}} ; CHECK-NEXT: %4 = sub nsw i64 %3, 10 ; CHECK-NEXT: %polly.access.A = getelementptr double, double* %A, i64 %4 ; CHECK-NEXT: store double 4.200000e+01, double* %polly.access.A, align 8 ; CHECK-LABEL: polly.stmt.loop2: -; CHECK-NEXT: %polly.access.A10 = getelementptr double, double* %A, i64 42 -; CHECK-NEXT: %val_p_scalar_ = load double, double* %polly.access.A10, align 8 +; CHECK-NEXT: %polly.access.A[[Num0:[0-9]*]] = getelementptr double, double* %A, i64 42 +; CHECK-NEXT: %val_p_scalar_ = load double, double* %polly.access.A[[Num0]], align 8 ; CHECK-LABEL: polly.stmt.loop3: ; CHECK-NEXT: %val.s2a.reload = load double, double* %val.s2a -; CHECK-NEXT: [[REG0:%.*]] = mul nsw i64 13, %polly.indvar16 +; CHECK-NEXT: [[REG0:%.*]] = mul nsw i64 13, %polly.indvar{{[0-9]*}} ; CHECK-NEXT: [[REG1:%.*]] = add nsw i64 [[REG0]], 5 -; CHECK-NEXT: %polly.access.A20 = getelementptr double, double* %A, i64 [[REG1]] -; CHECK-NEXT: store double %val.s2a.reload, double* %polly.access.A20, align 8, +; CHECK-NEXT: %polly.access.A[[Num1:[0-9]*]] = 
getelementptr double, double* %A, i64 [[REG1]] +; CHECK-NEXT: store double %val.s2a.reload, double* %polly.access.A[[Num1]], align 8, target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/OpenMP/single_loop.ll =================================================================== --- test/Isl/CodeGen/OpenMP/single_loop.ll +++ test/Isl/CodeGen/OpenMP/single_loop.ll @@ -70,8 +70,7 @@ ; IR-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar ; IR-NEXT: store float 1.000000e+00, float* %[[gep]] ; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1 -; IR-NEXT: %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 1 -; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub +; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.par.UBAdjusted ; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit ; IR-LABEL: polly.loop_preheader: Index: test/Isl/CodeGen/non_affine_float_compare.ll =================================================================== --- test/Isl/CodeGen/non_affine_float_compare.ll +++ test/Isl/CodeGen/non_affine_float_compare.ll @@ -34,7 +34,7 @@ ; CHECK: %p_tmp11b = fadd float %tmp10b_p_scalar_, 1.000000e+00 ; CHECK: store float %p_tmp11b, float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2 ; CHECK: %polly.indvar_next = add nsw i64 %polly.indvar, 1 -; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar, 1022 +; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 1023 ; CHECK: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/phi_scalar_simple_2.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_2.ll +++ test/Isl/CodeGen/phi_scalar_simple_2.ll @@ -31,7 +31,7 @@ ; CHECK-NEXT: store i32 %x, i32* %x.addr.0.phiops ; CHECK-NEXT: sext 
-; CHECK-LABEL: polly.merge21: +; CHECK-LABEL: polly.merge{{[a-z_0-9]*}}: ; CHECK: %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a for.cond: ; preds = %for.inc5, %entry Index: test/Isl/single_loop_param_less_equal.ll =================================================================== --- test/Isl/single_loop_param_less_equal.ll +++ test/Isl/single_loop_param_less_equal.ll @@ -51,8 +51,7 @@ ; CODEGEN: [[PTR:%[a-zA-Z0-9_\.]+]] = getelementptr [1024 x i32], [1024 x i32]* @A, i64 0, i64 %polly.indvar ; CODEGEN: store i32 1, i32* [[PTR]] ; CODEGEN: %polly.indvar_next = add nsw i64 %polly.indvar, 1 -; CODEGEN: %polly.adjust_ub = sub i64 %n, 1 -; CODEGEN: %polly.loop_cond = icmp sle i64 %polly.indvar, %polly.adjust_ub +; CODEGEN: %polly.loop_cond = icmp sle i64 %polly.indvar_next, %n ; CODEGEN: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit ; CODEGEN: polly.loop_preheader: Index: test/Isl/single_loop_param_less_than.ll =================================================================== --- test/Isl/single_loop_param_less_than.ll +++ test/Isl/single_loop_param_less_than.ll @@ -49,8 +49,7 @@ ; CODEGEN: [[PTR:%[a-zA-Z0-9_\.]+]] = getelementptr [1024 x i32], [1024 x i32]* @A, i64 0, i64 %polly.indvar ; CODEGEN: store i32 1, i32* [[PTR]] ; CODEGEN: %polly.indvar_next = add nsw i64 %polly.indvar, 1 -; CODEGEN: %polly.adjust_ub = sub i64 %n, 1 -; CODEGEN: %polly.loop_cond = icmp slt i64 %polly.indvar, %polly.adjust_ub +; CODEGEN: %polly.loop_cond = icmp slt i64 %polly.indvar_next, %n ; CODEGEN: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit ; CODEGEN: polly.loop_preheader: Index: test/ScheduleOptimizer/pattern-matching-based-opts_10.ll =================================================================== --- test/ScheduleOptimizer/pattern-matching-based-opts_10.ll +++ test/ScheduleOptimizer/pattern-matching-based-opts_10.ll @@ -12,14 +12,14 @@ ; This test case checks whether Polly generates second level alias metadata ; to 
distinguish the specific accesses in case of the ublas gemm kernel. ; -; CHECK: %tmp22_p_scalar_ = load double, double* %scevgep168, align 8, !alias.scope !10, !noalias !2 -; CHECK: store double %p_tmp23, double* %scevgep168, align 8, !alias.scope !10, !noalias !2 -; CHECK: %tmp22_p_scalar_188 = load double, double* %scevgep187, align 8, !alias.scope !11, !noalias !12 -; CHECK: store double %p_tmp23189, double* %scevgep187, align 8, !alias.scope !11, !noalias !12 -; CHECK: %tmp22_p_scalar_209 = load double, double* %scevgep208, align 8, !alias.scope !13, !noalias !14 -; CHECK: store double %p_tmp23210, double* %scevgep208, align 8, !alias.scope !13, !noalias !14 -; CHECK: %tmp22_p_scalar_230 = load double, double* %scevgep229, align 8, !alias.scope !15, !noalias !16 -; CHECK: store double %p_tmp23231, double* %scevgep229, align 8, !alias.scope !15, !noalias !16 +; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N0:[a-z_0-9]*]], align 8, !alias.scope !10, !noalias !2 +; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N0]], align 8, !alias.scope !10, !noalias !2 +; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N1:[a-z_0-9]*]], align 8, !alias.scope !11, !noalias !12 +; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N1]], align 8, !alias.scope !11, !noalias !12 +; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N2:[a-z_0-9]*]], align 8, !alias.scope !13, !noalias !14 +; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N2]], align 8, !alias.scope !13, !noalias !14 +; CHECK: %tmp22_p_scalar_{{[0-9]*}} = load double, double* %scevgep[[N3:[a-z_0-9]*]], align 8, !alias.scope !15, !noalias !16 +; CHECK: store double %p_tmp23{{[0-9]*}}, double* %scevgep[[N3]], align 8, !alias.scope !15, !noalias !16 ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown"