Index: llvm/trunk/test/Transforms/LoopInterchange/interchange-not-profitable.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/interchange-not-profitable.ll +++ llvm/trunk/test/Transforms/LoopInterchange/interchange-not-profitable.ll @@ -1,42 +0,0 @@ -; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@A = common global [100 x [100 x i32]] zeroinitializer -@B = common global [100 x i32] zeroinitializer - -;; Loops should not be interchanged in this case as it is not profitable. -;; for(int i=0;i<100;i++) -;; for(int j=0;j<100;j++) -;; A[i][j] = A[i][j]+k; - -; CHECK: Interchanging loops not profitable. - -define void @interchange_03(i32 %k) { -entry: - br label %for.cond1.preheader - -for.cond1.preheader: - %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc10 ] - br label %for.body3 - -for.body3: - %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] - %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv21, i64 %indvars.iv - %0 = load i32, i32* %arrayidx5 - %add = add nsw i32 %0, %k - store i32 %add, i32* %arrayidx5 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 100 - br i1 %exitcond, label %for.inc10, label %for.body3 - -for.inc10: - %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1 - %exitcond23 = icmp eq i64 %indvars.iv.next22, 100 - br i1 %exitcond23, label %for.end12, label %for.cond1.preheader - -for.end12: - ret void -} Index: llvm/trunk/test/Transforms/LoopInterchange/interchange-output-dependencies.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/interchange-output-dependencies.ll +++ 
llvm/trunk/test/Transforms/LoopInterchange/interchange-output-dependencies.ll @@ -1,48 +0,0 @@ -; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@A = common global [100 x [100 x i32]] zeroinitializer - -;; Test to make sure we can handle output dependencies. -;; -;; for (int i = 0; i < 2; ++i) -;; for(int j = 0; j < 3; ++j) { -;; A[j][i] = i; -;; A[j][i+1] = j; -;; } - -; CHECK: Not interchanging loops. Cannot prove legality. - -@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16 - -define void @interchange_10() { -entry: - br label %for.cond1.preheader - -for.cond.loopexit: ; preds = %for.body4 - %exitcond28 = icmp ne i64 %indvars.iv.next27, 2 - br i1 %exitcond28, label %for.cond1.preheader, label %for.cond.cleanup - -for.cond1.preheader: ; preds = %for.cond.loopexit, %entry - %indvars.iv26 = phi i64 [ 0, %entry ], [ %indvars.iv.next27, %for.cond.loopexit ] - %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1 - br label %for.body4 - -for.cond.cleanup: ; preds = %for.cond.loopexit - ret void - -for.body4: ; preds = %for.body4, %for.cond1.preheader - %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ] - %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26 - %tmp = trunc i64 %indvars.iv26 to i32 - store i32 %tmp, i32* %arrayidx6, align 4 - %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27 - %tmp1 = trunc i64 %indvars.iv to i32 - store i32 %tmp1, i32* %arrayidx10, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp ne i64 %indvars.iv.next, 3 - br i1 %exitcond, label %for.body4, label %for.cond.loopexit -} Index: 
llvm/trunk/test/Transforms/LoopInterchange/interchange-simple-count-down.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/interchange-simple-count-down.ll +++ llvm/trunk/test/Transforms/LoopInterchange/interchange-simple-count-down.ll @@ -1,40 +0,0 @@ -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@A = common global [100 x [100 x i32]] zeroinitializer -@B = common global [100 x i32] zeroinitializer - -;; for(int i=0;i<100;i++) -;; for(int j=100;j>=0;j--) -;; A[j][i] = A[j][i]+k; - -; CHECK: Loop interchanged with enclosing loop. - -define void @interchange_02(i32 %k) { -entry: - br label %for.cond1.preheader - -for.cond1.preheader: - %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc10 ] - br label %for.body3 - -for.body3: - %indvars.iv = phi i64 [ 100, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] - %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv19 - %0 = load i32, i32* %arrayidx5 - %add = add nsw i32 %0, %k - store i32 %add, i32* %arrayidx5 - %indvars.iv.next = add nsw i64 %indvars.iv, -1 - %cmp2 = icmp sgt i64 %indvars.iv, 0 - br i1 %cmp2, label %for.body3, label %for.inc10 - -for.inc10: - %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1 - %exitcond = icmp eq i64 %indvars.iv.next20, 100 - br i1 %exitcond, label %for.end11, label %for.cond1.preheader - -for.end11: - ret void -} Index: llvm/trunk/test/Transforms/LoopInterchange/interchange-simple-count-up.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/interchange-simple-count-up.ll +++ llvm/trunk/test/Transforms/LoopInterchange/interchange-simple-count-up.ll @@ -1,49 +0,0 @@ -; REQUIRES: asserts 
-; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@A = common global [100 x [100 x i32]] zeroinitializer -@B = common global [100 x i32] zeroinitializer - -;; for(int i=0;i=0;j--) +;; A[j][i] = A[j][i]+k; + +define void @interchange_02(i64 %k) { +; CHECK-LABEL: @interchange_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR3_PREHEADER:%.*]] +; CHECK: for1.header.preheader: +; CHECK-NEXT: br label [[FOR1_HEADER:%.*]] +; CHECK: for1.header: +; CHECK-NEXT: [[INDVARS_IV19:%.*]] = phi i64 [ [[INDVARS_IV_NEXT20:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ] +; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]] +; CHECK: for3.preheader: +; CHECK-NEXT: br label [[FOR3:%.*]] +; CHECK: for3: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR3_SPLIT:%.*]] ], [ 100, [[FOR3_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]] +; CHECK: for3.split1: +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV19]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX5]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP0]], [[K:%.*]] +; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX5]] +; CHECK-NEXT: br label [[FOR1_INC10]] +; CHECK: for3.split: +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0 +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR3]], label [[FOR_END11:%.*]] +; CHECK: for1.inc10: +; CHECK-NEXT: [[INDVARS_IV_NEXT20]] = add nuw nsw i64 [[INDVARS_IV19]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT20]], 100 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR3_SPLIT]], label [[FOR1_HEADER]] +; CHECK: for.end11: +; CHECK-NEXT: ret void +; +entry: + br label %for1.header + +for1.header: + %j19 = phi i64 [ 
0, %entry ], [ %j.next20, %for1.inc10 ] + br label %for3 + +for3: + %j = phi i64 [ 100, %for1.header ], [ %j.next, %for3 ] + %arrayidx5 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 %j, i64 %j19 + %0 = load i64, i64* %arrayidx5 + %add = add nsw i64 %0, %k + store i64 %add, i64* %arrayidx5 + %j.next = add nsw i64 %j, -1 + %cmp2 = icmp sgt i64 %j, 0 + br i1 %cmp2, label %for3, label %for1.inc10 + +for1.inc10: + %j.next20 = add nuw nsw i64 %j19, 1 + %exitcond = icmp eq i64 %j.next20, 100 + br i1 %exitcond, label %for.end11, label %for1.header + +for.end11: + ret void +} + +;; Test to make sure we can handle output dependencies. +;; +;; for (int i = 1; i < 99; ++i) +;; for(int j = 1; j < 100; ++j) { +;; A[j][i] = j; +;; A[j][i+1] = i; +;; } +;; FIXME: DA misses this case after D35430 + +define void @interchange_10() { +entry: + br label %for1.header + +for1.header: + %j23 = phi i64 [ 1, %entry ], [ %j.next24, %for1.inc10 ] + %j.next24 = add nuw nsw i64 %j23, 1 + br label %for2 + +for2: + %j = phi i64 [ %j.next, %for2 ], [ 1, %for1.header ] + %j.next = add nuw nsw i64 %j, 1 + %arrayidx5 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 %j, i64 %j23 + store i64 %j, i64* %arrayidx5 + %arrayidx10 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 %j, i64 %j.next24 + store i64 %j23, i64* %arrayidx10 + %exitcond = icmp eq i64 %j, 99 + br i1 %exitcond, label %for1.inc10, label %for2 + +for1.inc10: + %exitcond26 = icmp eq i64 %j23, 98 + br i1 %exitcond26, label %for.end12, label %for1.header + +for.end12: + ret void + +} Index: llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll +++ llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll @@ -1,66 +1,102 @@ -; REQUIRES: asserts -; RUN: opt < %s -loop-interchange -verify-dom-info -S -debug 
2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-interchange -verify-dom-info -S 2>&1 | FileCheck %s ;; Checks the order of the inner phi nodes does not cause havoc. ;; The inner loop has a reduction into c. The IV is not the first phi. -; CHECK: Not interchanging loops. Cannot prove legality. - target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv8--linux-gnueabihf" + + ; Function Attrs: norecurse nounwind define void @test(i32 %T, [90 x i32]* noalias nocapture %C, i16* noalias nocapture readonly %A, i16* noalias nocapture readonly %B) local_unnamed_addr #0 { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR3_PREHEADER:%.*]] +; CHECK: for1.header.preheader: +; CHECK-NEXT: br label [[FOR1_HEADER:%.*]] +; CHECK: for1.header: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC20:%.*]], [[FOR1_INC19:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I]], 90 +; CHECK-NEXT: br label [[FOR2_HEADER_PREHEADER:%.*]] +; CHECK: for2.header.preheader: +; CHECK-NEXT: br label [[FOR2_HEADER:%.*]] +; CHECK: for2.header: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[INC17:%.*]], [[FOR2_INC16:%.*]] ], [ 0, [[FOR2_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [90 x i32], [90 x i32]* [[C:%.*]], i32 [[I]], i32 [[J]] +; CHECK-NEXT: [[ARRAYIDX14_PROMOTED:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 +; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]] +; CHECK: for3.preheader: +; CHECK-NEXT: br label [[FOR3:%.*]] +; CHECK: for3: +; CHECK-NEXT: [[K:%.*]] = phi i32 [ [[INC:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]] +; CHECK: for3.split1: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 [[ADD]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; 
CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +; CHECK-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV]], [[ARRAYIDX14_PROMOTED]] +; CHECK-NEXT: br label [[FOR2_INC16]] +; CHECK: for3.split: +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]] +; CHECK: for2.inc16: +; CHECK-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX14]], align 4 +; CHECK-NEXT: [[INC17]] = add nuw nsw i32 [[J]], 1 +; CHECK-NEXT: [[EXITCOND47:%.*]] = icmp eq i32 [[INC17]], 90 +; CHECK-NEXT: br i1 [[EXITCOND47]], label [[FOR1_INC19]], label [[FOR2_HEADER]] +; CHECK: for1.inc19: +; CHECK-NEXT: [[INC20]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND48:%.*]] = icmp eq i32 [[INC20]], 90 +; CHECK-NEXT: br i1 [[EXITCOND48]], label [[FOR3_SPLIT]], label [[FOR1_HEADER]] +; CHECK: for1.loopexit: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: - %cmp45 = icmp sgt i32 %T, 0 - br i1 %cmp45, label %for.body3.lr.ph.preheader, label %for.end21 - -for.body3.lr.ph.preheader: ; preds = %entry - br label %for.body3.lr.ph + br label %for1.header -for.body3.lr.ph: ; preds = %for.body3.lr.ph.preheader, %for.inc19 - %i.046 = phi i32 [ %inc20, %for.inc19 ], [ 0, %for.body3.lr.ph.preheader ] - %mul = mul nsw i32 %i.046, %T - br label %for.body6.lr.ph - -for.body6.lr.ph: ; preds = %for.inc16, %for.body3.lr.ph - %j.043 = phi i32 [ 0, %for.body3.lr.ph ], [ %inc17, %for.inc16 ] - %arrayidx14 = getelementptr inbounds [90 x i32], [90 x i32]* %C, i32 %i.046, i32 %j.043 +for1.header: ; preds = %for1.inc19, %entry + %i = phi i32 [ %inc20, %for1.inc19 ], [ 0, %entry ] + %mul = mul nsw i32 %i, 90 + br label %for2.header + +for2.header: ; preds = %for2.inc16, %for1.header + %j = phi i32 [ 0, %for1.header ], [ %inc17, %for2.inc16 ] + %arrayidx14 = getelementptr inbounds [90 x i32], [90 x i32]* %C, i32 %i, i32 %j %arrayidx14.promoted = load i32, i32* %arrayidx14, align 4 - br 
label %for.body6 + br label %for3 -for.body6: ; preds = %for.body6, %for.body6.lr.ph - %add1541 = phi i32 [ %arrayidx14.promoted, %for.body6.lr.ph ], [ %add15, %for.body6 ] - %k.040 = phi i32 [ 0, %for.body6.lr.ph ], [ %inc, %for.body6 ] - %add = add nsw i32 %k.040, %mul +for3: ; preds = %for3, %for2.header + %add1541 = phi i32 [ %arrayidx14.promoted, %for2.header ], [ %add15, %for3 ] + %k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ] + %add = add nsw i32 %k, %mul %arrayidx = getelementptr inbounds i16, i16* %A, i32 %add %0 = load i16, i16* %arrayidx, align 2 %conv = sext i16 %0 to i32 - %mul7 = mul nsw i32 %k.040, %T - %add8 = add nsw i32 %mul7, %j.043 - %arrayidx9 = getelementptr inbounds i16, i16* %B, i32 %add8 - %1 = load i16, i16* %arrayidx9, align 2 - %conv10 = sext i16 %1 to i32 - %mul11 = mul nsw i32 %conv10, %conv - %add15 = add nsw i32 %mul11, %add1541 - %inc = add nuw nsw i32 %k.040, 1 - %exitcond = icmp eq i32 %inc, %T - br i1 %exitcond, label %for.inc16, label %for.body6 + %add15 = add nsw i32 %conv, %add1541 + %inc = add nuw nsw i32 %k, 1 + %exitcond = icmp eq i32 %inc, 90 + br i1 %exitcond, label %for2.inc16, label %for3 -for.inc16: ; preds = %for.body6 - %add15.lcssa = phi i32 [ %add15, %for.body6 ] +for2.inc16: ; preds = %for3 + %add15.lcssa = phi i32 [ %add15, %for3 ] store i32 %add15.lcssa, i32* %arrayidx14, align 4 - %inc17 = add nuw nsw i32 %j.043, 1 - %exitcond47 = icmp eq i32 %inc17, %T - br i1 %exitcond47, label %for.inc19, label %for.body6.lr.ph - -for.inc19: ; preds = %for.inc16 - %inc20 = add nuw nsw i32 %i.046, 1 - %exitcond48 = icmp eq i32 %inc20, %T - br i1 %exitcond48, label %for.end21.loopexit, label %for.body3.lr.ph + %inc17 = add nuw nsw i32 %j, 1 + %exitcond47 = icmp eq i32 %inc17, 90 + br i1 %exitcond47, label %for1.inc19, label %for2.header + +for1.inc19: ; preds = %for2.inc16 + %inc20 = add nuw nsw i32 %i, 1 + %exitcond48 = icmp eq i32 %inc20, 90 + br i1 %exitcond48, label %for1.loopexit, label %for1.header 
-for.end21.loopexit: ; preds = %for.inc19 - br label %for.end21 +for1.loopexit: ; preds = %for1.inc19 + br label %exit -for.end21: ; preds = %for.end21.loopexit, %entry +exit: ; preds = %for1.loopexit ret void } Index: llvm/trunk/test/Transforms/LoopInterchange/profitability.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/profitability.ll +++ llvm/trunk/test/Transforms/LoopInterchange/profitability.ll @@ -1,5 +1,7 @@ -; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -loop-interchange -pass-remarks-output=%t \ +; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange +; RUN: FileCheck -input-file %t %s + ;; We test profitability model in these test cases. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -10,93 +12,80 @@ ;;---------------------------------------Test case 01--------------------------------- ;; Loops interchange will result in code vectorization and hence profitable. Check for interchange. -;; for(int i=1;i