Index: llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1898,6 +1898,17 @@
     // to the new (widened) increment.
     auto *OrigInc =
       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+    // Carry the narrow increment's no-wrap flags over to the widened
+    // increment. The wrap-flag accessors on Instruction are only valid for
+    // an OverflowingBinaryOperator, so require that of both instructions
+    // before touching the flags. Flags are only ever added, never cleared.
+    if (isa<OverflowingBinaryOperator>(OrigInc) &&
+        isa<OverflowingBinaryOperator>(WideInc)) {
+      if (OrigInc->hasNoSignedWrap())
+        WideInc->setHasNoSignedWrap(true);
+      if (OrigInc->hasNoUnsignedWrap())
+        WideInc->setHasNoUnsignedWrap(true);
+    }
     WideInc->setDebugLoc(OrigInc->getDebugLoc());
   }
 
Index: llvm/test/Transforms/IndVarSimplify/keep-nsw-nuw-flag.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/IndVarSimplify/keep-nsw-nuw-flag.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@a = external global [8000 x float], align 64
+@b = external global [8000 x float], align 64
+@c = external global [8000 x float], align 64
+
+define float @foo(i32 %arg1, i32 %arg2) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[ARG1:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT:    br label [[PREHEADER:%.*]]
+; CHECK:       preheader:
+; CHECK-NEXT:    br label [[BODY:%.*]]
+; CHECK:       body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BODY]] ], [ [[TMP0]], [[PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX_A:%.*]] = getelementptr inbounds [8000 x float], [8000 x float]* @a, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX_A]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_B:%.*]] = getelementptr inbounds [8000 x float], [8000 x float]* @b, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[ARRAYIDX_B]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_C:%.*]] = getelementptr inbounds [8000 x float], [8000 x float]* @c, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[FADD:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    store float [[FADD]], float* [[ARRAYIDX_C]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 8000
+; CHECK-NEXT:    br i1 [[CMP]], label [[BODY]], label [[CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    br label [[PREHEADER]]
+;
+entry:
+  br label %preheader
+
+preheader:                                        ; preds = %cleanup, %entry
+  br label %body
+
+body:                                             ; preds = %body, %preheader
+  %iv = phi i32 [ %arg1, %preheader ], [ %add, %body ]
+  %sext = sext i32 %iv to i64
+  %arrayidx.a = getelementptr inbounds [8000 x float], [8000 x float]* @a, i64 0, i64 %sext
+  %0 = load float, float* %arrayidx.a, align 4
+  %arrayidx.b = getelementptr inbounds [8000 x float], [8000 x float]* @b, i64 0, i64 %sext
+  %1 = load float, float* %arrayidx.b, align 4
+  %arrayidx.c = getelementptr inbounds [8000 x float], [8000 x float]* @c, i64 0, i64 %sext
+  %fadd = fadd fast float %0, %1
+  store float %fadd, float* %arrayidx.c, align 4
+  %add = add nsw i32 %iv, %arg2
+  %cmp = icmp slt i32 %add, 8000
+  br i1 %cmp, label %body, label %cleanup, !llvm.loop !0
+
+cleanup:                                          ; preds = %body
+  br label %preheader
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.mustprogress"}
+
Index: llvm/test/Transforms/LoopFlatten/widen-iv.ll
===================================================================
--- llvm/test/Transforms/LoopFlatten/widen-iv.ll
+++ llvm/test/Transforms/LoopFlatten/widen-iv.ll
@@ -41,11 +41,11 @@
 ; CHECK-NEXT:    [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM_US]]
 ; CHECK-NEXT:    tail call void @f(i32* [[ARRAYIDX_US]])
-; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw nsw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
 ; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us:
-; CHECK-NEXT:    [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT2]] = add nuw nsw i64 [[INDVAR1]], 1
 ; CHECK-NEXT:    [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT2]], [[FLATTEN_TRIPCOUNT]]
 ; CHECK-NEXT:    br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
@@ -132,12 +132,12 @@
 ; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVAR2]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4
 ; CHECK-NEXT:    tail call void @g(i32 [[TMP8]])
-; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw nsw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[INC_US:%.*]] = add nuw nsw i32 [[J_016_US]], 1
 ; CHECK-NEXT:    [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
 ; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us:
-; CHECK-NEXT:    [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT3]] = add nuw nsw i64 [[INDVAR2]], 1
 ; CHECK-NEXT:    [[INC6_US]] = add nuw nsw i32 [[I_018_US]], 1
 ; CHECK-NEXT:    [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]]
 ; CHECK-NEXT:    br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
@@ -240,7 +240,7 @@
 ; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM_US]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4
 ; CHECK-NEXT:    tail call void @g(i32 [[TMP4]])
-; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[CMP2_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
 ; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us:
@@ -335,7 +335,7 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_US]], align 2
 ; CHECK-NEXT:    [[ADD5_US:%.*]] = add i16 [[TMP4]], [[VAL:%.*]]
 ; CHECK-NEXT:    store i16 [[ADD5_US]], i16* [[ARRAYIDX_US]], align 2
-; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[CMP2_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br label [[FOR_COND1_FOR_INC7_CRIT_EDGE_US]]
 ; CHECK:       for.cond1.for.inc7_crit_edge.us:
@@ -420,7 +420,7 @@
 ; CHECK-NEXT:    [[ADD_US:%.*]] = add i8 [[TMP3]], [[MUL_US]]
 ; CHECK-NEXT:    [[CONV14_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i32
 ; CHECK-NEXT:    [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]])
-; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[CMP6_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]]
 ; CHECK:       for.cond3.for.cond.cleanup8_crit_edge.us:
@@ -529,7 +529,7 @@
 ; CHECK-NEXT:    [[CALL20_US:%.*]] = tail call i32 @use_16(i16 [[CONV15_US]])
 ; CHECK-NEXT:    [[CONV21_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i64
 ; CHECK-NEXT:    [[CALL22_US:%.*]] = tail call i32 @use_64(i64 [[CONV21_US]])
-; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[CMP6_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]]
 ; CHECK:       for.cond3.for.cond.cleanup8_crit_edge.us:
@@ -643,7 +643,7 @@
 ; CHECK-NEXT:    [[CALL18_US:%.*]] = tail call i32 @use_16(i16 [[FLATTEN_TRUNCIV]])
 ; CHECK-NEXT:    [[CONV19_US:%.*]] = sext i16 [[FLATTEN_TRUNCIV]] to i64
 ; CHECK-NEXT:    [[CALL20_US:%.*]] = tail call i32 @use_64(i64 [[CONV19_US]])
-; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw nsw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[CMP6_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]]
 ; CHECK:       for.cond3.for.cond.cleanup8_crit_edge.us: