Index: llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -81,6 +81,7 @@ bool replaceIVUserWithLoopInvariant(Instruction *UseInst); bool eliminateOverflowIntrinsic(CallInst *CI); + bool eliminateTrunc(TruncInst *TI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); @@ -494,6 +495,93 @@ return true; } +bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { + // It is always legal to replace + // icmp i32 trunc(iv), n + // with + // icmp i64 sext(trunc(iv)), sext(n), if pred is signed predicate. + // Or with + // icmp i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate. + // Or with either of these if pred is an equality predicate. + // + // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for + // every comparison which uses trunc, it means that we can replace each of + // them with comparison of iv against sext/zext(n). We no longer need trunc + // after that. + // + // TODO: Should we do this if we can widen *some* comparisons, but not all + // of them? Sometimes it is enough to enable other optimizations, but the + // trunc instruction will stay in the loop. + Value *IV = TI->getOperand(0); + Type *IVTy = IV->getType(); + const SCEV *IVSCEV = SE->getSCEV(IV); + const SCEV *TISCEV = SE->getSCEV(TI); + + // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). If so, we can + // get rid of trunc + bool DoesSExtCollapse = false; + bool DoesZExtCollapse = false; + if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy)) + DoesSExtCollapse = true; + if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy)) + DoesZExtCollapse = true; + + // If neither sext nor zext does collapse, it is not profitable to do any + // transform. 
Bail. + if (!DoesSExtCollapse && !DoesZExtCollapse) + return false; + + // Collect users of the trunc that look like comparisons against invariants. + // Bail if we find something different. + SmallVector<ICmpInst *, 4> ICmpUsers; + for (auto *U : TI->users()) { + if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { + if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) { + assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); + // If we cannot get rid of trunc, bail. + if (ICI->isSigned() && !DoesSExtCollapse) + return false; + if (ICI->isUnsigned() && !DoesZExtCollapse) + return false; + // For equality, either signed or unsigned works. + ICmpUsers.push_back(ICI); + } else + return false; + } else + return false; + } + + // Replace all comparisons against trunc with comparisons against IV. + for (auto *ICI : ICmpUsers) { + auto *Op1 = ICI->getOperand(1); + Instruction *Ext = nullptr; + // For signed/unsigned predicate, replace the old comparison with comparison + // of immediate IV against sext/zext of the invariant argument. If we can + // use either sext or zext (i.e. we are dealing with equality predicate), + // then prefer zext as a more canonical form. + // TODO: If we see a signed comparison which can be turned into unsigned, + // we can do it here for canonicalization purposes. + if (ICI->isUnsigned() || (ICI->isEquality() && DoesZExtCollapse)) { + assert(DoesZExtCollapse && "Unprofitable zext?"); + Ext = new ZExtInst(Op1, IVTy, "zext", ICI); + } else { + assert(DoesSExtCollapse && "Unprofitable sext?"); + Ext = new SExtInst(Op1, IVTy, "sext", ICI); + } + bool Changed; + L->makeLoopInvariant(Ext, Changed); + (void)Changed; + ICmpInst *NewICI = new ICmpInst(ICI, ICI->getPredicate(), IV, Ext); + ICI->replaceAllUsesWith(NewICI); + DeadInsts.emplace_back(ICI); + } + + // Trunc no longer needed. 
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + DeadInsts.emplace_back(TI); + return true; +} + /// Eliminate an operation that consumes a simple IV and has no observable /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, /// but UseInst may not be. @@ -518,6 +606,10 @@ if (eliminateOverflowIntrinsic(CI)) return true; + if (auto *TI = dyn_cast<TruncInst>(UseInst)) + if (eliminateTrunc(TI)) + return true; + if (eliminateIdentitySCEV(UseInst, IVOperand)) return true; Index: llvm/trunk/test/Transforms/IndVarSimplify/ada-loops.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/ada-loops.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/ada-loops.ll @@ -10,12 +10,17 @@ ; don't check that phis are "folded together" because that is a job ; for loop strength reduction. But indvars must remove sext, zext, and add i8. ; -; CHECK-NOT: {{sext|zext|add i8}} ; ModuleID = 'ada.bc' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32" target triple = "i686-pc-linux-gnu" +; CHECK-LABEL: @kinds__sbytezero +; CHECK: bb.thread: +; CHECK: sext +; CHECK: bb: +; CHECK-NOT: {{sext i8|zext i8|add i8|trunc}} + define void @kinds__sbytezero([256 x i32]* nocapture %a) nounwind { bb.thread: %tmp46 = getelementptr [256 x i32], [256 x i32]* %a, i32 0, i32 0 ; <i32*> [#uses=1] @@ -36,6 +41,8 @@ ret void } +; CHECK-LABEL: @kinds__ubytezero + define void @kinds__ubytezero([256 x i32]* nocapture %a) nounwind { bb.thread: %tmp35 = getelementptr [256 x i32], [256 x i32]* %a, i32 0, i32 0 ; <i32*> [#uses=1] Index: llvm/trunk/test/Transforms/IndVarSimplify/eliminate-trunc.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/eliminate-trunc.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/eliminate-trunc.ll @@ -0,0 +1,486 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py +; RUN: opt -indvars -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; General case: without extra knowledge, trunc cannot be eliminated. +define void @test_00(i64 %start, i32 %n) { +; +; CHECK-LABEL: @test_00( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NARROW_IV]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ %start, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp slt i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + + +define void @test_01(i32 %n) { +; +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp slt i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Max value at which we can eliminate trunc: SINT_MAX - 1. 
+define void @test_02(i32 %n) { +; +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 2147483646, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp slt i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; If we start from SINT_MAX then the predicate is always false. +define void @test_03(i32 %n) { +; +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [2147483647, %entry], [%iv.next, %loop] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp slt i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Minimum value at which we can apply the transform: SINT_MIN + 1. 
+define void @test_04(i32 %n) { +; +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483647, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ -2147483647, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp slt i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; FIXME: Harmful LFTR should be thrown away. +define void @test_05(i32 %n) { +; +; CHECK-LABEL: @test_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483648, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ -2147483648, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp slt i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Trunc changes the actual value of the IV, so it is invalid to remove it: SINT_MIN - 1. 
+define void @test_06(i32 %n) { +; +; CHECK-LABEL: @test_06( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483649, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NARROW_IV]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ -2147483649, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp slt i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; General case: without extra knowledge, trunc cannot be eliminated. +define void @test_00_unsigned(i64 %start, i32 %n) { +; CHECK-LABEL: @test_00_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[NARROW_IV]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ %start, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp ult i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; FIXME: Harmful LFTR should be thrown away. 
+define void @test_01_unsigned(i32 %n) { +; CHECK-LABEL: @test_01_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp ult i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Max value at which we can eliminate trunc: UINT_MAX - 1. +define void @test_02_unsigned(i32 %n) { +; CHECK-LABEL: @test_02_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 4294967294, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 4294967294, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp ult i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; If we start from UINT_MAX then the predicate is always false. 
+define void @test_03_unsigned(i32 %n) { +; CHECK-LABEL: @test_03_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 4294967295, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp ult i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Minimum value at which we can apply the transform: UINT_MIN. +define void @test_04_unsigned(i32 %n) { +; CHECK-LABEL: @test_04_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp ult i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Start from 1. 
+define void @test_05_unsigned(i32 %n) { +; CHECK-LABEL: @test_05_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp ult i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Trunc changes the actual value of the IV, so it is invalid to remove it: UINT_MIN - 1. +define void @test_06_unsigned(i32 %n) { +; CHECK-LABEL: @test_06_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 +; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[NARROW_IV]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ -1, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp ult i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Do not eliminate trunc if it is used by something different from icmp. 
+define void @test_07(i32* %p, i32 %n) { +; CHECK-LABEL: @test_07( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: store i32 [[NARROW_IV]], i32* [[P:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NARROW_IV]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + store i32 %narrow.iv, i32* %p + %cmp = icmp slt i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Check that we can eliminate both signed and unsigned compare. +define void @test_08(i32 %n) { +; CHECK-LABEL: @test_08( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] +; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp1 = icmp slt i32 %narrow.iv, %n + %cmp2 = icmp ult i32 %narrow.iv, %n + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Widen NE as unsigned. 
+define void @test_09(i32 %n) { +; CHECK-LABEL: @test_09( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[IV]], [[ZEXT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %cmp = icmp ne i32 %narrow.iv, %n + br i1 %cmp, label %loop, label %exit +exit: + ret void +} + +; Widen NE as signed. +define void @test_10(i32 %n) { +; CHECK-LABEL: @test_10( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[IV]], [[SEXT]] +; CHECK-NEXT: [[NEGCMP:%.*]] = icmp slt i64 [[IV]], -10 +; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP0]], [[NEGCMP]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop +loop: + %iv = phi i64 [ -100, %entry ], [ %iv.next, %loop ] + %iv.next = add i64 %iv, 1 + %narrow.iv = trunc i64 %iv to i32 + %trunccmp = icmp ne i32 %narrow.iv, %n + %negcmp = icmp slt i64 %iv, -10 + %cmp = and i1 %trunccmp, %negcmp + br i1 %cmp, label %loop, label %exit +exit: + ret void +} Index: llvm/trunk/test/Transforms/IndVarSimplify/widen-loop-comp.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -332,12 +332,12 @@ br 
label %loop loop: +; CHECK: [[WIDE_V:%[a-z0-9]+]] = sext i32 %v to i64 ; CHECK: loop: ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ] ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; CHECK: [[MUL:%[a-z0-9]+]] = mul nsw i64 %indvars.iv, -1 -; CHECK: [[MUL_TRUNC:%[a-z0-9]+]] = trunc i64 [[MUL]] to i32 -; CHECK: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[MUL_TRUNC]], %v +; CHECK: [[CMP:%[a-z0-9]+]] = icmp eq i64 [[MUL]], [[WIDE_V]] ; CHECK: call void @consume.i1(i1 [[CMP]]) %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]