Index: lib/Transforms/Utils/SimplifyIndVar.cpp =================================================================== --- lib/Transforms/Utils/SimplifyIndVar.cpp +++ lib/Transforms/Utils/SimplifyIndVar.cpp @@ -80,6 +80,7 @@ bool replaceIVUserWithLoopInvariant(Instruction *UseInst); bool eliminateOverflowIntrinsic(CallInst *CI); + bool eliminateTrunc(TruncInst *TI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); @@ -493,6 +494,65 @@ return true; } +bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { + // Can we prove that truncate does not change the actual range? + Value *IV = TI->getOperand(0); + Type *IVTy = IV->getType(); + + bool HasUnsignedComparisonUser = false; + bool HasSignedComparisonUser = false; + // Collect users of the trunc that look like comparisons against invariants. + // Bail if we find something different. + SmallVector ICmpUsers; + for (auto *U : TI->users()) { + if (ICmpInst *ICI = dyn_cast(U)) { + if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) { + assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); + ICmpUsers.push_back(ICI); + if (ICmpInst::isSigned(ICI->getPredicate())) + HasSignedComparisonUser = true; + else + HasUnsignedComparisonUser = true; + } else + return false; + } else + return false; + } + + // We are going to get rid of trunc. If it has uses then we need to check that + // the trunc doesn't change the actual value as it is used, meaning that + // sext/zext(trunc(iv)) == iv. 
+ const SCEV *IVSCEV = SE->getSCEV(IV); + if (HasSignedComparisonUser) { + const SCEV *SExtTruncSCEV = SE->getSignExtendExpr(SE->getSCEV(TI), IVTy); + if (IVSCEV != SExtTruncSCEV) + return false; + } + if (HasUnsignedComparisonUser) { + const SCEV *ZExtTruncSCEV = SE->getZeroExtendExpr(SE->getSCEV(TI), IVTy); + if (IVSCEV != ZExtTruncSCEV) + return false; + } + + + // Replace all compares against trunc with compares against IV. + BasicBlock *Predecessor = L->getLoopPredecessor(); + for (auto *ICI : ICmpUsers) { + auto *Op1 = ICI->getOperand(1); + Instruction *Ext = nullptr; + if (ICmpInst::isSigned(ICI->getPredicate())) + Ext = new SExtInst(Op1, IVTy, "sext", Predecessor->getTerminator()); + else + Ext = new ZExtInst(Op1, IVTy, "zext", Predecessor->getTerminator()); + ICmpInst *NewICI = new ICmpInst(ICI, ICI->getPredicate(), IV, Ext); + ICI->replaceAllUsesWith(NewICI); + ICI->eraseFromParent(); + } + // Trunc no longer needed. + TI->eraseFromParent(); + return true; +} + /// Eliminate an operation that consumes a simple IV and has no observable /// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, /// but UseInst may not be. @@ -517,6 +577,10 @@ if (eliminateOverflowIntrinsic(CI)) return true; + if (auto *TI = dyn_cast(UseInst)) + if (eliminateTrunc(TI)) + return true; + if (eliminateIdentitySCEV(UseInst, IVOperand)) return true; Index: test/Transforms/IndVarSimplify/eliminate-trunc.ll =================================================================== --- /dev/null +++ test/Transforms/IndVarSimplify/eliminate-trunc.ll @@ -0,0 +1,430 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -indvars -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; General case: without extra knowledge, trunc cannot be eliminated. 
+define void @test_00(i64 %start, i32 %n) {
+;
+; CHECK-LABEL: @test_00(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NARROW_IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [%start, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp slt i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Start from 0: the trunc is removed and the signed compare is done on the wide IV against sext(n).
+define void @test_01(i32 %n) {
+;
+; CHECK-LABEL: @test_01(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [0, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp slt i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Max start value at which we can eliminate trunc: SINT_MAX - 1.
+define void @test_02(i32 %n) {
+;
+; CHECK-LABEL: @test_02(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [2147483646, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp slt i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; If we start from SINT_MAX then the predicate is always false, so the exit branch folds to a constant.
+define void @test_03(i32 %n) {
+;
+; CHECK-LABEL: @test_03(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [2147483647, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp slt i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Minimum start value at which we can apply the transform: SINT_MIN + 1.
+define void @test_04(i32 %n) {
+;
+; CHECK-LABEL: @test_04(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483647, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [-2147483647, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp slt i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Start from SINT_MIN. FIXME: Harmful LFTR should be thrown away.
+define void @test_05(i32 %n) {
+;
+; CHECK-LABEL: @test_05(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483648, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [-2147483648, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp slt i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Trunc changes the actual value of the IV, so it is invalid to remove it: start from SINT_MIN - 1.
+define void @test_06(i32 %n) {
+;
+; CHECK-LABEL: @test_06(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -2147483649, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NARROW_IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [-2147483649, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp slt i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; General case (unsigned compare): without extra knowledge, trunc cannot be eliminated.
+define void @test_00_unsigned(i64 %start, i32 %n) {
+; CHECK-LABEL: @test_00_unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[NARROW_IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [%start, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp ult i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Start from 0 with an unsigned compare. FIXME: Harmful LFTR should be thrown away.
+define void @test_01_unsigned(i32 %n) {
+; CHECK-LABEL: @test_01_unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [0, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp ult i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Max start value at which we can eliminate trunc: UINT_MAX - 1.
+define void @test_02_unsigned(i32 %n) {
+; CHECK-LABEL: @test_02_unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 4294967294, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV]], [[ZEXT]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [4294967294, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp ult i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; If we start from UINT_MAX then the predicate is always false.
+define void @test_03_unsigned(i32 %n) {
+; CHECK-LABEL: @test_03_unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [4294967295, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp ult i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Minimum start value at which we can apply the transform: UINT_MIN (the expected output still has an LFTR trunc, as in @test_01_unsigned).
+define void @test_04_unsigned(i32 %n) {
+; CHECK-LABEL: @test_04_unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [0, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp ult i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Start from 1: the trunc is removed and the unsigned compare is done on the wide IV against zext(n).
+define void @test_05_unsigned(i32 %n) {
+; CHECK-LABEL: @test_05_unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[IV]], [[ZEXT]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [1, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp ult i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Trunc changes the actual value of the IV, so it is invalid to remove it: start from UINT_MIN - 1 (i.e. -1).
+define void @test_06_unsigned(i32 %n) {
+; CHECK-LABEL: @test_06_unsigned(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[NARROW_IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [-1, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp = icmp ult i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Do not eliminate trunc if it is used by something different from icmp.
+define void @test_07(i32* %p, i32 %n) {
+; CHECK-LABEL: @test_07(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[NARROW_IV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: store i32 [[NARROW_IV]], i32* [[P:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NARROW_IV]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [0, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  store i32 %narrow.iv, i32* %p
+  %cmp = icmp slt i32 %narrow.iv, %n
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Check that we can eliminate a trunc that is used by both a signed and an unsigned compare.
+define void @test_08(i32 %n) {
+; CHECK-LABEL: @test_08(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64
+; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N]] to i64
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IV]], [[ZEXT]]
+; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [1, %entry], [%iv.next, %loop]
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %cmp1 = icmp slt i32 %narrow.iv, %n
+  %cmp2 = icmp ult i32 %narrow.iv, %n
+  %cmp = and i1 %cmp1, %cmp2
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}