Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1984,8 +1984,22 @@
 
       DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
     } else {
-      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
-                                      "lftr.wideiv");
+      // Prefer widening the trip count to truncating the IV. Widening is
+      // legal when the IV survives a round trip through the narrow type:
+      // zext(trunc(IV)) == IV makes "trunc(IV) == ExitCnt" equivalent to
+      // "IV == zext(ExitCnt)", and likewise for sext. If neither holds,
+      // fall back to truncating the IV.
+      const SCEV *IV = SE->getSCEV(CmpIndVar);
+      Type *WideTy = CmpIndVar->getType();
+      const SCEV *TruncIV = SE->getTruncateExpr(IV, ExitCnt->getType());
+
+      if (SE->getZeroExtendExpr(TruncIV, WideTy) == IV)
+        ExitCnt = Builder.CreateZExt(ExitCnt, WideTy, "wide.trip.count");
+      else if (SE->getSignExtendExpr(TruncIV, WideTy) == IV)
+        ExitCnt = Builder.CreateSExt(ExitCnt, WideTy, "wide.trip.count");
+      else
+        CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+                                        "lftr.wideiv");
     }
   }
   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Index: test/Transforms/IndVarSimplify/elim-extend.ll
===================================================================
--- test/Transforms/IndVarSimplify/elim-extend.ll
+++ test/Transforms/IndVarSimplify/elim-extend.ll
@@ -41,6 +41,8 @@
   br i1 %precond, label %loop, label %return
 ; CHECK: loop:
 ; CHECK-NOT: sext
+; CHECK: wide.trip.count = sext
+; CHECK-NOT: sext
 ; CHECK: exit:
 loop:
   %iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]
Index: test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll
===================================================================
--- test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll
+++ test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll
@@ -0,0 +1,160 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+
+define void @test1(float* nocapture %autoc,
+                   float* nocapture %data,
+                   float %d, i32 %data_len, i32 %sample) nounwind {
+entry:
+  %sub = sub i32 %data_len, %sample
+  %cmp4 = icmp eq i32 %data_len, %sample
+  br i1 %cmp4, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 68719476736, %entry ]
+  %0 = trunc i64 %indvars.iv to i32
+  %add = add i32 %0, %sample
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr inbounds float, float* %data, i64 %idxprom
+  %1 = load float, float* %arrayidx, align 4
+  %mul = fmul float %1, %d
+  %arrayidx2 = getelementptr inbounds float, float* %autoc, i64 %indvars.iv
+  %2 = load float, float* %arrayidx2, align 4
+  %add3 = fadd float %2, %mul
+  store float %add3, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %3 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp ult i32 %3, %sub
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+
+; CHECK-LABEL: @test1(
+
+; With the given initial value for IV, it is not legal to widen
+; trip count to IV size
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
+; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
+}
+
+define float @test2(float* noalias nocapture readonly %a,
+                    float* noalias nocapture readonly %b,
+                    i32 zeroext %m) local_unnamed_addr #0 {
+entry:
+  %cmp5 = icmp ugt i32 %m, 500
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+  %i.06 = phi i32 [ %inc, %for.body ], [ 500, %for.body.preheader ]
+  %idxprom = zext i32 %i.06 to i64
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+  %1 = load float, float* %arrayidx2, align 4
+  %mul = fmul float %0, %1
+  %add = fadd float %sum.07, %mul
+  %inc = add i32 %i.06, 1
+  %cmp = icmp ult i32 %inc, %m
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.end.loopexit ]
+  ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test2(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = zext
+; CHECK: %exitcond = icmp ne
+; CHECK: br i1 %exitcond
+}
+
+define float @test3(float* noalias nocapture readnone %a,
+                    float* noalias nocapture readonly %b,
+                    i32 signext %m) local_unnamed_addr #0 {
+entry:
+  %cmp5 = icmp sgt i32 %m, -10
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %sum.07 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+  %i.06 = phi i32 [ %inc, %for.body ], [ -10, %for.body.preheader ]
+  %add = add nsw i32 %i.06, 20
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+  %conv = sitofp i32 %i.06 to float
+  %mul = fmul float %conv, %0
+  %add1 = fadd float %sum.07, %mul
+  %inc = add nsw i32 %i.06, 1
+  %cmp = icmp slt i32 %inc, %m
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1, %for.end.loopexit ]
+  ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test3(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = sext
+; CHECK: %exitcond = icmp ne
+; CHECK: br i1 %exitcond
+}
+
+define float @test4(float* noalias nocapture readnone %a,
+                    float* noalias nocapture readonly %b,
+                    i32 signext %m) local_unnamed_addr #0 {
+entry:
+  %cmp5 = icmp sgt i32 %m, 10
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %sum.07 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+  %i.06 = phi i32 [ %inc, %for.body ], [ 10, %for.body.preheader ]
+  %add = add nsw i32 %i.06, 20
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+  %conv = sitofp i32 %i.06 to float
+  %mul = fmul float %conv, %0
+  %add1 = fadd float %sum.07, %mul
+  %inc = add nsw i32 %i.06, 1
+  %cmp = icmp slt i32 %inc, %m
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  %add1.lcssa = phi float [ %add1, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1.lcssa, %for.end.loopexit ]
+  ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test4(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = zext
+; CHECK: %exitcond = icmp ne
+; CHECK: br i1 %exitcond
+}
+
+
Index: test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
===================================================================
--- test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -33,8 +33,9 @@
 ; CHECK-LABEL: @test1(
 
 ; check that we turn the IV test into an eq.
-; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %wide.trip.count = zext i32 %sub to i64
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
 }
 