Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2027,6 +2027,12 @@ if (!PH) continue; if (PH->getNumIncomingValues() != 2) continue; + // If the calculation in integers overflows, the result in FP type will + // differ. So we can only do this transformation if we are guaranteed not to + // deal with overflowing values. + const SCEVAddRecExpr *PS = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH)); + if (!PS || !PS->hasNoSelfWrap()) continue; + Type *SrcTy = PH->getType(); int Mantissa = DestTy->getFPMantissaWidth(); if (Mantissa == -1) continue; Index: test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll =================================================================== --- test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll +++ test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll @@ -18,7 +18,7 @@ tail call void @bar( i32 %i.03 ) nounwind uitofp i32 %i.03 to double ; <double>:1 [#uses=1] tail call void @foo( double %1 ) nounwind - %indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2] + %indvar.next = add nuw i32 %i.03, 1 ; <i32> [#uses=2] %exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1] br i1 %exitcond, label %return, label %bb @@ -43,7 +43,7 @@ tail call void @bar( i32 %1 ) nounwind uitofp i64 %i.03 to double ; <double>:2 [#uses=1] tail call void @foo( double %2 ) nounwind - %indvar.next = add i64 %i.03, 1 ; <i64> [#uses=2] + %indvar.next = add nuw i64 %i.03, 1 ; <i64> [#uses=2] %exitcond = icmp eq i64 %indvar.next, %umax ; <i1> [#uses=1] br i1 %exitcond, label %return, label %bb Index: test/Transforms/LoopStrengthReduce/dont_turn_int_overflow_to_fp.ll =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/dont_turn_int_overflow_to_fp.ll @@ -0,0 +1,59 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +; This test checks that we don't try to replace integer calculations with +; double calculations if we cannot prove that integer calculations will not +; overflow. + +define i32 @test_01() { +; CHECK-LABEL: test_01( +; CHECK: outer_header: +; CHECK-NOT: phi double +; CHECK-NOT: phi float +entry: + br label %outer_header + +outer_header: ; preds = %outer_backedge, %entry + %local_3_ = phi i32 [ -4, %entry ], [ %tmp10, %outer_backedge ] + %local_4_ = phi i32 [ -3220, %entry ], [ %tmp5, %outer_backedge ] + %local_6_ = phi i32 [ 12, %entry ], [ %tmp6, %outer_backedge ] + %tmp1 = sitofp i32 %local_4_ to double + %tmp2 = fsub double 0.000000e+00, %tmp1 + %arg.v.i.peel = insertelement <2 x double> undef, double %tmp2, i32 0 + %res.i.peel = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %arg.v.i.peel) + %tmp3 = add i32 %local_4_, 1 + %tmp4 = add i32 %res.i.peel, %local_3_ + br label %inner_loop + +inner_loop: ; preds = %inner_loop, %outer_header + %local_3_11 = phi i32 [ %tmp4, %outer_header ], [ %tmp10, %inner_loop ] + %local_4_12 = phi i32 [ %tmp3, %outer_header ], [ %tmp12, %inner_loop ] + %local_7_ = phi i32 [ 2, %outer_header ], [ %tmp13, %inner_loop ] + %tmp8 = sitofp i32 %local_4_12 to double + %tmp9 = fsub double 0.000000e+00, %tmp8 + %arg.v.i = insertelement <2 x double> undef, double %tmp9, i32 0 + %res.i = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %arg.v.i) #1 + %tmp10 = add i32 %res.i, %local_3_11 + %tmp11 = mul i32 %local_7_, %local_7_ + %tmp12 = add i32 %tmp11, %local_4_12 + %tmp13 = add nuw nsw i32 %local_7_, 1 + %tmp14 = icmp ugt i32 %local_7_, 305 + br i1 %tmp14, label %outer_backedge, label %inner_loop + +outer_backedge: ; preds = %inner_loop + %tmp5 = add i32 %local_4_, 9597741 + %tmp6 = add nuw nsw i32 %local_6_, 1 + %tmp7 = icmp ugt i32 %local_6_, 235 + br i1 %tmp7, label %exit, label %outer_header + +exit: ; preds = %outer_backedge + ret i32 %tmp10 +} + +; 
Function Attrs: nounwind readnone +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind }