Index: llvm/lib/Transforms/Scalar/GuardWidening.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -113,6 +113,23 @@ ++GuardsEliminated; } +/// Find a point at which the widened condition of \p Guard should be inserted. +/// When it is represented as intrinsic call, we can do it right before the call +/// instruction. However, when we are dealing with widenable branch, we must +/// account for the following situation: widening should not turn a +/// loop-invariant condition into a loop-variant. It means that if +/// widenable.condition() call is invariant (w.r.t. any loop), the new wide +/// condition should stay invariant. Otherwise there can be a miscompile, like +/// the one described at https://github.com/llvm/llvm-project/issues/60234. The +/// safest way to do it is to expand the new condition at WC's block. +static Instruction *findInsertionPointForWideCondition(Instruction *Guard) { + Value *Condition, *WC; + BasicBlock *IfTrue, *IfFalse; + if (parseWidenableBranch(Guard, Condition, WC, IfTrue, IfFalse)) + return cast(WC); + return Guard; +} + class GuardWideningImpl { DominatorTree &DT; PostDominatorTree *PDT; @@ -263,8 +280,8 @@ void widenGuard(Instruction *ToWiden, Value *NewCondition, bool InvertCondition) { Value *Result; - - widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result, + Instruction *InsertPt = findInsertionPointForWideCondition(ToWiden); + widenCondCommon(getCondition(ToWiden), NewCondition, InsertPt, Result, InvertCondition); if (isGuardAsWidenableBranch(ToWiden)) { setWidenableBranchCond(cast(ToWiden), Result); @@ -422,7 +439,8 @@ HoistingOutOfLoop = true; } - if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard)) + auto *WideningPoint = findInsertionPointForWideCondition(DominatingGuard); + if (!isAvailableAt(getCondition(DominatedInstr), WideningPoint)) return WS_IllegalOrNegative; // If the guard was conditional executed, it may never be reached Index: llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll =================================================================== --- llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll +++ llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll @@ -7,8 +7,8 @@ define void @f_0(i1 %cond_0, i1 %cond_1) { ; CHECK-LABEL: @f_0( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: deopt: @@ -50,8 +50,8 @@ define void @f_1(i1 %cond_0, i1 %cond_1) { ; CHECK-LABEL: @f_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -110,9 +110,9 @@ ; CHECK-LABEL: @f_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 10 -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[COND_1:%.*]] = icmp ult i32 [[B:%.*]], 10 ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -231,9 +231,9 @@ ; CHECK-LABEL: @f_4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 10 -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[COND_1:%.*]] = icmp ult i32 [[B:%.*]], 10 ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0]], [[COND_1]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -289,8 +289,8 @@ ; CHECK-LABEL: @f_5( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND_0:%.*]] = icmp ugt i32 [[A:%.*]], 7 -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WIDE_CHK:%.*]] = icmp uge i32 [[A]], 11 +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -397,9 +397,9 @@ ; CHECK-LABEL: @f_7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND_1:%.*]] = load volatile i1, ptr [[COND_BUF:%.*]], align 1 -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[COND_3:%.*]] = icmp ult i32 [[A:%.*]], 7 ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_1]], [[COND_3]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -484,9 +484,9 @@ ; CHECK: guarded: ; CHECK-NEXT: br i1 undef, label [[LOOP]], label [[LEAVE:%.*]] ; CHECK: leave: -; CHECK-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[COND_3:%.*]] = icmp ult i32 [[A:%.*]], 7 ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_2:%.*]], [[COND_3]] +; CHECK-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND3]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND4]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: @@ -663,8 +663,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] ; CHECK: outer_header: -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -725,7 +725,6 @@ define void @f_12(i32 %a0) { ; CHECK-LABEL: @f_12( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[A1:%.*]] = mul i32 [[A0:%.*]], [[A0]] ; CHECK-NEXT: [[A2:%.*]] = mul i32 [[A1]], [[A1]] ; CHECK-NEXT: [[A3:%.*]] = mul i32 [[A2]], [[A2]] @@ -758,6 +757,7 @@ ; CHECK-NEXT: [[A30:%.*]] = mul i32 [[A29]], [[A29]] ; CHECK-NEXT: [[COND:%.*]] = trunc i32 [[A30]] to i1 ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 true, [[COND]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -830,8 +830,8 @@ ; CHECK-LABEL: @f_13( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND_0:%.*]] = icmp ult i32 [[A:%.*]], 14 -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WIDE_CHK:%.*]] = icmp ult i32 [[A]], 10 +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -1025,8 +1025,8 @@ define void @swapped_wb(i1 %cond_0, i1 %cond_1) { ; CHECK-LABEL: @swapped_wb( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDENABLE_COND]], [[WIDE_CHK]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: @@ -1067,8 +1067,8 @@ define void @trivial_wb(i1 %cond_0) { ; CHECK-LABEL: @trivial_wb( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 true, [[COND_0:%.*]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[TMP0]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: Index: llvm/test/Transforms/GuardWidening/loop_invariant_widenable_condition.ll =================================================================== --- llvm/test/Transforms/GuardWidening/loop_invariant_widenable_condition.ll +++ llvm/test/Transforms/GuardWidening/loop_invariant_widenable_condition.ll @@ -3,17 +3,17 @@ declare i32 @llvm.experimental.deoptimize.i32(...) -; FIXME: Make sure the two loop-invariant conditions can be widened together, -; and the widening point is outside the loop. +; Make sure the two loop-invariant conditions can be widened together, +; and the widening point is outside the loop. define i32 @test_01(i32 %start, i32 %x) { ; CHECK-LABEL: @test_01( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[START:%.*]], [[X:%.*]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 true, [[COND]] ; CHECK-NEXT: [[WC1:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 true, [[COND]] ; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[WIDE_CHK]], [[WC1]] ; CHECK-NEXT: br i1 [[TMP0]], label [[GUARD_BLOCK:%.*]], label [[EXIT_BY_WC:%.*]] ; CHECK: exit_by_wc: @@ -65,7 +65,7 @@ } -; FIXME: Make sure the loop-variant condition is not widened into loop-invariant. +; Make sure the loop-variant condition is not widened into loop-invariant. define i32 @test_02(i32 %start, i32 %x) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: @@ -73,17 +73,15 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV]], [[X:%.*]] -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 true, [[COND]] -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[WIDE_CHK]], [[WC1]] -; CHECK-NEXT: br i1 [[TMP0]], label [[GUARD_BLOCK:%.*]], label [[EXIT_BY_WC:%.*]] +; CHECK-NEXT: br i1 [[WC1]], label [[GUARD_BLOCK:%.*]], label [[EXIT_BY_WC:%.*]] ; CHECK: exit_by_wc: ; CHECK-NEXT: [[RVAL1:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[IV]]) ] ; CHECK-NEXT: ret i32 [[RVAL1]] ; CHECK: guard_block: +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV]], [[X:%.*]] ; CHECK-NEXT: [[WC2:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[GUARD:%.*]] = and i1 [[COND]], [[WC2]] -; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAILURE:%.*]] +; CHECK-NEXT: br i1 [[GUARD]], label [[BACKEDGE]], label [[FAILURE:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @side_effect() ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 @@ -125,16 +123,16 @@ ret i32 %rval2 } -; FIXME: Same as test_01, but the initial condition is not immediately WC. +; Same as test_01, but the initial condition is not immediately WC. define i32 @test_03(i32 %start, i32 %x, i1 %c) { ; CHECK-LABEL: @test_03( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[START:%.*]], [[X:%.*]] +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[C:%.*]], [[COND]] ; CHECK-NEXT: [[WC1:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[C:%.*]], [[COND]] ; CHECK-NEXT: [[INVARIANT:%.*]] = and i1 [[WIDE_CHK]], [[WC1]] ; CHECK-NEXT: br i1 [[INVARIANT]], label [[GUARD_BLOCK:%.*]], label [[EXIT_BY_WC:%.*]] ; CHECK: exit_by_wc: Index: llvm/test/Transforms/GuardWidening/mixed_guards.ll =================================================================== --- llvm/test/Transforms/GuardWidening/mixed_guards.ll +++ llvm/test/Transforms/GuardWidening/mixed_guards.ll @@ -44,8 +44,8 @@ define void @test_02(i1 %cond_0, i1 %cond_1) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] ; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: Index: llvm/test/Transforms/GuardWidening/pr60234.ll =================================================================== --- llvm/test/Transforms/GuardWidening/pr60234.ll +++ llvm/test/Transforms/GuardWidening/pr60234.ll @@ -3,10 +3,10 @@ declare i32 @llvm.experimental.deoptimize.i32(...) -; FIXME: Make sure that guard widening does not turn loop-invariant condition -; (that then gets optimized basing on this fact) into non-invariant. -; https://github.com/llvm/llvm-project/issues/60234 explains how it causes -; a miscompile. +; Make sure that guard widening does not turn loop-invariant condition +; (that then gets optimized basing on this fact) into non-invariant. +; https://github.com/llvm/llvm-project/issues/60234 explains how it causes +; a miscompile. define i32 @test(i32 %start) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: @@ -14,19 +14,17 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[START_PLUS_1:%.*]] = add i32 [[START]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[START_PLUS_1]], [[IV]] -; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 true, [[COND]] -; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[WIDE_CHK]], [[WC1]] -; CHECK-NEXT: br i1 [[TMP0]], label [[GUARD_BLOCK:%.*]], label [[EXIT_BY_WC:%.*]] +; CHECK-NEXT: br i1 [[WC1]], label [[GUARD_BLOCK:%.*]], label [[EXIT_BY_WC:%.*]] ; CHECK: exit_by_wc: ; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[START]], [[LOOP]] ] ; CHECK-NEXT: [[RVAL1:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[IV_LCSSA]]) ] ; CHECK-NEXT: ret i32 [[RVAL1]] ; CHECK: guard_block: +; CHECK-NEXT: [[START_PLUS_1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[START_PLUS_1]], [[IV]] ; CHECK-NEXT: [[WC2:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[GUARD:%.*]] = and i1 [[COND]], [[WC2]] -; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAILURE:%.*]] +; CHECK-NEXT: br i1 [[GUARD]], label [[BACKEDGE]], label [[FAILURE:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @side_effect() ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1