Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -128,6 +128,9 @@ static cl::opt GVNEnableSimpleGVNHoist("enable-simple-gvn-hoist", cl::init(true)); +static cl::opt MaxBlockDepth( + "simple-gvn-hoist-max-block-depth", cl::Hidden, cl::init(100), + cl::desc("Limit hoist candidates to the first N instructions in a block")); struct llvm::GVN::Expression { uint32_t opcode; @@ -3120,8 +3123,11 @@ } void GVN::collectHoistCandidates(BasicBlock *BB) { + uint32_t Depth = 0; bool HasHoistBarrier = false; for (Instruction &I : *BB) { + if (++Depth > MaxBlockDepth) + break; if (I.isTerminator()) break; if (isa(I)) @@ -3137,8 +3143,11 @@ } void GVN::matchHoistCandidates(BasicBlock *BB) { + uint32_t Depth = 0; bool HasHoistBarrier = false; for (Instruction &I : *BB) { + if (++Depth > MaxBlockDepth) + break; if (I.isTerminator()) break; if (isa(I)) @@ -3317,6 +3326,36 @@ return {true, false}; } +// Determine if an instruction should be used to initiate hoisting a +// dependency chain. The aim is to avoid separating instructions, for which it's +// (heuristically) considered better to keep them together, as it's common that +// they can be fused in some way. An instruction, which is denied hoisting by +// this function can still be hoisted if it appears as a dependency (e.g +// operand) of another hoisted instruction. +static bool shouldNotInitiateHoisting(const Instruction *I) { + // Don't separate GEP's form their loads/stores. + if (isa(I)) + return true; + const bool IsBinop = isa(I); + for (const User *U : I->users()) { + // Don't separate conditions from `br` or `select`. + if ((isa(U) || isa(U)) && U->getOperand(0) == I) + return true; + // Don't separate a value from converting that value to a boolean by + // comparing it to zero. + if (!IsBinop) + continue; + const auto *ICmp = dyn_cast(U); + if (ICmp == nullptr || (ICmp->getPredicate() != CmpInst::ICMP_EQ && + ICmp->getPredicate() != CmpInst::ICMP_NE)) + continue; + const auto *Zero = dyn_cast(ICmp->getOperand(1)); + if (Zero != nullptr && Zero->isZero()) + return true; + } + return false; +} + // Perform trivial hoisting of values from two blocks to their common // predecessor. bool GVN::performHoist(Function &F) { @@ -3351,8 +3390,10 @@ // Hoist matched pairs. for (const auto &P : HoistPairs) { bool LocalChange, _; - std::tie(LocalChange, _) = - hoistPair(BB, Then, Else, P.second.first, P.second.second); + Instruction *ThenI = P.second.first, *ElseI = P.second.second; + if (shouldNotInitiateHoisting(ThenI)) + continue; + std::tie(LocalChange, _) = hoistPair(BB, Then, Else, ThenI, ElseI); Change |= LocalChange; } } Index: llvm/test/Transforms/GVN/simple-gvn-hoist-limits.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVN/simple-gvn-hoist-limits.ll @@ -0,0 +1,152 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes=gvn -simple-gvn-hoist-max-block-depth=3 %s -o - | FileCheck %s --check-prefix=MAX-DEPTH3 +; RUN: opt -S --passes=gvn -simple-gvn-hoist-max-block-depth=4 %s -o - | FileCheck %s --check-prefix=MAX-DEPTH4 +; RUN: opt -S --passes=gvn -simple-gvn-hoist-max-block-depth=5 %s -o - | FileCheck %s --check-prefix=MAX-DEPTH5 + +; Check effect of limiting the how deep in a block we look for +; instructions to hoist; At max depth 3 we shouldn't hoist +; anything to the `entry` block, at max depth 4 we should hoist +; the first `add` and at max depth 5 the second `add`. + +; In any case we shouldn't hoist the `and` and the `icmp`, in order +; to not separate them from the `br` + +define i32 @f(i1 %c, i32 %a, i32 %b, i32 %d) { +; MAX-DEPTH3-LABEL: @f( +; MAX-DEPTH3-NEXT: entry: +; MAX-DEPTH3-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; MAX-DEPTH3: if.then: +; MAX-DEPTH3-NEXT: [[R0:%.*]] = add i32 [[B:%.*]], 1 +; MAX-DEPTH3-NEXT: [[AND0:%.*]] = and i32 [[A:%.*]], 1 +; MAX-DEPTH3-NEXT: [[TOBOOL_AND0:%.*]] = icmp eq i32 [[AND0]], 0 +; MAX-DEPTH3-NEXT: [[D0:%.*]] = add i32 [[D:%.*]], 1 +; MAX-DEPTH3-NEXT: [[S0:%.*]] = add i32 [[D0]], [[B]] +; MAX-DEPTH3-NEXT: br i1 [[TOBOOL_AND0]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]] +; MAX-DEPTH3: if.then1: +; MAX-DEPTH3-NEXT: [[OR0:%.*]] = or i32 [[R0]], 1 +; MAX-DEPTH3-NEXT: br label [[EXIT:%.*]] +; MAX-DEPTH3: if.else1: +; MAX-DEPTH3-NEXT: [[OR1:%.*]] = or i32 [[R0]], 2 +; MAX-DEPTH3-NEXT: br label [[EXIT]] +; MAX-DEPTH3: if.else: +; MAX-DEPTH3-NEXT: [[R1:%.*]] = add i32 [[B]], 2 +; MAX-DEPTH3-NEXT: [[AND1:%.*]] = and i32 [[A]], 1 +; MAX-DEPTH3-NEXT: [[TOBOOL_AND1:%.*]] = icmp eq i32 [[AND1]], 0 +; MAX-DEPTH3-NEXT: [[D1:%.*]] = add i32 [[D]], 1 +; MAX-DEPTH3-NEXT: [[S1:%.*]] = add i32 [[D1]], [[B]] +; MAX-DEPTH3-NEXT: br i1 [[TOBOOL_AND1]], label [[IF_THEN2:%.*]], label [[IF_ELSE2:%.*]] +; MAX-DEPTH3: if.then2: +; MAX-DEPTH3-NEXT: [[OR2:%.*]] = or i32 [[R1]], 4 +; MAX-DEPTH3-NEXT: br label [[EXIT]] +; MAX-DEPTH3: if.else2: +; MAX-DEPTH3-NEXT: [[OR3:%.*]] = or i32 [[R1]], 8 +; MAX-DEPTH3-NEXT: br label [[EXIT]] +; MAX-DEPTH3: exit: +; MAX-DEPTH3-NEXT: [[OR:%.*]] = phi i32 [ [[OR0]], [[IF_THEN1]] ], [ [[OR1]], [[IF_ELSE1]] ], [ [[OR2]], [[IF_THEN2]] ], [ [[OR3]], [[IF_ELSE2]] ] +; MAX-DEPTH3-NEXT: [[S:%.*]] = phi i32 [ [[S0]], [[IF_THEN1]] ], [ [[S0]], [[IF_ELSE1]] ], [ [[S1]], [[IF_THEN2]] ], [ [[S1]], [[IF_ELSE2]] ] +; MAX-DEPTH3-NEXT: [[R:%.*]] = add i32 [[OR]], [[S]] +; MAX-DEPTH3-NEXT: ret i32 [[R]] +; +; MAX-DEPTH4-LABEL: @f( +; MAX-DEPTH4-NEXT: entry: +; MAX-DEPTH4-NEXT: [[D0:%.*]] = add i32 [[D:%.*]], 1 +; MAX-DEPTH4-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; MAX-DEPTH4: if.then: +; MAX-DEPTH4-NEXT: [[R0:%.*]] = add i32 [[B:%.*]], 1 +; MAX-DEPTH4-NEXT: [[AND0:%.*]] = and i32 [[A:%.*]], 1 +; MAX-DEPTH4-NEXT: [[TOBOOL_AND0:%.*]] = icmp eq i32 [[AND0]], 0 +; MAX-DEPTH4-NEXT: [[S0:%.*]] = add i32 [[D0]], [[B]] +; MAX-DEPTH4-NEXT: br i1 [[TOBOOL_AND0]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]] +; MAX-DEPTH4: if.then1: +; MAX-DEPTH4-NEXT: [[OR0:%.*]] = or i32 [[R0]], 1 +; MAX-DEPTH4-NEXT: br label [[EXIT:%.*]] +; MAX-DEPTH4: if.else1: +; MAX-DEPTH4-NEXT: [[OR1:%.*]] = or i32 [[R0]], 2 +; MAX-DEPTH4-NEXT: br label [[EXIT]] +; MAX-DEPTH4: if.else: +; MAX-DEPTH4-NEXT: [[R1:%.*]] = add i32 [[B]], 2 +; MAX-DEPTH4-NEXT: [[AND1:%.*]] = and i32 [[A]], 1 +; MAX-DEPTH4-NEXT: [[TOBOOL_AND1:%.*]] = icmp eq i32 [[AND1]], 0 +; MAX-DEPTH4-NEXT: [[S1:%.*]] = add i32 [[D0]], [[B]] +; MAX-DEPTH4-NEXT: br i1 [[TOBOOL_AND1]], label [[IF_THEN2:%.*]], label [[IF_ELSE2:%.*]] +; MAX-DEPTH4: if.then2: +; MAX-DEPTH4-NEXT: [[OR2:%.*]] = or i32 [[R1]], 4 +; MAX-DEPTH4-NEXT: br label [[EXIT]] +; MAX-DEPTH4: if.else2: +; MAX-DEPTH4-NEXT: [[OR3:%.*]] = or i32 [[R1]], 8 +; MAX-DEPTH4-NEXT: br label [[EXIT]] +; MAX-DEPTH4: exit: +; MAX-DEPTH4-NEXT: [[OR:%.*]] = phi i32 [ [[OR0]], [[IF_THEN1]] ], [ [[OR1]], [[IF_ELSE1]] ], [ [[OR2]], [[IF_THEN2]] ], [ [[OR3]], [[IF_ELSE2]] ] +; MAX-DEPTH4-NEXT: [[S:%.*]] = phi i32 [ [[S0]], [[IF_THEN1]] ], [ [[S0]], [[IF_ELSE1]] ], [ [[S1]], [[IF_THEN2]] ], [ [[S1]], [[IF_ELSE2]] ] +; MAX-DEPTH4-NEXT: [[R:%.*]] = add i32 [[OR]], [[S]] +; MAX-DEPTH4-NEXT: ret i32 [[R]] +; +; MAX-DEPTH5-LABEL: @f( +; MAX-DEPTH5-NEXT: entry: +; MAX-DEPTH5-NEXT: [[D0:%.*]] = add i32 [[D:%.*]], 1 +; MAX-DEPTH5-NEXT: [[S0:%.*]] = add i32 [[D0]], [[B:%.*]] +; MAX-DEPTH5-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; MAX-DEPTH5: if.then: +; MAX-DEPTH5-NEXT: [[R0:%.*]] = add i32 [[B]], 1 +; MAX-DEPTH5-NEXT: [[AND0:%.*]] = and i32 [[A:%.*]], 1 +; MAX-DEPTH5-NEXT: [[TOBOOL_AND0:%.*]] = icmp eq i32 [[AND0]], 0 +; MAX-DEPTH5-NEXT: br i1 [[TOBOOL_AND0]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]] +; MAX-DEPTH5: if.then1: +; MAX-DEPTH5-NEXT: [[OR0:%.*]] = or i32 [[R0]], 1 +; MAX-DEPTH5-NEXT: br label [[EXIT:%.*]] +; MAX-DEPTH5: if.else1: +; MAX-DEPTH5-NEXT: [[OR1:%.*]] = or i32 [[R0]], 2 +; MAX-DEPTH5-NEXT: br label [[EXIT]] +; MAX-DEPTH5: if.else: +; MAX-DEPTH5-NEXT: [[R1:%.*]] = add i32 [[B]], 2 +; MAX-DEPTH5-NEXT: [[AND1:%.*]] = and i32 [[A]], 1 +; MAX-DEPTH5-NEXT: [[TOBOOL_AND1:%.*]] = icmp eq i32 [[AND1]], 0 +; MAX-DEPTH5-NEXT: br i1 [[TOBOOL_AND1]], label [[IF_THEN2:%.*]], label [[IF_ELSE2:%.*]] +; MAX-DEPTH5: if.then2: +; MAX-DEPTH5-NEXT: [[OR2:%.*]] = or i32 [[R1]], 4 +; MAX-DEPTH5-NEXT: br label [[EXIT]] +; MAX-DEPTH5: if.else2: +; MAX-DEPTH5-NEXT: [[OR3:%.*]] = or i32 [[R1]], 8 +; MAX-DEPTH5-NEXT: br label [[EXIT]] +; MAX-DEPTH5: exit: +; MAX-DEPTH5-NEXT: [[OR:%.*]] = phi i32 [ [[OR0]], [[IF_THEN1]] ], [ [[OR1]], [[IF_ELSE1]] ], [ [[OR2]], [[IF_THEN2]] ], [ [[OR3]], [[IF_ELSE2]] ] +; MAX-DEPTH5-NEXT: [[S:%.*]] = phi i32 [ [[S0]], [[IF_THEN1]] ], [ [[S0]], [[IF_ELSE1]] ], [ [[S0]], [[IF_THEN2]] ], [ [[S0]], [[IF_ELSE2]] ] +; MAX-DEPTH5-NEXT: [[R:%.*]] = add i32 [[OR]], [[S]] +; MAX-DEPTH5-NEXT: ret i32 [[R]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %r0 = add i32 %b, 1 + %and0 = and i32 %a, 1 + %tobool.and0 = icmp eq i32 %and0, 0 + %d0 = add i32 %d, 1 + %s0 = add i32 %d0, %b + br i1 %tobool.and0, label %if.then1, label %if.else1 +if.then1: + %or0 = or i32 %r0, 1 + br label %exit +if.else1: + %or1 = or i32 %r0, 2 + br label %exit + +if.else: + %r1 = add i32 %b, 2 + %and1 = and i32 %a, 1 + %tobool.and1 = icmp eq i32 %and1, 0 + %d1 = add i32 %d, 1 + %s1 = add i32 %d1, %b + br i1 %tobool.and1, label %if.then2, label %if.else2 +if.then2: + %or2 = or i32 %r1, 4 + br label %exit +if.else2: + %or3 = or i32 %r1, 8 + br label %exit +exit: + %or = phi i32 [%or0, %if.then1], [%or1, %if.else1], [%or2, %if.then2], [%or3, %if.else2] + %s = phi i32 [%s0, %if.then1], [%s0, %if.else1], [%s1, %if.then2], [%s1, %if.else2] + %r = add i32 %or, %s + ret i32 %r +} Index: llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -163,16 +163,16 @@ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !8 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !alias.scope !11 -; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP5]] to <4 x float>* +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* ; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !13, !noalias !15 -; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP8]], [[WIDE_LOAD15]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP8]], <4 x float> [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP5]] to <4 x float>* +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP7]], <4 x float> [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP8]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[PREDPHI]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !15 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 @@ -182,18 +182,18 @@ ; CHECK-NEXT: [[C_GEP:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[IV1]] ; CHECK-NEXT: [[C_LV:%.*]] = load i32, i32* [[C_GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C_LV]], 20 -; CHECK-NEXT: [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV1]] ; CHECK-NEXT: [[A_GEP_0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV1]] ; CHECK-NEXT: [[A_LV_0:%.*]] = load float, float* [[A_GEP_0]], align 4 ; CHECK-NEXT: [[MUL2_I81_I:%.*]] = fmul float [[A_LV_0]], [[X]] +; CHECK-NEXT: [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV1]] ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[ELSE:%.*]] ; CHECK: else: ; CHECK-NEXT: [[B_LV:%.*]] = load float, float* [[B_GEP_0]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL2_I81_I]], [[B_LV]] ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[STOREMERGE:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ] -; CHECK-NEXT: store float [[STOREMERGE]], float* [[B_GEP_0]], align 4 +; CHECK-NEXT: [[ADD_SINK:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ] +; CHECK-NEXT: store float [[ADD_SINK]], float* [[B_GEP_0]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i64 [[IV1]], 9999 ; CHECK-NEXT: br i1 [[CMP_0]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP17:![0-9]+]]