diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -253,7 +253,8 @@ bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); - bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI); + bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI, + bool EqTermsOnly); bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, const TargetTransformInfo &TTI); bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, @@ -1388,9 +1389,12 @@ /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function -/// guarantees that BI's block dominates BB1 and BB2. +/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, +/// only perform hoisting in case both blocks only contain a terminator. In that +/// case, only the original BI will be replaced and selects for PHIs are added. bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + bool EqTermsOnly) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1427,6 +1431,16 @@ ++NumHoistCommonCode; }); + // Check if only hoisting terminators is allowed. This does not add new + // instructions to the hoist location. + if (EqTermsOnly) { + if (!I1->isIdenticalToWhenDefined(I2)) + return false; + if (!I1->isTerminator()) + return false; + goto HoistTerminator; + } + do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. @@ -6443,9 +6457,9 @@ // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistCommon && Options.HoistCommonInsts) - if (HoistThenElseCodeToIf(BI, TTI)) - return requestResimplify(); + if (HoistCommon && + HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts)) + return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll @@ -41,28 +41,54 @@ define void @loop(double* %X, double* %Y) { ; CHECK-LABEL: @loop( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 20000 +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr double, double* [[Y:%.*]], i64 20000 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP9]], [[X]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8, !alias.scope !0 +; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD11]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP8:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD11]], +; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x double> , <2 x double> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x double> , <2 x double> [[WIDE_LOAD11]] +; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> zeroinitializer, <2 x double> [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP15]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP16]], align 8, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000 +; CHECK-NEXT: br i1 [[TMP17]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[CLAMP_EXIT:%.*]] ] +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_05]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt double [[TMP0]], 0.000000e+00 -; CHECK-NEXT: br i1 [[CMP_I]], label [[CLAMP_EXIT]], label [[IF_END_I:%.*]] -; CHECK: if.end.i: -; CHECK-NEXT: [[CMP1_I:%.*]] = fcmp ogt double [[TMP0]], 6.000000e+00 -; CHECK-NEXT: br i1 [[CMP1_I]], label [[CLAMP_EXIT]], label [[IF_END3_I:%.*]] -; CHECK: if.end3.i: -; CHECK-NEXT: br label [[CLAMP_EXIT]] -; CHECK: clamp.exit: -; CHECK-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[TMP0]], [[IF_END3_I]] ], [ 0.000000e+00, [[FOR_BODY]] ], [ 6.000000e+00, [[IF_END_I]] ] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X:%.*]], i64 [[IDXPROM]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP18:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt double [[TMP18]], 0.000000e+00 +; CHECK-NEXT: [[CMP1_I:%.*]] = fcmp ogt double [[TMP18]], 6.000000e+00 +; CHECK-NEXT: [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP18]] +; CHECK-NEXT: [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[IDXPROM]] ; CHECK-NEXT: store double [[RETVAL_0_I]], double* [[ARRAYIDX2]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_05]], 19999 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP7:![0-9]+]] ; entry: %X.addr = alloca double*, align 8 diff --git a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll --- a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll +++ b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll @@ -110,8 +110,8 @@ } ; A example where only the branch instructions from %if.then2 and %if.end3 need -; to be hoisted, which effectively replaces the original branch in %if.end. As -; this does not add any new instructions to the hoist location, it should always be profitable. +; to be hoisted, which effectively replaces the original branch in %if.end and +; only requires selects for PHIs in the successor. define float @clamp_float_value(float %value, float %minimum_value, float %maximum_value) { ; HOIST-LABEL: @clamp_float_value( ; HOIST-NEXT: entry: @@ -124,14 +124,9 @@ ; NOHOIST-LABEL: @clamp_float_value( ; NOHOIST-NEXT: entry: ; NOHOIST-NEXT: [[CMP:%.*]] = fcmp ogt float [[VALUE:%.*]], [[MAXIMUM_VALUE:%.*]] -; NOHOIST-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]] -; NOHOIST: if.end: ; NOHOIST-NEXT: [[CMP1:%.*]] = fcmp olt float [[VALUE]], [[MINIMUM_VALUE:%.*]] -; NOHOIST-NEXT: br i1 [[CMP1]], label [[RETURN]], label [[IF_END3:%.*]] -; NOHOIST: if.end3: -; NOHOIST-NEXT: br label [[RETURN]] -; NOHOIST: return: -; NOHOIST-NEXT: [[RETVAL_0:%.*]] = phi float [ [[VALUE]], [[IF_END3]] ], [ [[MAXIMUM_VALUE]], [[ENTRY:%.*]] ], [ [[MINIMUM_VALUE]], [[IF_END]] ] +; NOHOIST-NEXT: [[MINIMUM_VALUE_VALUE:%.*]] = select i1 [[CMP1]], float [[MINIMUM_VALUE]], float [[VALUE]] +; NOHOIST-NEXT: [[RETVAL_0:%.*]] = select i1 [[CMP]], float [[MAXIMUM_VALUE]], float [[MINIMUM_VALUE_VALUE]] ; NOHOIST-NEXT: ret float [[RETVAL_0]] ; entry: