Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -115,6 +115,12 @@ HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block")); +static cl::opt + HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, + cl::init(20), + cl::desc("Allow reordering across at most this many " + "instructions when hoisting")); + static cl::opt SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); @@ -1430,6 +1436,32 @@ return true; } +// Returns true if it is safe to reorder an instruction across preceding +// instructions in a basic block. +static bool isSafeToHoistInstr(Instruction *I, bool ForceNoSideEffects, + bool ForceNoSpeculation) { + // If we have seen an instruction with side effects, it's unsafe to reorder an + // instruction which reads memory or itself has side effects. + if (ForceNoSideEffects && (I->mayReadFromMemory() || I->mayHaveSideEffects())) + return false; + + // Reordering across an instruction which does not necessarily transfer + // control to the next instruction is speculation. + if (ForceNoSpeculation && !isSafeToSpeculativelyExecute(I)) + return false; + + // It's also unsafe/illegal to hoist an instruction above its instruction + // operands + BasicBlock *BB = I->getParent(); + for (Value *Op : I->operands()) { + if (auto *J = dyn_cast(Op)) + if (J->getParent() == BB) + return false; + } + + return true; +} + static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); /// Given a conditional branch that goes to BB1 and BB2, hoist any common code @@ -1444,7 +1476,8 @@ // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As // such, we currently just scan for obviously identical instructions in an - // identical order. + // identical order, possibly separated by the same number of non-identical + // instructions. BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. BasicBlock *BB2 = BI->getSuccessor(1); // The false destination @@ -1467,7 +1500,7 @@ while (isa(I2)) I2 = &*BB2_Itr++; } - if (isa(I1) || !I1->isIdenticalToWhenDefined(I2)) + if (isa(I1)) return false; BasicBlock *BIParent = BI->getParent(); @@ -1493,79 +1526,122 @@ // terminator. Let the loop below handle those 2 cases. } - do { + // Count how many instructions were not hoisted so far. There's a limit on how + // many instructions we skip, serving as a compilation time control as well as + // preventing excessive increase of life ranges. + unsigned NumSkipped = 0; + + // Record if any non-hoisted instruction contains side-effects, as it could + // make it illegal to reorder some instructions across. + bool ForceNoReadMemOrSideEffectsBB1 = false; + bool ForceNoReadMemOrSideEffectsBB2 = false; + + // Record if any non-hoisted instructions does not necessarily transfer + // control to the next instruction. Then we can hoist only instrucions which + // are safe to speculate. + bool ForceNoSpeculationBB1 = false; + bool ForceNoSpeculationBB2 = false; + + for (;;) { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. - if (I1->isTerminator()) + if (I1->isTerminator() || I2->isTerminator()) { + // If any instructions remain in the block, we cannot hoist terminators. + if (NumSkipped || !I1->isIdenticalToWhenDefined(I2)) + return Changed; goto HoistTerminator; + } - // Hoisting token-returning instructions would obscure the origin. - if (I1->getType()->isTokenTy()) - return Changed; - - // If we're going to hoist a call, make sure that the two instructions we're - // commoning/hoisting are both marked with musttail, or neither of them is - // marked as such. Otherwise, we might end up in a situation where we hoist - // from a block where the terminator is a `ret` to a block where the terminator - // is a `br`, and `musttail` calls expect to be followed by a return. - auto *C1 = dyn_cast(I1); - auto *C2 = dyn_cast(I2); - if (C1 && C2) - if (C1->isMustTailCall() != C2->isMustTailCall()) + if (I1->isIdenticalToWhenDefined(I2)) { + // Hoisting token-returning instructions would obscure the origin. + if (I1->getType()->isTokenTy()) return Changed; - if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) - return Changed; - - // If any of the two call sites has nomerge attribute, stop hoisting. - if (const auto *CB1 = dyn_cast(I1)) - if (CB1->cannotMerge()) + // Even if the instructions are identical, it may not be safe to hoist + // them if we have skipped over instructions with side effects or their + // operands weren't hoisted. + if (!isSafeToHoistInstr(I1, ForceNoReadMemOrSideEffectsBB1, ForceNoSpeculationBB1) || + !isSafeToHoistInstr(I2, ForceNoReadMemOrSideEffectsBB2, ForceNoSpeculationBB2)) return Changed; - if (const auto *CB2 = dyn_cast(I2)) - if (CB2->cannotMerge()) + + // If we're going to hoist a call, make sure that the two instructions + // we're commoning/hoisting are both marked with musttail, or neither of + // them is marked as such. Otherwise, we might end up in a situation where + // we hoist from a block where the terminator is a `ret` to a block where + // the terminator is a `br`, and `musttail` calls expect to be followed by + // a return. + auto *C1 = dyn_cast(I1); + auto *C2 = dyn_cast(I2); + if (C1 && C2) + if (C1->isMustTailCall() != C2->isMustTailCall()) + return Changed; + + if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) return Changed; - if (isa(I1) || isa(I2)) { - assert (isa(I1) && isa(I2)); - // The debug location is an integral part of a debug info intrinsic - // and can't be separated from it or replaced. Instead of attempting - // to merge locations, simply hoist both copies of the intrinsic. - BIParent->getInstList().splice(BI->getIterator(), - BB1->getInstList(), I1); - BIParent->getInstList().splice(BI->getIterator(), - BB2->getInstList(), I2); + // If any of the two call sites has nomerge attribute, stop hoisting. + if (const auto *CB1 = dyn_cast(I1)) + if (CB1->cannotMerge()) + return Changed; + if (const auto *CB2 = dyn_cast(I2)) + if (CB2->cannotMerge()) + return Changed; + + if (isa(I1) || isa(I2)) { + assert(isa(I1) && isa(I2)); + // The debug location is an integral part of a debug info intrinsic + // and can't be separated from it or replaced. Instead of attempting + // to merge locations, simply hoist both copies of the intrinsic. + BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), + I1); + BIParent->getInstList().splice(BI->getIterator(), BB2->getInstList(), + I2); + } else { + // For a normal instruction, we just move one to right before the + // branch, then replace all uses of the other with the first. Finally, + // we remove the now redundant second instruction. + BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), + I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->andIRFlags(I2); + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, + LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_access_group, + LLVMContext::MD_preserve_access_index}; + combineMetadata(I1, I2, KnownIDs, true); + + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + + I2->eraseFromParent(); + } Changed = true; + ++NumHoistCommonInstrs; } else { - // For a normal instruction, we just move one to right before the branch, - // then replace all uses of the other with the first. Finally, we remove - // the now redundant second instruction. - BIParent->getInstList().splice(BI->getIterator(), - BB1->getInstList(), I1); - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->andIRFlags(I2); - unsigned KnownIDs[] = {LLVMContext::MD_tbaa, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, - LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null, - LLVMContext::MD_mem_parallel_loop_access, - LLVMContext::MD_access_group, - LLVMContext::MD_preserve_access_index}; - combineMetadata(I1, I2, KnownIDs, true); - - // I1 and I2 are being combined into a single instruction. Its debug - // location is the merged locations of the original instructions. - I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - - I2->eraseFromParent(); - Changed = true; + if (NumSkipped >= HoistCommonSkipLimit) + return Changed; + // We are about to skip over a pair of non-identical instructions. Record + // if any of them has side effects. + if (I1->mayHaveSideEffects()) + ForceNoReadMemOrSideEffectsBB1 = true; + if (I2->mayHaveSideEffects()) + ForceNoReadMemOrSideEffectsBB2 = true; + if (!I1->willReturn()) + ForceNoSpeculationBB1 = true; + if (!I2->willReturn()) + ForceNoSpeculationBB2 = true; + ++NumSkipped; } - ++NumHoistCommonInstrs; I1 = &*BB1_Itr++; I2 = &*BB2_Itr++; @@ -1578,9 +1654,9 @@ while (isa(I2)) I2 = &*BB2_Itr++; } - } while (I1->isIdenticalToWhenDefined(I2)); + } - return true; + return Changed; HoistTerminator: // It may not be possible to hoist an invoke. Index: llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes='simplifycfg' %s | FileCheck %s +; RUN: opt -S --passes='simplifycfg' -simplifycfg-hoist-common-skip-limit=0 %s | FileCheck %s --check-prefix=LIMIT + +;; Check that the two loads are hoisted to the common predecessor, skipping +;; over the add/sub instructions. +;; Second run line, check that only one of the loads is hoisted, due to +;; the skip limit. + +define i32 @f(ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[D:%.*]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[TMP0]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[CONV2:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[M:%.*]], align 2 +; CHECK-NEXT: [[CONV4:%.*]] = zext i16 [[TMP2]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV2]], [[CONV]] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV4]] +; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[MUL]], 16 +; CHECK-NEXT: [[CONV5:%.*]] = trunc i32 [[TMP3]] to i16 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV2]], [[CONV]] +; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], [[CONV4]] +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[MUL9]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 +; CHECK-NEXT: [[CONV12:%.*]] = sub i16 0, [[TMP5]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i16 [ [[CONV12]], [[IF_ELSE]] ], [ [[CONV5]], [[IF_THEN]] ] +; CHECK-NEXT: store i16 [[STOREMERGE]], ptr [[D]], align 2 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[STOREMERGE]], 0 +; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL]] to i32 +; CHECK-NEXT: ret i32 [[LNOT_EXT]] +; +; LIMIT-LABEL: @f( +; LIMIT-NEXT: entry: +; LIMIT-NEXT: [[TMP0:%.*]] = load i16, ptr [[D:%.*]], align 2 +; LIMIT-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +; LIMIT-NEXT: [[CMP:%.*]] = icmp sgt i16 [[TMP0]], 0 +; LIMIT-NEXT: [[TMP1:%.*]] = load i16, ptr [[B:%.*]], align 2 +; LIMIT-NEXT: [[CONV2:%.*]] = zext i16 [[TMP1]] to i32 +; LIMIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; LIMIT: if.then: +; LIMIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV2]], [[CONV]] +; LIMIT-NEXT: [[TMP2:%.*]] = load i16, ptr [[M:%.*]], align 2 +; LIMIT-NEXT: [[CONV4:%.*]] = zext i16 [[TMP2]] to i32 +; LIMIT-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV4]] +; LIMIT-NEXT: [[TMP3:%.*]] = lshr i32 [[MUL]], 16 +; LIMIT-NEXT: [[CONV5:%.*]] = trunc i32 [[TMP3]] to i16 +; LIMIT-NEXT: br label [[IF_END:%.*]] +; LIMIT: if.else: +; LIMIT-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV2]], [[CONV]] +; LIMIT-NEXT: [[TMP4:%.*]] = load i16, ptr [[M]], align 2 +; LIMIT-NEXT: [[CONV8:%.*]] = zext i16 [[TMP4]] to i32 +; LIMIT-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], [[CONV8]] +; LIMIT-NEXT: [[TMP5:%.*]] = lshr i32 [[MUL9]], 16 +; LIMIT-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16 +; LIMIT-NEXT: [[CONV12:%.*]] = sub i16 0, [[TMP6]] +; LIMIT-NEXT: br label [[IF_END]] +; LIMIT: if.end: +; LIMIT-NEXT: [[STOREMERGE:%.*]] = phi i16 [ [[CONV12]], [[IF_ELSE]] ], [ [[CONV5]], [[IF_THEN]] ] +; LIMIT-NEXT: store i16 [[STOREMERGE]], ptr [[D]], align 2 +; LIMIT-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[STOREMERGE]], 0 +; LIMIT-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL]] to i32 +; LIMIT-NEXT: ret i32 [[LNOT_EXT]] +; + +entry: + %0 = load i16, ptr %d, align 2 + %conv = sext i16 %0 to i32 + %cmp = icmp sgt i16 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %1 = load i16, ptr %b, align 2 + %conv2 = zext i16 %1 to i32 + %add = add nsw i32 %conv2, %conv + %2 = load i16, ptr %m, align 2 + %conv4 = zext i16 %2 to i32 + %mul = mul nsw i32 %add, %conv4 + %3 = lshr i32 %mul, 16 + %conv5 = trunc i32 %3 to i16 + br label %if.end + +if.else: ; preds = %entry + %4 = load i16, ptr %b, align 2 + %conv6 = zext i16 %4 to i32 + %sub = sub nsw i32 %conv6, %conv + %5 = load i16, ptr %m, align 2 + %conv8 = zext i16 %5 to i32 + %mul9 = mul nsw i32 %sub, %conv8 + %6 = lshr i32 %mul9, 16 + %7 = trunc i32 %6 to i16 + %conv12 = sub i16 0, %7 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %storemerge = phi i16 [ %conv12, %if.else ], [ %conv5, %if.then ] + store i16 %storemerge, ptr %d, align 2 + %tobool = icmp ne i16 %storemerge, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +}