Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -116,6 +116,12 @@ HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block")); +static cl::opt + HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, + cl::init(20), + cl::desc("Allow reordering across at most this many " + "instructions when hoisting")); + static cl::opt SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); @@ -1423,6 +1429,56 @@ return true; } +// Get interesting characteristics of instructions that `HoistThenElseCodeToIf` +// didn't hoist. They restrict what kind of instructions can be reordered +// across. +enum SkipFlags { + SkipReadMem = 1, + SkipSideEffect = 2, + SkipImplicitControlFlow = 4 +}; + +static unsigned skippedInstrFlags(Instruction *I) { + unsigned Flags = 0; + if (I->mayReadFromMemory()) + Flags |= SkipReadMem; + if (I->mayHaveSideEffects()) + Flags |= SkipSideEffect; + if (!isGuaranteedToTransferExecutionToSuccessor(I)) + Flags |= SkipImplicitControlFlow; + return Flags; +} + +// Returns true if it is safe to reorder an instruction across preceding +// instructions in a basic block. +static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { + // Don't reorder a store over a load. + if ((Flags & SkipReadMem) && I->mayWriteToMemory()) + return false; + + // If we have seen an instruction with side effects, it's unsafe to reorder an + // instruction which reads memory or itself has side effects. + if ((Flags & SkipSideEffect) && + (I->mayReadFromMemory() || I->mayHaveSideEffects())) + return false; + + // Reordering across an instruction which does not necessarily transfer + // control to the next instruction is speculation. + if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I)) + return false; + + // It's also unsafe/illegal to hoist an instruction above its instruction + // operands + BasicBlock *BB = I->getParent(); + for (Value *Op : I->operands()) { + if (auto *J = dyn_cast(Op)) + if (J->getParent() == BB) + return false; + } + + return true; +} + static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); /// Given a conditional branch that goes to BB1 and BB2, hoist any common code @@ -1437,7 +1493,8 @@ // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As // such, we currently just scan for obviously identical instructions in an - // identical order. + // identical order, possibly separated by the same number of non-identical + // instructions. BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. BasicBlock *BB2 = BI->getSuccessor(1); // The false destination @@ -1460,7 +1517,7 @@ while (isa(I2)) I2 = &*BB2_Itr++; } - if (isa(I1) || !I1->isIdenticalToWhenDefined(I2)) + if (isa(I1)) return false; BasicBlock *BIParent = BI->getParent(); @@ -1486,75 +1543,107 @@ // terminator. Let the loop below handle those 2 cases. } - do { + // Count how many instructions were not hoisted so far. There's a limit on how + // many instructions we skip, serving as a compilation time control as well as + // preventing excessive increase of life ranges. + unsigned NumSkipped = 0; + + // Record any skipped instuctions that may read memory, write memory or have + // side effects, or have implicit control flow. + unsigned SkipFlagsBB1 = 0; + unsigned SkipFlagsBB2 = 0; + + for (;;) { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. - if (I1->isTerminator()) + if (I1->isTerminator() || I2->isTerminator()) { + // If any instructions remain in the block, we cannot hoist terminators. + if (NumSkipped || !I1->isIdenticalToWhenDefined(I2)) + return Changed; goto HoistTerminator; + } - // If we're going to hoist a call, make sure that the two instructions we're - // commoning/hoisting are both marked with musttail, or neither of them is - // marked as such. Otherwise, we might end up in a situation where we hoist - // from a block where the terminator is a `ret` to a block where the terminator - // is a `br`, and `musttail` calls expect to be followed by a return. - auto *C1 = dyn_cast(I1); - auto *C2 = dyn_cast(I2); - if (C1 && C2) - if (C1->isMustTailCall() != C2->isMustTailCall()) + if (I1->isIdenticalToWhenDefined(I2)) { + // Even if the instructions are identical, it may not be safe to hoist + // them if we have skipped over instructions with side effects or their + // operands weren't hoisted. + if (!isSafeToHoistInstr(I1, SkipFlagsBB1) || + !isSafeToHoistInstr(I2, SkipFlagsBB2)) return Changed; - if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) - return Changed; - - // If any of the two call sites has nomerge attribute, stop hoisting. - if (const auto *CB1 = dyn_cast(I1)) - if (CB1->cannotMerge()) - return Changed; - if (const auto *CB2 = dyn_cast(I2)) - if (CB2->cannotMerge()) + // If we're going to hoist a call, make sure that the two instructions + // we're commoning/hoisting are both marked with musttail, or neither of + // them is marked as such. Otherwise, we might end up in a situation where + // we hoist from a block where the terminator is a `ret` to a block where + // the terminator is a `br`, and `musttail` calls expect to be followed by + // a return. + auto *C1 = dyn_cast(I1); + auto *C2 = dyn_cast(I2); + if (C1 && C2) + if (C1->isMustTailCall() != C2->isMustTailCall()) + return Changed; + + if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) return Changed; - if (isa(I1) || isa(I2)) { - assert (isa(I1) && isa(I2)); - // The debug location is an integral part of a debug info intrinsic - // and can't be separated from it or replaced. Instead of attempting - // to merge locations, simply hoist both copies of the intrinsic. - BIParent->getInstList().splice(BI->getIterator(), - BB1->getInstList(), I1); - BIParent->getInstList().splice(BI->getIterator(), - BB2->getInstList(), I2); + // If any of the two call sites has nomerge attribute, stop hoisting. + if (const auto *CB1 = dyn_cast(I1)) + if (CB1->cannotMerge()) + return Changed; + if (const auto *CB2 = dyn_cast(I2)) + if (CB2->cannotMerge()) + return Changed; + + if (isa(I1) || isa(I2)) { + assert(isa(I1) && isa(I2)); + // The debug location is an integral part of a debug info intrinsic + // and can't be separated from it or replaced. Instead of attempting + // to merge locations, simply hoist both copies of the intrinsic. + BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), + I1); + BIParent->getInstList().splice(BI->getIterator(), BB2->getInstList(), + I2); + } else { + // For a normal instruction, we just move one to right before the + // branch, then replace all uses of the other with the first. Finally, + // we remove the now redundant second instruction. + BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), + I1); + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->andIRFlags(I2); + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group, + LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_access_group, + LLVMContext::MD_preserve_access_index}; + combineMetadata(I1, I2, KnownIDs, true); + + // I1 and I2 are being combined into a single instruction. Its debug + // location is the merged locations of the original instructions. + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + + I2->eraseFromParent(); + } Changed = true; + ++NumHoistCommonInstrs; } else { - // For a normal instruction, we just move one to right before the branch, - // then replace all uses of the other with the first. Finally, we remove - // the now redundant second instruction. - BIParent->getInstList().splice(BI->getIterator(), - BB1->getInstList(), I1); - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->andIRFlags(I2); - unsigned KnownIDs[] = {LLVMContext::MD_tbaa, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, - LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null, - LLVMContext::MD_mem_parallel_loop_access, - LLVMContext::MD_access_group, - LLVMContext::MD_preserve_access_index}; - combineMetadata(I1, I2, KnownIDs, true); - - // I1 and I2 are being combined into a single instruction. Its debug - // location is the merged locations of the original instructions. - I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - - I2->eraseFromParent(); - Changed = true; + if (NumSkipped >= HoistCommonSkipLimit) + return Changed; + // We are about to skip over a pair of non-identical instructions. Record + // if any have characteristics that would prevent reordering instructions + // across them. + SkipFlagsBB1 |= skippedInstrFlags(I1); + SkipFlagsBB2 |= skippedInstrFlags(I2); + ++NumSkipped; } - ++NumHoistCommonInstrs; I1 = &*BB1_Itr++; I2 = &*BB2_Itr++; @@ -1567,9 +1656,9 @@ while (isa(I2)) I2 = &*BB2_Itr++; } - } while (I1->isIdenticalToWhenDefined(I2)); + } - return true; + return Changed; HoistTerminator: // It may not be possible to hoist an invoke. Index: llvm/test/Transforms/SimplifyCFG/hoist-common-skip-limit.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/hoist-common-skip-limit.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes='simplifycfg' -simplifycfg-hoist-common-skip-limit=0 %s | FileCheck %s --check-prefix=LIMIT0 +; RUN: opt -S --passes='simplifycfg' -simplifycfg-hoist-common-skip-limit=1 %s | FileCheck %s --check-prefix=LIMIT1 +; RUN: opt -S --passes='simplifycfg' -simplifycfg-hoist-common-skip-limit=2 %s | FileCheck %s --check-prefix=LIMIT2 + +define void @f(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +; LIMIT0-LABEL: @f( +; LIMIT0-NEXT: entry: +; LIMIT0-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; LIMIT0-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; LIMIT0: if.then: +; LIMIT0-NEXT: call void @no_side_effects0() +; LIMIT0-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP0]], 1 +; LIMIT0-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +; LIMIT0-NEXT: [[U:%.*]] = add i16 [[ADD]], [[TMP1]] +; LIMIT0-NEXT: br label [[IF_END:%.*]] +; LIMIT0: if.else: +; LIMIT0-NEXT: call void @no_side_effects1() +; LIMIT0-NEXT: [[SUB:%.*]] = sub nsw i16 [[TMP0]], 1 +; LIMIT0-NEXT: [[TMP2:%.*]] = load i16, ptr [[M]], align 2 +; LIMIT0-NEXT: [[V:%.*]] = add i16 [[SUB]], [[TMP2]] +; LIMIT0-NEXT: br label [[IF_END]] +; LIMIT0: if.end: +; LIMIT0-NEXT: [[UV:%.*]] = phi i16 [ [[V]], [[IF_ELSE]] ], [ [[U]], [[IF_THEN]] ] +; LIMIT0-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +; LIMIT0-NEXT: ret void +; +; LIMIT1-LABEL: @f( +; LIMIT1-NEXT: entry: +; LIMIT1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; LIMIT1-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; LIMIT1: if.then: +; LIMIT1-NEXT: call void @no_side_effects0() +; LIMIT1-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP0]], 1 +; LIMIT1-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +; LIMIT1-NEXT: [[U:%.*]] = add i16 [[ADD]], [[TMP1]] +; LIMIT1-NEXT: br label [[IF_END:%.*]] +; LIMIT1: if.else: +; LIMIT1-NEXT: call void @no_side_effects1() +; LIMIT1-NEXT: [[SUB:%.*]] = sub nsw i16 [[TMP0]], 1 +; LIMIT1-NEXT: [[TMP2:%.*]] = load i16, ptr [[M]], align 2 +; LIMIT1-NEXT: [[V:%.*]] = add i16 [[SUB]], [[TMP2]] +; LIMIT1-NEXT: br label [[IF_END]] +; LIMIT1: if.end: +; LIMIT1-NEXT: [[UV:%.*]] = phi i16 [ [[V]], [[IF_ELSE]] ], [ [[U]], [[IF_THEN]] ] +; LIMIT1-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +; LIMIT1-NEXT: ret void +; +; LIMIT2-LABEL: @f( +; LIMIT2-NEXT: entry: +; LIMIT2-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; LIMIT2-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +; LIMIT2-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; LIMIT2: if.then: +; LIMIT2-NEXT: call void @no_side_effects0() +; LIMIT2-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP0]], 1 +; LIMIT2-NEXT: [[U:%.*]] = add i16 [[ADD]], [[TMP1]] +; LIMIT2-NEXT: br label [[IF_END:%.*]] +; LIMIT2: if.else: +; LIMIT2-NEXT: call void @no_side_effects1() +; LIMIT2-NEXT: [[SUB:%.*]] = sub nsw i16 [[TMP0]], 1 +; LIMIT2-NEXT: [[V:%.*]] = add i16 [[SUB]], [[TMP1]] +; LIMIT2-NEXT: br label [[IF_END]] +; LIMIT2: if.end: +; LIMIT2-NEXT: [[UV:%.*]] = phi i16 [ [[V]], [[IF_ELSE]] ], [ [[U]], [[IF_THEN]] ] +; LIMIT2-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +; LIMIT2-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %0 = load i16, ptr %b, align 2 + call void @no_side_effects0() + %add = add nsw i16 %0, 1 + %1 = load i16, ptr %m, align 2 + %u = add i16 %add, %1 + br label %if.end + +if.else: + %2 = load i16, ptr %b, align 2 + call void @no_side_effects1() + %sub = sub nsw i16 %2, 1 + %3 = load i16, ptr %m, align 2 + %v = add i16 %sub, %3 + br label %if.end + +if.end: + %uv = phi i16 [ %v, %if.else ], [ %u, %if.then ] + store i16 %uv, ptr %d, align 2 + ret void +} + +declare void @side_effects0() +declare void @side_effects1() +declare void @no_side_effects0() readonly nounwind willreturn +declare void @no_side_effects1() readonly nounwind willreturn Index: llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/hoist-common-skip.ll @@ -0,0 +1,393 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes='simplifycfg' %s | FileCheck %s + +;; Check that the two loads are hoisted to the common predecessor, skipping +;; over the add/sub instructions. + +define void @f0(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +; CHECK-LABEL: @f0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP0]], 1 +; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD]], [[TMP1]] +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[SUB]], 3 +; CHECK-NEXT: [[V:%.*]] = add i16 [[SUB]], [[TMP2]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[V]], [[IF_ELSE]] ], [ [[U]], [[IF_THEN]] ] +; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %0 = load i16, ptr %b, align 2 + %add = add nsw i16 %0, 1 + %1 = load i16, ptr %m, align 2 + %u = add i16 %add, %1 + br label %if.end + +if.else: + %2 = load i16, ptr %b, align 2 + %sub = sub nsw i16 %2, 1 + %3 = load i16, ptr %m, align 2 + %4 = add i16 %sub, 3 + %v = add i16 %sub, %4 + br label %if.end + +if.end: + %uv = phi i16 [ %v, %if.else ], [ %u, %if.then ] + store i16 %uv, ptr %d, align 2 + ret void +} + + +;; Check some instructions (e.g. add) can be reordered across instructions with side +;; effects, while others (e.g. load) can't. +define void @f2(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +; CHECK-LABEL: @f2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[ADD_0:%.*]] = add nsw i16 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @side_effects0() +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD_0]], [[TMP1]] +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @no_side_effects0() +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[M]], align 2 +; CHECK-NEXT: [[V:%.*]] = add i16 [[ADD_0]], [[TMP2]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[V]], [[IF_ELSE]] ], [ [[U]], [[IF_THEN]] ] +; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %0 = load i16, ptr %b, align 2 + call void @side_effects0() + %add.0 = add nsw i16 %0, 1 + %1 = load i16, ptr %m, align 2 + %u = add i16 %add.0, %1 + br label %if.end + +if.else: + %2 = load i16, ptr %b, align 2 + call void @no_side_effects0() + %add.1 = add nsw i16 %2, 1 + %3 = load i16, ptr %m, align 2 + %v = add i16 %add.1, %3 + br label %if.end + +if.end: + %uv = phi i16 [ %v, %if.else ], [ %u, %if.then ] + store i16 %uv, ptr %d, align 2 + ret void +} + + +;; Check indeed it was the side effects that prevented hoisting the load +;; in the previous test. +define void @f3(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +; CHECK-LABEL: @f3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[ADD_0:%.*]] = add nsw i16 [[TMP0]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +; CHECK-NEXT: [[U:%.*]] = add i16 [[ADD_0]], [[TMP1]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @no_side_effects0() +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @no_side_effects1() +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: store i16 [[U]], ptr [[D:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %0 = load i16, ptr %b, align 2 + call void @no_side_effects0() + %add.0 = add nsw i16 %0, 1 + %1 = load i16, ptr %m, align 2 + %u = add i16 %add.0, %1 + br label %if.end + +if.else: + %2 = load i16, ptr %b, align 2 + call void @no_side_effects1() + %add.1 = add nsw i16 %2, 1 + %3 = load i16, ptr %m, align 2 + %v = add i16 %add.1, %3 + br label %if.end + +if.end: + %uv = phi i16 [ %v, %if.else ], [ %u, %if.then ] + store i16 %uv, ptr %d, align 2 + ret void +} + +;; Check some instructions (e.g. sdiv) are not speculatively executed. + +;; Division by non-zero constant OK to speculate ... +define void @f4(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +; CHECK-LABEL: @f4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 [[TMP0]], 2 +; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @side_effects0() +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @side_effects1() +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: store i16 [[U]], ptr [[D:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %0 = load i16, ptr %b, align 2 + call void @side_effects0() + %div.0 = sdiv i16 %0, 2 + %u = add i16 %div.0, %0 + br label %if.end + +if.else: + %1 = load i16, ptr %b, align 2 + call void @side_effects1() + %div.1 = sdiv i16 %1, 2 + %v = add i16 %div.1, %1 + br label %if.end + +if.end: + %uv = phi i16 [ %v, %if.else ], [ %u, %if.then ] + store i16 %uv, ptr %d, align 2 + ret void +} + +;; ... but not a general division ... +define void @f5(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +; CHECK-LABEL: @f5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @side_effects0() +; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 211, [[TMP0]] +; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @side_effects1() +; CHECK-NEXT: [[DIV_1:%.*]] = sdiv i16 211, [[TMP0]] +; CHECK-NEXT: [[V:%.*]] = add i16 [[DIV_1]], [[TMP0]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[UV:%.*]] = phi i16 [ [[V]], [[IF_ELSE]] ], [ [[U]], [[IF_THEN]] ] +; CHECK-NEXT: store i16 [[UV]], ptr [[D:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %0 = load i16, ptr %b, align 2 + call void @side_effects0() + %div.0 = sdiv i16 211, %0 + %u = add i16 %div.0, %0 + br label %if.end + +if.else: + %1 = load i16, ptr %b, align 2 + call void @side_effects1() + %div.1 = sdiv i16 211, %1 + %v = add i16 %div.1, %1 + br label %if.end + +if.end: + %uv = phi i16 [ %v, %if.else ], [ %u, %if.then ] + store i16 %uv, ptr %d, align 2 + ret void +} + +;; ... and it's also OK to hoist the division when there's no speculation happening. +define void @f6(i1 %c, ptr nocapture noundef %d, ptr nocapture noundef readonly %m, ptr nocapture noundef readonly %b) { +; CHECK-LABEL: @f6( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[DIV_0:%.*]] = sdiv i16 211, [[TMP0]] +; CHECK-NEXT: [[U:%.*]] = add i16 [[DIV_0]], [[TMP0]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @no_side_effects0() +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @no_side_effects1() +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: store i16 [[U]], ptr [[D:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %0 = load i16, ptr %b, align 2 + call void @no_side_effects0() + %div.0 = sdiv i16 211, %0 + %u = add i16 %div.0, %0 + br label %if.end + +if.else: + %1 = load i16, ptr %b, align 2 + call void @no_side_effects1() + %div.1 = sdiv i16 211, %1 + %v = add i16 %div.1, %1 + br label %if.end + +if.end: + %uv = phi i16 [ %v, %if.else ], [ %u, %if.then ] + store i16 %uv, ptr %d, align 2 + ret void +} + +;; No reorder of store over a load. +define i16 @f7(i1 %c, ptr %a, ptr %b) { +; CHECK-LABEL: @f7( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[VA:%.*]] = load i16, ptr [[A:%.*]], align 2 +; CHECK-NEXT: store i16 0, ptr [[B:%.*]], align 2 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B]], align 2 +; CHECK-NEXT: store i16 0, ptr [[B]], align 2 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[IF_THEN]] ], [ [[VB]], [[IF_ELSE]] ] +; CHECK-NEXT: ret i16 [[V]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %va = load i16, ptr %a, align 2 + store i16 0, ptr %b, align 2 + br label %if.end + +if.else: + %vb = load i16, ptr %b, align 2 + store i16 0, ptr %b, align 2 + br label %if.end + +if.end: + %v = phi i16 [ %va, %if.then ], [ %vb, %if.else ] + ret i16 %v +} + +;; Can reorder load over another load +define i16 @f8(i1 %cond, ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: @f8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_0:%.*]] = load i16, ptr [[C:%.*]], align 2 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[VA:%.*]] = load i16, ptr [[A:%.*]], align 2 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[IF_THEN]] ], [ [[VB]], [[IF_ELSE]] ] +; CHECK-NEXT: [[U:%.*]] = phi i16 [ [[C_0]], [[IF_THEN]] ], [ [[C_0]], [[IF_ELSE]] ] +; CHECK-NEXT: [[W:%.*]] = add i16 [[V]], [[U]] +; CHECK-NEXT: ret i16 [[W]] +; +entry: + br i1 %cond, label %if.then, label %if.else + +if.then: + %va = load i16, ptr %a, align 2 + %c.0 = load i16, ptr %c + br label %if.end + +if.else: + %vb = load i16, ptr %b, align 2 + %c.1 = load i16, ptr %c + br label %if.end + +if.end: + %v = phi i16 [ %va, %if.then ], [ %vb, %if.else ] + %u = phi i16 [ %c.0, %if.then ], [ %c.1, %if.else ] + + %w = add i16 %v, %u + + ret i16 %w +} + +;; Currently won't reorder volatile and non-volatile loads. +define i16 @f9(i1 %cond, ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: @f9( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[VA:%.*]] = load volatile i16, ptr [[A:%.*]], align 2 +; CHECK-NEXT: [[C_0:%.*]] = load i16, ptr [[C:%.*]], align 2 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[VB:%.*]] = load i16, ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[C_1:%.*]] = load i16, ptr [[C]], align 2 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[V:%.*]] = phi i16 [ [[VA]], [[IF_THEN]] ], [ [[VB]], [[IF_ELSE]] ] +; CHECK-NEXT: [[U:%.*]] = phi i16 [ [[C_0]], [[IF_THEN]] ], [ [[C_1]], [[IF_ELSE]] ] +; CHECK-NEXT: [[W:%.*]] = add i16 [[V]], [[U]] +; CHECK-NEXT: ret i16 [[W]] +; +entry: + br i1 %cond, label %if.then, label %if.else + +if.then: + %va = load volatile i16, ptr %a, align 2 + %c.0 = load i16, ptr %c + br label %if.end + +if.else: + %vb = load i16, ptr %b, align 2 + %c.1 = load i16, ptr %c + br label %if.end + +if.end: + %v = phi i16 [ %va, %if.then ], [ %vb, %if.else ] + %u = phi i16 [ %c.0, %if.then ], [ %c.1, %if.else ] + + %w = add i16 %v, %u + + ret i16 %w +} + +declare void @side_effects0() +declare void @side_effects1() +declare void @no_side_effects0() readonly nounwind willreturn +declare void @no_side_effects1() readonly nounwind willreturn