Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -133,6 +133,7 @@
 void initializeDataFlowSanitizerPass(PassRegistry&);
 void initializeScalarizerPass(PassRegistry&);
 void initializeEarlyCSELegacyPassPass(PassRegistry &);
+void initializeEarlyGVNLegacyPassPass(PassRegistry &);
 void initializeEliminateAvailableExternallyPass(PassRegistry&);
 void initializeExpandISelPseudosPass(PassRegistry&);
 void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
Index: llvm/include/llvm/LinkAllPasses.h
===================================================================
--- llvm/include/llvm/LinkAllPasses.h
+++ llvm/include/llvm/LinkAllPasses.h
@@ -156,6 +156,7 @@
       (void) llvm::createConstantHoistingPass();
       (void) llvm::createCodeGenPreparePass();
       (void) llvm::createEarlyCSEPass();
+      (void) llvm::createEarlyGVNPass();
       (void) llvm::createMergedLoadStoreMotionPass();
       (void) llvm::createGVNPass();
       (void) llvm::createMemCpyOptPass();
Index: llvm/include/llvm/Transforms/Scalar.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar.h
+++ llvm/include/llvm/Transforms/Scalar.h
@@ -328,6 +328,13 @@
 
 //===----------------------------------------------------------------------===//
 //
+// EarlyGVN - This pass walks the dominator tree and hoists expressions that
+// are common to both successors of a two-way branch into the branching block.
+//
+FunctionPass *createEarlyGVNPass();
+
+//===----------------------------------------------------------------------===//
+//
 // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
 // are hoisted into the header, while stores sink into the footer.
//
Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -58,11 +58,7 @@
   AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
   MemoryDependenceResults &getMemDep() const { return *MD; }
 
-private:
-  friend class gvn::GVNLegacyPass;
-
   struct Expression;
-  friend struct DenseMapInfo<Expression>;
 
   /// This class holds the mapping between values and value numbers. It is used
   /// as an efficient mechanism to determine the expression-wise equivalence of
@@ -105,6 +101,10 @@
     void verifyRemoved(const Value *) const;
   };
 
+private:
+  friend class gvn::GVNLegacyPass;
+  friend struct DenseMapInfo<Expression>;
+
   MemoryDependenceResults *MD;
   DominatorTree *DT;
   const TargetLibraryInfo *TLI;
@@ -229,6 +229,13 @@
 /// loads are eliminated by the pass.
 FunctionPass *createGVNPass(bool NoLoads = false);
 
+/// \brief A simple and fast domtree-based GVN pass to hoist common expressions
+/// from sibling branches.
+struct EarlyGVNPass : PassInfoMixin<EarlyGVNPass> {
+  /// \brief Run the pass over the function.
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+};
+
 }
 
 #endif
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -95,6 +95,7 @@
 FUNCTION_PASS("aa-eval", AAEvaluator())
 FUNCTION_PASS("adce", ADCEPass())
 FUNCTION_PASS("early-cse", EarlyCSEPass())
+FUNCTION_PASS("early-gvn", EarlyGVNPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass())
 FUNCTION_PASS("no-op-function", NoOpFunctionPass())
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -210,6 +210,7 @@
   else
     FPM.add(createScalarReplAggregatesPass());
   FPM.add(createEarlyCSEPass());
+  FPM.add(createEarlyGVNPass());
   FPM.add(createLowerExpectIntrinsicPass());
 }
 
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -48,10 +48,12 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <vector>
+#include <unordered_map>
 using namespace llvm;
 using namespace llvm::gvn;
 using namespace PatternMatch;
@@ -2718,3 +2720,335 @@
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
+
+static cl::opt<int>
+HoistedScalarsThreshold("hoisted-scalars-threshold", cl::Hidden, cl::init(-1),
+                        cl::desc("Max number of scalar instructions to hoist "
+                                 "(default unlimited = -1)"));
+static cl::opt<int>
+HoistedLoadsThreshold("hoisted-loads-threshold", cl::Hidden, cl::init(-1),
+                      cl::desc("Max number of loads to hoist "
+                               "(default unlimited = -1)"));
+static cl::opt<int>
+HoistMaxDepthDependence("hoist-max-depth-dependence", cl::Hidden, cl::init(5),
+                        cl::desc("Max depth of a dependence chain to be hoisted "
+                                 "(default 5, unlimited = -1)"));
+
+// Running totals of hoisted instructions, checked against the thresholds
+// above. They are file-level statics, so they accumulate across functions.
+static int ScalarCounter = 0;
+static int LoadCounter = 0;
+
+namespace {
+// This pass hoists common computations across branches sharing
+// common immediate dominator. The primary goal is to reduce the code size,
+// and in some cases reduce critical path (by exposing more ILP).
+class EarlyGVNLegacyPassImpl {
+public:
+  GVN::ValueTable VN;
+  DominatorTree *DT;
+  AliasAnalysis *AA;
+  MemoryDependenceResults *MD;
+  static char ID;
+
+  EarlyGVNLegacyPassImpl(DominatorTree *dt, AliasAnalysis *aa, MemoryDependenceResults *md)
+    : DT (dt), AA (aa), MD (md)
+  { }
+
+  // Return true when all operands of Instr are available at insertion point
+  // InsertPt. When limiting the number of hoisted expressions, one could hoist
+  // a load without hoisting its access function. So before hoisting any
+  // expression, make sure that all its operands are available at insert point.
+  bool allOperandsAvailable(Instruction *I, Instruction *InsertPt) {
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      Value *Op = I->getOperand(i);
+      Instruction *Inst = dyn_cast<Instruction>(Op);
+      if (!Inst)
+        continue;
+
+      if (!DT->dominates(Inst->getParent(), InsertPt->getParent()))
+        return false;
+    }
+
+    return true;
+  }
+
+  // Hoist all instructions in H at InsertPt: the first instruction of each
+  // pair is moved before InsertPt and replaces the second one, which is erased.
+  void hoist(std::vector<std::pair<Instruction *, Instruction *> > &H,
+             Instruction *InsertPt) {
+    for (std::pair<Instruction *, Instruction *> &P : H) {
+      Instruction *I1 = P.first;
+      Instruction *I2 = P.second;
+      I1->moveBefore(InsertPt);
+      patchAndReplaceAllUsesWith(I2, I1);
+      I2->eraseFromParent();
+      DEBUG(dbgs() << "GVN hoisting: " << *I1 << '\n');
+    }
+  }
+
+  // Hoist scalar operations: instructions of BB2 that are not loads, do not
+  // write to memory, and have the same value number as an instruction of BB1.
+  bool hoistScalars(Instruction *InsertPt, BasicBlock *BB1, BasicBlock *BB2) {
+    bool Changed = false;
+
+    // Record from BB1 all instructions and their VN.
+    std::unordered_map<unsigned, Instruction *> VNtoInstruction;
+    for (Instruction &I1 : *BB1) {
+      unsigned V = VN.lookup_or_add(&I1);
+      VNtoInstruction.insert(std::make_pair(V, &I1));
+    }
+
+    // Scan BB2 for instructions appearing in BB1 with identical VN.
+    std::vector<std::pair<Instruction *, Instruction *> > HoistInstructions;
+    for (Instruction &I2 : *BB2) {
+      unsigned V = VN.lookup_or_add(&I2);
+
+      if (I2.mayWriteToMemory())
+        continue;
+
+      // We are not dealing with loads here.
+      LoadInst *Load = dyn_cast<LoadInst>(&I2);
+      if (Load)
+        continue;
+
+      // Check whether BB1 contains a similar scalar instruction.
+      auto It = VNtoInstruction.find(V);
+      if (It == VNtoInstruction.end())
+        continue;
+
+      // Make sure all operands are available at insertion point.
+      if (!allOperandsAvailable(&I2, InsertPt))
+        continue;
+
+      // Bound the number of hoisted scalar expressions.
+      if (HoistedScalarsThreshold != -1 &&
+          ScalarCounter >= HoistedScalarsThreshold)
+        break;
+      ScalarCounter++;
+
+      // Hoist identical instructions I2 and I1.
+      Changed = true;
+      Instruction *I1 = It->second;
+      HoistInstructions.push_back(std::make_pair(I1, &I2));
+    }
+
+    hoist(HoistInstructions, InsertPt);
+
+    return Changed;
+  }
+
+  // Hoist identical loads from BB1 and BB2 into BB.
+  bool hoistLoads(Instruction *InsertPt, BasicBlock *BB1, BasicBlock *BB2) {
+    bool Changed = false;
+    bool IsTriangle = false;
+
+    // The First BB to be traversed should be the one with single predecessor.
+    if (!BB1->getSinglePredecessor()) {
+      if (!BB2->getSinglePredecessor())
+        return false;
+      if (BB2->getSingleSuccessor() != BB1)
+        return false;
+
+      std::swap(BB1, BB2);
+      IsTriangle = true;
+    } else if (!BB2->getSinglePredecessor()) {
+      if (BB1->getSingleSuccessor() != BB2)
+        return false;
+
+      IsTriangle = true;
+    }
+
+    assert (BB1->getSinglePredecessor() == InsertPt->getParent());
+
+    // Record from BB1 all loads and their access function VN.
+    std::unordered_map<unsigned, LoadInst *> VNtoLoad;
+    for (Instruction &I1 : *BB1) {
+      if (I1.mayHaveSideEffects()) {
+        if (IsTriangle)
+          return false;
+        break;
+      }
+      LoadInst *Load = dyn_cast<LoadInst>(&I1);
+      if (!Load)
+        continue;
+      if (!Load->isSimple())
+        break;
+
+      Value *Ptr = Load->getPointerOperand();
+      unsigned V = VN.lookup_or_add(Ptr);
+      VNtoLoad.insert(std::make_pair(V, Load));
+    }
+
+    if (VNtoLoad.empty())
+      return false;
+
+    // Scan BB2 for loads appearing in BB1 with identical access functions.
+    std::vector<std::pair<Instruction *, Instruction *> > HoistInstructions;
+    for (Instruction &I2 : *BB2) {
+      if (I2.mayHaveSideEffects())
+        break;
+
+      LoadInst *Load = dyn_cast<LoadInst>(&I2);
+      if (!Load)
+        continue;
+      if (!Load->isSimple())
+        break;
+
+      Value *Ptr = Load->getPointerOperand();
+      unsigned V = VN.lookup_or_add(Ptr);
+
+      // Check whether BB1 contains a similar load.
+      auto It = VNtoLoad.find(V);
+      if (It == VNtoLoad.end())
+        continue;
+
+      // Check whether the load elements are of the same type.
+      LoadInst *I1 = It->second;
+      if (cast<PointerType>(I1->getPointerOperand()->getType())->getElementType() !=
+          cast<PointerType>(Ptr->getType())->getElementType())
+        continue;
+
+      // Make sure all operands are available at insertion point.
+      if (!allOperandsAvailable(&I2, InsertPt))
+        continue;
+
+      // Bound the number of hoisted load expressions.
+      if (HoistedLoadsThreshold != -1 &&
+          LoadCounter >= HoistedLoadsThreshold)
+        break;
+      LoadCounter++;
+
+      // Hoist identical load instructions I2 and I1.
+      Changed = true;
+      HoistInstructions.push_back(std::make_pair(I1, &I2));
+    }
+
+    // Code generate.
+    hoist(HoistInstructions, InsertPt);
+
+    return Changed;
+  }
+
+  // Hoist all expressions. Return true when code has been hoisted under Dom.
+  bool hoistExpressions(DomTreeNodeBase<BasicBlock> *Dom) {
+    // Depth first search for the leaves of the dominator tree. We start
+    // hoisting expressions from the bottom up because that would allow some
+    // expressions to be hoisted several times.
+    bool Res = false;
+    for (auto *Child : *Dom)
+      Res |= hoistExpressions(Child);
+
+    BasicBlock *BB = Dom->getBlock();
+    // Only handle two branches for now: it is possible to extend the hoisting
+    // to switch statements.
+    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+    if (!BI || BI->getNumSuccessors() != 2)
+      return Res;
+
+    BasicBlock *BB1 = BI->getSuccessor(0);
+    BasicBlock *BB2 = BI->getSuccessor(1);
+    // A conditional branch may name the same successor twice: bail out, as
+    // every instruction would otherwise be paired with itself and erased.
+    if (BB1 == BB2)
+      return Res;
+
+    if (!DT->properlyDominates(BB, BB1) ||
+        !DT->properlyDominates(BB, BB2) ||
+        BB1->isEHPad() || BB1->hasAddressTaken() ||
+        BB2->isEHPad() || BB2->hasAddressTaken())
+      return Res;
+
+    bool Changed;
+    int Depth = HoistMaxDepthDependence;
+    do {
+      // Limit to HoistMaxDepthDependence the number of iterations in order to
+      // avoid O(N^2) behavior: dependent instructions are hoisted one at a time
+      // in subsequent iterations of this loop.
+      if (Depth == 0)
+        break;
+      if (Depth != -1)
+        --Depth;
+
+      Changed = hoistScalars(BB->getTerminator(), BB1, BB2);
+      if (hoistLoads(BB->getTerminator(), BB1, BB2)) {
+        // Clear the value number table as otherwise the scalar computations
+        // depending on the loads would not get value numbered again based on the
+        // hoisted loads.
+        VN.clear();
+        Changed = true;
+      }
+
+      if (Changed)
+        Res = true;
+    } while (Changed);
+
+    return Res;
+  }
+
+  // Set up the value table and hoist over the whole dominator tree. Return
+  // true when the function has been modified.
+  bool run() {
+    VN.setDomTree(DT);
+    VN.setAliasAnalysis(AA);
+    VN.setMemDep(MD);
+    return hoistExpressions(DT->getNode(DT->getRoot()));
+  }
+};
+
+// Legacy pass manager wrapper around EarlyGVNLegacyPassImpl.
+class EarlyGVNLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  EarlyGVNLegacyPass() : FunctionPass(ID) {
+    initializeEarlyGVNLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipOptnoneFunction(F))
+      return false;
+
+    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+
+    EarlyGVNLegacyPassImpl G (&DT, &AA, &MD);
+    return G.run();
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MemoryDependenceWrapperPass>();
+
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+};
+} // namespace
+
+// New pass manager entry point; everything is preserved when nothing moved.
+PreservedAnalyses
+EarlyGVNPass::run(Function &F,
+                  AnalysisManager<Function> &AM) {
+  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  AliasAnalysis &AA = AM.getResult<AAManager>(F);
+  MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+
+  EarlyGVNLegacyPassImpl G (&DT, &AA, &MD);
+  if (!G.run())
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  return PA;
+}
+
+char EarlyGVNLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(EarlyGVNLegacyPass, "early-gvn", "Early GVN Hoisting of Expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(EarlyGVNLegacyPass, "early-gvn", "Early GVN Hoisting of Expressions", false, false)
+
+FunctionPass *llvm::createEarlyGVNPass() { return new EarlyGVNLegacyPass(); }
Index: llvm/lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/Scalar.cpp
+++ llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -43,6 +43,7 @@
initializeDSEPass(Registry);
   initializeGVNLegacyPassPass(Registry);
   initializeEarlyCSELegacyPassPass(Registry);
+  initializeEarlyGVNLegacyPassPass(Registry);
   initializeFlattenCFGPassPass(Registry);
   initializeInductiveRangeCheckEliminationPass(Registry);
   initializeIndVarSimplifyPass(Registry);
@@ -236,6 +237,10 @@
   unwrap(PM)->add(createEarlyCSEPass());
 }
 
+void LLVMAddEarlyGVNLegacyPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createEarlyGVNPass());
+}
+
 void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createTypeBasedAAWrapperPass());
 }
Index: llvm/test/Transforms/GVN/hoist.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/hoist.ll
@@ -0,0 +1,246 @@
+; RUN: opt -early-gvn -hoist-max-depth-dependence=1 -S < %s | FileCheck --check-prefix=DEP1 %s
+; RUN: opt -early-gvn -hoist-max-depth-dependence=2 -S < %s | FileCheck --check-prefix=DEP2 %s
+; RUN: opt -early-gvn -hoist-max-depth-dependence=3 -S < %s | FileCheck --check-prefix=DEP3 %s
+; RUN: opt -early-gvn -hoist-max-depth-dependence=4 -S < %s | FileCheck --check-prefix=DEP4 %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Both branches perform the same 3 loads, 2 fsub and 2 fmul: after hoisting we
+; should only have 3 loads, 2 fsub, 2 fmul, instead of 6 loads, 4 fsub, 4 fmul.
+;
+; DEP1-LABEL: @memgvn
+; DEP2-LABEL: @memgvn
+; DEP3-LABEL: @memgvn
+; DEP4-LABEL: @memgvn
+; DEP3: load
+; DEP3: load
+; DEP3: load
+; DEP3: fsub
+; DEP3: fsub
+; DEP3: fmul
+; DEP3: fmul
+; DEP3-NOT: load
+; DEP3-NOT: fmul
+; DEP3-NOT: fsub
+
+define float @memgvn(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that we do not hoist loads past a store: the two loads preceding the
+; store in if.then are hoisted, whereas the load following it is not.
+;
+; DEP1-LABEL: @readsAndWrites
+; DEP2-LABEL: @readsAndWrites
+; DEP3-LABEL: @readsAndWrites
+; DEP4-LABEL: @readsAndWrites
+; DEP3: load
+; DEP3: load
+; DEP3: fsub
+; DEP3: fmul
+; DEP3: store
+; DEP3: load
+; DEP3: fsub
+; DEP3: fmul
+; DEP3: load
+; DEP3: fsub
+; DEP3: fmul
+; DEP3-NOT: load
+; DEP3-NOT: fmul
+; DEP3-NOT: fsub
+
+@G = internal global float 1.000000e+00
+
+define float @readsAndWrites(float %d, float* %min, float* %max, float* %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %0 = load float, float* %min, align 4
+  %1 = load float, float* %a, align 4
+  store float %0, float* @G
+  %sub = fsub float %0, %1
+  %mul = fmul float %sub, %div
+  %2 = load float, float* %max, align 4
+  %sub1 = fsub float %2, %1
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %3 = load float, float* %max, align 4
+  %4 = load float, float* %a, align 4
+  %sub3 = fsub float %3, %4
+  %mul4 = fmul float %sub3, %div
+  %5 = load float, float* %min, align 4
+  %sub5 = fsub float %5, %4
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that all independent expressions are hoisted at dependence depth 1.
+; DEP1-LABEL: @dependenceChain1
+; DEP2-LABEL: @dependenceChain1
+; DEP3-LABEL: @dependenceChain1
+; DEP4-LABEL: @dependenceChain1
+; DEP1: fadd
+; DEP1: fsub
+; DEP1: fdiv
+; DEP1: fmul
+; DEP1-NOT: fsub
+; DEP1-NOT: fdiv
+; DEP1-NOT: fmul
+define float @dependenceChain1(float %a, float %b, i1 %c) {
+entry:
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %d = fadd float %b, %a
+  %e = fsub float %b, %a
+  %f = fdiv float %b, %a
+  %g = fmul float %b, %a
+  br label %if.end
+
+if.else:
+  %i = fadd float %b, %a
+  %h = fsub float %b, %a
+  %j = fdiv float %b, %a
+  %k = fmul float %b, %a
+  br label %if.end
+
+if.end:
+  %p = phi float [ %d, %if.then ], [ %i, %if.else ]
+  %q = phi float [ %e, %if.then ], [ %h, %if.else ]
+  %r = phi float [ %f, %if.then ], [ %j, %if.else ]
+  %s = phi float [ %g, %if.then ], [ %k, %if.else ]
+  %t = fadd float %p, %q
+  %u = fadd float %r, %s
+  %v = fadd float %t, %u
+  ret float %v
+}
+
+; The fmul depend on the fsub, so two iterations are needed to hoist them all:
+; at depth 2 we should only have 2 fsub and 2 fmul instead of 4 fsub and 4 fmul.
+;
+; DEP1-LABEL: @scalars
+; DEP2-LABEL: @scalars
+; DEP3-LABEL: @scalars
+; DEP4-LABEL: @scalars
+; DEP2: fsub
+; DEP2: fsub
+; DEP2: fmul
+; DEP2: fmul
+; DEP2-NOT: fmul
+; DEP2-NOT: fsub
+define float @scalars(float %d, float %min, float %max, float %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %sub = fsub float %min, %a
+  %mul = fmul float %sub, %div
+  %sub1 = fsub float %max, %a
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %sub3 = fsub float %max, %a
+  %mul4 = fmul float %sub3, %div
+  %sub5 = fsub float %min, %a
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that -hoist-max-depth-dependence bounds the hoisting of a dependence
+; chain: at depth 4 all 4 expressions are hoisted, whereas at depth 3 the last
+; fmul is left in both branches.
+
+; DEP1-LABEL: @dependenceChain4
+; DEP2-LABEL: @dependenceChain4
+; DEP3-LABEL: @dependenceChain4
+; DEP4-LABEL: @dependenceChain4
+; DEP4: fsub
+; DEP4: fadd
+; DEP4: fdiv
+; DEP4: fmul
+; DEP4-NOT: fsub
+; DEP4-NOT: fadd
+; DEP4-NOT: fdiv
+; DEP4-NOT: fmul
+; With only three iterations the trailing fmul stays in both branches.
+; DEP3: fsub
+; DEP3: fadd
+; DEP3: fdiv
+; DEP3: fmul
+; DEP3: fmul
+; DEP3-NOT: fsub
+; DEP3-NOT: fadd
+; DEP3-NOT: fdiv
+; DEP3-NOT: fmul
+define float @dependenceChain4(float %a, float %b, i1 %c) {
+entry:
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %d = fsub float %b, %a
+  %e = fadd float %d, %a
+  %f = fdiv float %e, %a
+  %g = fmul float %f, %a
+  br label %if.end
+
+if.else:
+  %h = fsub float %b, %a
+  %i = fadd float %h, %a
+  %j = fdiv float %i, %a
+  %k = fmul float %j, %a
+  br label %if.end
+
+if.end:
+  %r = phi float [ %g, %if.then ], [ %k, %if.else ]
+  ret float %r
+}