Index: lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnswitch.cpp +++ lib/Transforms/Scalar/LoopUnswitch.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -195,10 +196,12 @@ /// Update the appropriate Phi nodes as we do so. void SplitExitEdges(Loop *L, const SmallVectorImpl &ExitBlocks); - bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); + bool UnswitchIfProfitable(Value *LoopCond, Constant *Val, + TerminatorInst *TI = nullptr); void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, - BasicBlock *ExitBlock); - void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); + BasicBlock *ExitBlock, TerminatorInst *TI); + void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, + TerminatorInst *TI); void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool isEqual); @@ -206,7 +209,8 @@ void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, - Instruction *InsertPt); + Instruction *InsertPt, + TerminatorInst *TI); void SimplifyCode(std::vector &Worklist, Loop *L); bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr, @@ -453,8 +457,8 @@ // unswitch on it if we desire. Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed); - if (LoopCond && UnswitchIfProfitable(LoopCond, - ConstantInt::getTrue(Context))) { + if (LoopCond && + UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { ++NumBranches; return true; } @@ -643,7 +647,8 @@ /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. -bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { +bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, + TerminatorInst *TI) { Function *F = loopHeader->getParent(); Constant *CondVal = nullptr; BasicBlock *ExitBlock = nullptr; @@ -651,7 +656,7 @@ if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { // If the condition is trivial, always unswitch. There is no code growth // for this case. - UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI); return true; } @@ -661,7 +666,7 @@ if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize)) return false; - UnswitchNontrivialCondition(LoopCond, Val, currentLoop); + UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI); return true; } @@ -685,25 +690,65 @@ return New; } +static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst, + bool Swapped) { + if (!SrcInst || !SrcInst->hasMetadata()) + return; + + SmallVector, 4> MDs; + SrcInst->getAllMetadata(MDs); + for (auto &MD : MDs) { + switch (MD.first) { + default: + break; + case LLVMContext::MD_prof: + if (Swapped && MD.second->getNumOperands() == 3 && + isa(MD.second->getOperand(0))) { + MDString *MDName = cast(MD.second->getOperand(0)); + if (MDName->getString() == "branch_weights") { + auto *ValT = cast_or_null( + MD.second->getOperand(1))->getValue(); + auto *ValF = cast_or_null( + MD.second->getOperand(2))->getValue(); + assert(ValT && ValF && "Invalid Operands of branch_weights"); + auto NewMD = + MDBuilder(DstInst->getParent()->getContext()) + .createBranchWeights(cast(ValF)->getZExtValue(), + cast(ValT)->getZExtValue()); + MD.second = NewMD; + } + } + // fallthrough. + case LLVMContext::MD_dbg: + DstInst->setMetadata(MD.first, MD.second); + } + } +} + /// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values /// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the /// code immediately before InsertPt. void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, - Instruction *InsertPt) { + Instruction *InsertPt, + TerminatorInst *TI) { // Insert a conditional branch on LIC to the two preheaders. The original // code is the true version and the new code is the false version. Value *BranchVal = LIC; + bool Swapped = false; if (!isa(Val) || Val->getType() != Type::getInt1Ty(LIC->getContext())) BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val); - else if (Val != ConstantInt::getTrue(Val->getContext())) + else if (Val != ConstantInt::getTrue(Val->getContext())) { // We want to enter the new loop when the condition is true. std::swap(TrueDest, FalseDest); + Swapped = true; + } // Insert the new branch. BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); + copyMetadata(BI, TI, Swapped); // If either edge is critical, split it. This helps preserve LoopSimplify // form for enclosing loops. @@ -717,13 +762,14 @@ /// where the path through the loop that doesn't execute its body has no /// side-effects), unswitch it. This doesn't involve any code duplication, just /// moving the conditional branch outside of the loop and updating loop info. -void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, - Constant *Val, - BasicBlock *ExitBlock) { +void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, + BasicBlock *ExitBlock, + TerminatorInst *TI) { DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" - << loopHeader->getName() << " [" << L->getBlocks().size() - << " blocks] in Function " << L->getHeader()->getParent()->getName() - << " on cond: " << *Val << " == " << *Cond << "\n"); + << loopHeader->getName() << " [" << L->getBlocks().size() + << " blocks] in Function " + << L->getHeader()->getParent()->getName() << " on cond: " << *Val + << " == " << *Cond << "\n"); // First step, split the preheader, so that we know that there is a safe place // to insert the conditional branch. We will change loopPreheader to have a @@ -744,7 +790,7 @@ // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, - loopPreheader->getTerminator()); + loopPreheader->getTerminator(), TI); LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); loopPreheader->getTerminator()->eraseFromParent(); @@ -780,7 +826,7 @@ /// to unswitch when LIC equal Val. Split it into loop versions and test the /// condition outside of either loop. Return the loops created as Out1/Out2. void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, - Loop *L) { + Loop *L, TerminatorInst *TI) { Function *F = loopHeader->getParent(); DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" << loopHeader->getName() << " [" << L->getBlocks().size() @@ -897,7 +943,8 @@ "Preheader splitting did not work correctly!"); // Emit the new branch that selects between the two versions of this loop. - EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR); + EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, + TI); LPM->deleteSimpleAnalysisValue(OldBR, L); OldBR->eraseFromParent(); Index: test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll @@ -0,0 +1,77 @@ +;RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s + +define i32 @foo(i32 %a, i32 %b) { +;CHECK-LABEL: foo +entry: + br label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %cmp0 = icmp sgt i32 %b, 0 + br i1 %cmp0, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %for.inc, %for.body.lr.ph + %inc.i = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %mul.i = phi i32 [ 3, %for.body.lr.ph ], [ %mul.p, %for.inc ] + %add.i = phi i32 [ %a, %for.body.lr.ph ], [ %add.p, %for.inc ] + %cmp1 = icmp eq i32 %a, 12345 + br i1 %cmp1, label %if.then, label %if.else, !prof !0 +; CHECK: %cmp1 = icmp eq i32 %a, 12345 +; CHECK-NEXT: br i1 %cmp1, label %if.then.us, label %if.else, !prof !0 +if.then: ; preds = %for.body +; CHECK: if.then.us: +; CHECK: add nsw i32 %{{.*}}, 123 +; CHECK: %exitcond.us = icmp eq i32 %inc.us, %b +; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %if.then.us + %add = add nsw i32 %add.i, 123 + br label %for.inc + +if.else: ; preds = %for.body + %mul = mul nsw i32 %mul.i, %b + br label %for.inc +; CHECK: if.else: +; CHECK: %mul = mul nsw i32 %mul.i, %b +; CHECK: %inc = add nuw nsw i32 %inc.i, 1 +; CHECK: %exitcond = icmp eq i32 %inc, %b +; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %if.else +for.inc: ; preds = %if.then, %if.else + %mul.p = phi i32 [ %b, %if.then ], [ %mul, %if.else ] + %add.p = phi i32 [ %add, %if.then ], [ %a, %if.else ] + %inc = add nuw nsw i32 %inc.i, 1 + %exitcond = icmp eq i32 %inc, %b + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.inc, %for.body.lr.ph + %t2 = phi i32 [ %b, %for.body.lr.ph ], [ %mul.p, %for.inc ] + %t1 = phi i32 [ %a, %for.body.lr.ph ], [ %add.p, %for.inc ] + %add3 = add nsw i32 %t2, %t1 + ret i32 %add3 +} + +define void @foo_swapped(i32 %a, i32 %b) { +;CHECK-LABEL: foo_swapped +entry: + br label %for.body +;CHECK: entry: +;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2 +;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1 +;CHECK: for.body: +for.body: ; preds = %for.inc, %entry + %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ] + %add.i = phi i32 [ 100, %entry ], [ %add, %if.then ] + %inc = add nuw nsw i32 %inc.i, 1 + %cmp1 = icmp eq i32 1, 2 + br i1 %cmp1, label %if.then, label %for.cond.cleanup, !prof !0 + +if.then: ; preds = %for.body + %add = add nsw i32 %a, %add.i + + %exitcond = icmp eq i32 %inc, %b + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.inc, %for.body.lr.ph, %for.body + ret void +} +!0 = !{!"branch_weights", i32 64, i32 4} + +;CHECK: !0 = !{!"branch_weights", i32 64, i32 4} +;CHECK: !1 = !{!"branch_weights", i32 4, i32 64}