diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -56,6 +56,7 @@
 FunctionPass *createSIWholeQuadModePass();
 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
+FunctionPass *createSIOptimizeVGPRLiveRangePass();
 FunctionPass *createSIFixSGPRCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIInsertWaitcntsPass();
@@ -288,6 +289,9 @@
 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
 extern char &SIOptimizeExecMaskingPreRAID;
 
+void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
+extern char &SIOptimizeVGPRLiveRangeID;
+
 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
 extern char &AMDGPUAnnotateUniformValuesPassID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -162,6 +162,11 @@
   cl::init(true), cl::Hidden);
 
+static cl::opt<bool> OptVGPRLiveRange(
+    "amdgpu-opt-vgpr-liverange",
+    cl::desc("Enable VGPR liverange optimizations for if-else structures"),
+    cl::init(false), cl::Hidden);
+
 // Enable atomic optimization
 static cl::opt<bool> EnableAtomicOptimizations(
   "amdgpu-atomic-optimizations",
@@ -220,6 +225,7 @@
   initializeSIPeepholeSDWAPass(*PR);
   initializeSIShrinkInstructionsPass(*PR);
   initializeSIOptimizeExecMaskingPreRAPass(*PR);
+  initializeSIOptimizeVGPRLiveRangePass(*PR);
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUFixFunctionBitcastsPass(*PR);
   initializeAMDGPUAlwaysInlinePass(*PR);
@@ -1169,6 +1175,8 @@
   insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
   insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
+  if (OptVGPRLiveRange)
+    insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID);
   // This must be run immediately after phi elimination and before
   // TwoAddressInstructions, otherwise the processing of the tied operand of
   // SI_ELSE will introduce a copy of the tied operand source after the else.
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -131,6 +131,7 @@
   SIMemoryLegalizer.cpp
   SIOptimizeExecMasking.cpp
   SIOptimizeExecMaskingPreRA.cpp
+  SIOptimizeVGPRLiveRange.cpp
   SIPeepholeSDWA.cpp
   SIPostRABundler.cpp
   SIPreEmitPeephole.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
new file
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -0,0 +1,497 @@
+//===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass tries to remove unnecessary VGPR live ranges in divergent
+/// if-else structures.
+///
+/// When we do structurization, we usually transform an if-else into two
+/// successive if-thens (with a flow block to do the predicate inversion).
+/// Consider a simple case after structurization: a divergent value %a is
+/// defined before the if-else and used in both the THEN part (the use in
+/// THEN is optional) and the ELSE part:
+///
+/// bb.if:
+///   %a = ...
+///   ...
+/// bb.then:
+///   ... = op %a
+///   ... // %a can be dead here
+/// bb.flow:
+///   ...
+/// bb.else:
+///   ... = %a
+///   ...
+/// bb.endif
+///
+/// As LLVM has no idea of the thread control flow, it will just assume %a is
+/// alive in the whole range of bb.then because of the later use in bb.else.
+/// On the AMDGPU architecture, however, VGPRs are accessed with respect to
+/// the exec mask: for this if-else case, the lanes active in bb.then are
+/// inactive in bb.else, and vice versa. So we are safe to say that %a is
+/// dead from the last use in bb.then until the end of that block, because
+/// the instructions in bb.then will only overwrite lanes that will never be
+/// accessed in bb.else.
+///
+/// This pass tells LLVM that %a is in fact dead by inserting a phi-node in
+/// bb.flow which says that %a is undef when coming from bb.then, and then
+/// replacing the uses in bb.else with the result of the newly inserted phi.
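+///
+/// For illustration, the example above then becomes (schematically):
+///
+/// bb.flow:
+///   %c = phi (%a from bb.if, undef from bb.then)
+/// bb.else:
+///   ... = %c
+///
+/// so the live range of %a now ends at its last use in bb.then.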
+///
+/// Two key conditions must be met to ensure correctness:
+/// 1.) The def-point should be in the same loop level as the if-else-endif,
+///     to make sure the second loop iteration still gets correct data.
+/// 2.) There should be no further uses after the IF-ELSE region.
+///
+//
+//===----------------------------------------------------------------------===//

+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-opt-vgpr-liverange"
+
+namespace {
+
+class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
+private:
+  const SIRegisterInfo *TRI = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  LiveVariables *LV = nullptr;
+  MachineDominatorTree *MDT = nullptr;
+  const MachineLoopInfo *Loops = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+
+public:
+  static char ID;
+
+  MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
+
+  void collectElseRegionBlocks(MachineBasicBlock *Flow,
+                               MachineBasicBlock *Endif,
+                               SmallVectorImpl<MachineBasicBlock *> &) const;
+
+  void
+  collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
+                            MachineBasicBlock *Endif,
+                            SmallVectorImpl<MachineBasicBlock *> &ElseBlocks,
+                            SmallVectorImpl<Register> &CandidateRegs) const;
+
+  void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
+                             SmallVectorImpl<MachineInstr *> &Uses) const;
+
+  void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
+                                   MachineBasicBlock *Flow) const;
+
+  void updateLiveRangeInElseRegion(
+      Register Reg, Register NewReg, MachineBasicBlock *Flow,
+      MachineBasicBlock *Endif,
+      SmallVectorImpl<MachineBasicBlock *> &ElseBlocks) const;
+
+  void
+  optimizeLiveRange(Register Reg, MachineBasicBlock *If,
+                    MachineBasicBlock *Flow, MachineBasicBlock *Endif,
+                    SmallVectorImpl<MachineBasicBlock *> &ElseBlocks) const;
+
+  SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "SI Optimize VGPR LiveRange";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LiveVariables>();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addRequired<MachineLoopInfo>();
+    AU.addPreserved<LiveVariables>();
+    AU.addPreserved<MachineDominatorTree>();
+    AU.addPreserved<MachineLoopInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+};
+
+} // end anonymous namespace
+
+// Check whether the MBB is an else flow block and get the branching target,
+// which is the Endif block.
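+// For reference, an SI_ELSE terminator in MIR looks like this (cf. the MIR
+// checks in the tests below):
+//   %1:sreg_64 = SI_ELSE %0, %bb.endif, implicit-def $exec,
+//       implicit-def $scc, implicit $exec
+// so operand 2 holds the Endif target block.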
+MachineBasicBlock *
+SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
+  for (auto &Term : MBB->terminators()) {
+    if (Term.getOpcode() == AMDGPU::SI_ELSE)
+      return Term.getOperand(2).getMBB();
+  }
+  return nullptr;
+}
+
+void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
+    MachineBasicBlock *Flow, MachineBasicBlock *Endif,
+    SmallVectorImpl<MachineBasicBlock *> &Blocks) const {
+  assert(Flow != Endif);
+
+  MachineBasicBlock *MBB = Endif;
+  unsigned Cur = 0;
+  while (MBB) {
+    for (auto *Pred : MBB->predecessors())
+      if (Pred != Flow && !llvm::is_contained(Blocks, Pred))
+        Blocks.push_back(Pred);
+
+    if (Cur < Blocks.size())
+      MBB = Blocks[Cur++];
+    else
+      MBB = nullptr;
+  }
+
+  LLVM_DEBUG(dbgs() << "Found Else blocks:");
+  for (auto *MBB : Blocks)
+    LLVM_DEBUG(dbgs() << " bb." << MBB->getNumber());
+  LLVM_DEBUG(dbgs() << "\n");
+}
+
+/// Find the instructions (excluding PHIs) in \p MBB that use \p Reg.
+void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
+    Register Reg, MachineBasicBlock *MBB,
+    SmallVectorImpl<MachineInstr *> &Uses) const {
+  for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
+       ++I) {
+    auto *UseMI = I->getParent();
+    if (UseMI->getParent() == MBB && !UseMI->isPHI())
+      Uses.push_back(UseMI);
+  }
+}
+
+/// Collect the killed registers in the ELSE region which are not alive through
+/// the whole THEN region.
+void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
+    MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
+    SmallVectorImpl<MachineBasicBlock *> &ElseBlocks,
+    SmallVectorImpl<Register> &CandidateRegs) const {
+
+  SmallSet<Register, 16> KillsInElse;
+
+  for (auto *Else : ElseBlocks) {
+    for (auto &MI : Else->instrs()) {
+      if (MI.isDebugInstr())
+        continue;
+      unsigned NumOps = MI.getNumOperands();
+      for (unsigned Op = 0; Op < NumOps; ++Op) {
+        MachineOperand &MO = MI.getOperand(Op);
+        if (!MO.isReg() || !MO.getReg() || MO.isDef())
+          continue;
+
+        Register MOReg = MO.getReg();
+        // We can only optimize VGPR virtual registers.
+        if (MOReg.isPhysical() || !TRI->isVGPR(*MRI, MOReg))
+          continue;
+
+        if (MO.isKill() && MO.readsReg()) {
+          LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
+          const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
+          // Make sure two conditions are met:
+          // a.) the value is defined before/in the IF block;
+          // b.) the value is defined at the same loop level.
+          if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
+              Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
+            KillsInElse.insert(MOReg);
+        }
+      }
+    }
+  }
+
+  // Check the phis in the Endif block, looking for values coming from the
+  // ELSE region. Make sure the phi-use is the last use.
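+  // A typical Endif phi looks like this (cf. bb.4.end in the tests below;
+  // the value names are illustrative):
+  //   %r:vgpr_32 = PHI %flow_val, %bb.flow, %else_val, %bb.else
+  // and only the incoming values from non-Flow predecessors are considered.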
+  for (auto &MI : Endif->phis()) {
+    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+      auto &MO = MI.getOperand(Idx);
+      auto *Pred = MI.getOperand(Idx + 1).getMBB();
+      if (Pred == Flow)
+        continue;
+
+      if (!MO.isReg() || !MO.getReg() || MO.isUndef())
+        continue;
+      Register Reg = MO.getReg();
+      if (Reg.isPhysical() || !TRI->isVGPR(*MRI, Reg))
+        continue;
+
+      LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+      const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
+
+      if (VI.isLiveIn(*Endif, Reg, *MRI)) {
+        LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
+                          << " as live in Endif\n");
+        continue;
+      }
+      // Make sure two conditions are met:
+      // a.) the value is defined before/in the IF block;
+      // b.) the value is defined at the same loop level.
+      if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
+          Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
+        KillsInElse.insert(Reg);
+    }
+  }
+
+  auto IsLiveThroughThen = [&](Register Reg) {
+    for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
+         ++I) {
+      if (!I->readsReg())
+        continue;
+      auto *UseMI = I->getParent();
+      auto *UseMBB = UseMI->getParent();
+      if (UseMBB == Flow || UseMBB == Endif) {
+        if (!UseMI->isPHI())
+          return true;
+
+        auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
+        // The register is live through the path If->Flow or Flow->Endif.
+        // We should not optimize such cases.
+        if ((UseMBB == Flow && IncomingMBB != If) ||
+            (UseMBB == Endif && IncomingMBB == Flow))
+          return true;
+      }
+    }
+    return false;
+  };
+
+  for (auto Reg : KillsInElse)
+    if (!IsLiveThroughThen(Reg))
+      CandidateRegs.push_back(Reg);
+}
+
+// Re-calculate the liveness of \p Reg in the THEN region.
+void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
+    Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
+
+  SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
+
+  MachineBasicBlock *ThenEntry = nullptr;
+  for (auto *Succ : If->successors()) {
+    if (Succ != Flow)
+      ThenEntry = Succ;
+  }
+
+  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
+  df_iterator_default_set<MachineBasicBlock *, 16> Visited;
+
+  for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
+    if (MBB == Flow)
+      break;
+
+    // Clear the live bit, as we will recalculate it below.
+    LLVM_DEBUG(dbgs() << "Clear AliveBlock bb." << MBB->getNumber() << "\n");
+    OldVarInfo.AliveBlocks.reset(MBB->getNumber());
+  }
+
+  // Get the blocks that Reg should be alive through.
+  for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
+       ++I) {
+    auto *UseMI = I->getParent();
+    if (UseMI->isPHI() && I->readsReg()) {
+      if (Visited.contains(UseMI->getParent()))
+        PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
+    }
+  }
+
+  Visited.clear();
+
+  for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
+    if (MBB == Flow)
+      break;
+
+    SmallVector<MachineInstr *> Uses;
+    // PHI instructions have already been processed above.
+    findNonPHIUsesInBlock(Reg, MBB, Uses);
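+    // Note: LiveVariables::HandleVirtRegUse records a use of Reg at the
+    // given instruction, extending the live range up to it and updating the
+    // kill list in Reg's VarInfo.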
+    if (Uses.size() == 1) {
+      LLVM_DEBUG(dbgs() << "Found one Non-PHI use in bb." << MBB->getNumber()
+                        << "\n");
+      LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
+    } else if (Uses.size() > 1) {
+      // Process the instructions in order.
+      LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in bb."
+                        << MBB->getNumber() << "\n");
+      for (MachineInstr &MI : *MBB) {
+        if (llvm::is_contained(Uses, &MI))
+          LV->HandleVirtRegUse(Reg, MBB, MI);
+      }
+    }
+
+    // Mark Reg alive through the block if this is a PHI incoming block.
+    if (PHIIncoming.contains(MBB))
+      LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
+                                  MBB);
+  }
+
+  // Set the kill flag if we get new Kills in the THEN region.
+  for (auto *MI : OldVarInfo.Kills)
+    if (Visited.contains(MI->getParent()))
+      MI->addRegisterKilled(Reg, TRI);
+}
+
+void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
+    Register Reg, Register NewReg, MachineBasicBlock *Flow,
+    MachineBasicBlock *Endif,
+    SmallVectorImpl<MachineBasicBlock *> &ElseBlocks) const {
+  LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
+  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
+
+  // Transfer AliveBlocks from Reg to NewReg.
+  for (auto *MBB : ElseBlocks) {
+    unsigned BBNum = MBB->getNumber();
+    if (OldVarInfo.AliveBlocks.test(BBNum)) {
+      NewVarInfo.AliveBlocks.set(BBNum);
+      LLVM_DEBUG(dbgs() << "Removing AliveBlock bb." << BBNum << "\n");
+      OldVarInfo.AliveBlocks.reset(BBNum);
+    }
+  }
+
+  // Transfer the possible Kills in ElseBlocks from Reg to NewReg.
+  std::vector<MachineInstr *>::iterator I = OldVarInfo.Kills.begin();
+  while (I != OldVarInfo.Kills.end()) {
+    if (llvm::is_contained(ElseBlocks, (*I)->getParent())) {
+      NewVarInfo.Kills.push_back(*I);
+      I = OldVarInfo.Kills.erase(I);
+    } else {
+      ++I;
+    }
+  }
+}
+
+void SIOptimizeVGPRLiveRange::optimizeLiveRange(
+    Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
+    MachineBasicBlock *Endif,
+    SmallVectorImpl<MachineBasicBlock *> &ElseBlocks) const {
+  // Insert a new PHI, marking the value from the THEN region as undef.
+  LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << "\n");
+  auto *RC = MRI->getRegClass(Reg);
+  Register NewReg = MRI->createVirtualRegister(RC);
+  Register UndefReg = MRI->createVirtualRegister(RC);
+  MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
+                                    TII->get(TargetOpcode::PHI), NewReg);
+  for (auto *Pred : Flow->predecessors()) {
+    if (Pred == If)
+      PHI.addReg(Reg).addMBB(Pred);
+    else
+      PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
+  }
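+
+  // The inserted PHI ends up with the following shape (cf. the Flow-block
+  // PHIs in the tests below):
+  //   %NewReg:vgpr_32 = PHI %Reg, %bb.if, undef %UndefReg, %bb.then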
+
+  // Replace all uses in the ELSE region or in the PHIs of the ENDIF block.
+  for (auto I = MRI->use_begin(Reg), E = MRI->use_end(); I != E;) {
+    MachineOperand &O = *I;
+    // This is a little bit tricky: setReg() updates the use list, so we have
+    // to increment the iterator before calling setReg() to avoid skipping
+    // some uses.
+    ++I;
+    auto *UseMI = O.getParent();
+    auto *UseBlock = UseMI->getParent();
+    // Replace uses in the Endif block.
+    if (UseBlock == Endif) {
+      assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
+      O.setReg(NewReg);
+      continue;
+    }
+
+    // Replace uses in the ELSE region.
+    if (llvm::is_contained(ElseBlocks, UseBlock))
+      O.setReg(NewReg);
+  }
+
+  // The optimized Reg is not alive through the Flow block anymore.
+  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
+  OldVarInfo.AliveBlocks.reset(Flow->getNumber());
+
+  updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
+  updateLiveRangeInThenRegion(Reg, If, Flow);
+}
+
+char SIOptimizeVGPRLiveRange::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
+                      "SI Optimize VGPR LiveRange", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
+                    "SI Optimize VGPR LiveRange", false, false)
+
+char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
+
+FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
+  return new SIOptimizeVGPRLiveRange();
+}
+
+bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  TII = ST.getInstrInfo();
+  TRI = &TII->getRegisterInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
+  Loops = &getAnalysis<MachineLoopInfo>();
+  LV = &getAnalysis<LiveVariables>();
+  MRI = &MF.getRegInfo();
+
+  bool MadeChange = false;
+
+  // TODO: we need to think about the order of visiting the blocks to get
+  // optimal results for nested if-else cases.
+  for (MachineBasicBlock &MBB : MF) {
+    for (auto &MI : MBB.terminators()) {
+      // Detect the if-else blocks.
+      if (MI.getOpcode() == AMDGPU::SI_IF) {
+        MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
+        auto *Endif = getElseTarget(IfTarget);
+        if (!Endif)
+          continue;
+
+        SmallVector<MachineBasicBlock *, 4> ElseBlocks;
+        SmallVector<Register, 4> CandidateRegs;
+
+        LLVM_DEBUG(dbgs() << "Checking IF-FLOW-ENDIF: bb." << MBB.getNumber()
+                          << " bb." << IfTarget->getNumber() << " bb."
+                          << Endif->getNumber() << "\n");
+
+        // Collect all the blocks in the ELSE region.
+        collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
+
+        // Collect the registers that can be optimized.
+        collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
+                                  CandidateRegs);
+        MadeChange |= !CandidateRegs.empty();
+        // Now we are safe to optimize.
+ for (auto Reg : CandidateRegs) + optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); + } + } + } + + return MadeChange; +} diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-opt-vgpr-liverange=true -stop-after=si-opt-vgpr-liverange -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; a normal if-else +define amdgpu_ps float @else1(i32 %z, float %v) #0 { + ; SI-LABEL: name: else1 + ; SI: bb.0.main_body: + ; SI: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; SI: liveins: $vgpr0, $vgpr1 + ; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 + ; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec + ; SI: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: S_BRANCH %bb.3 + ; SI: bb.1.Flow: + ; SI: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; SI: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %13:vgpr_32, %bb.0, %4, %bb.3 + ; SI: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, undef %15:vgpr_32, %bb.3 + ; SI: [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: S_BRANCH %bb.2 + ; SI: bb.2.if: + ; SI: successors: %bb.4(0x80000000) + ; SI: %3:vgpr_32 = nofpexcept V_ADD_F32_e32 killed [[PHI1]], [[PHI1]], implicit $mode, implicit $exec + ; SI: S_BRANCH %bb.4 + ; SI: bb.3.else: + ; SI: successors: %bb.1(0x80000000) + ; SI: %4:vgpr_32 = nofpexcept V_MUL_F32_e32 1077936128, killed [[COPY]], implicit $mode, implicit $exec + ; SI: S_BRANCH %bb.1 + ; SI: bb.4.end: + ; SI: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, %3, %bb.2 + ; SI: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: $vgpr0 = COPY killed [[PHI2]] + ; SI: SI_RETURN_TO_EPILOG killed $vgpr0 +main_body: + %cc = icmp sgt i32 %z, 5 + br i1 %cc, label %if, label %else + +if: + %v.if = fmul float %v, 2.0 + br label %end + +else: + %v.else = fmul float %v, 3.0 + br label %end + +end: + %r = phi float [ %v.if, %if ], [ %v.else, %else ] + ret float %r +} + + +; %v was used after if-else +define amdgpu_ps float @else2(i32 %z, float %v) #0 { + ; SI-LABEL: name: else2 + ; SI: bb.0.main_body: + ; SI: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; SI: liveins: $vgpr0, $vgpr1 + ; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 + ; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec + ; SI: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: S_BRANCH %bb.3 + ; SI: bb.1.Flow: + ; SI: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; SI: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %15:vgpr_32, %bb.0, %4, %bb.3 + ; SI: [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: S_BRANCH %bb.2 + ; SI: bb.2.if: + ; SI: successors: %bb.4(0x80000000) + ; SI: %3:vgpr_32 = nofpexcept V_ADD_F32_e32 killed [[COPY]], [[COPY]], implicit $mode, implicit $exec + ; SI: S_BRANCH %bb.4 + ; SI: 
bb.3.else: + ; SI: successors: %bb.1(0x80000000) + ; SI: %4:vgpr_32 = nofpexcept V_MUL_F32_e32 1077936128, [[COPY]], implicit $mode, implicit $exec + ; SI: S_BRANCH %bb.1 + ; SI: bb.4.end: + ; SI: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.1, %3, %bb.2 + ; SI: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, %3, %bb.2 + ; SI: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: %14:vgpr_32 = nofpexcept V_ADD_F32_e32 killed [[PHI1]], killed [[PHI2]], implicit $mode, implicit $exec + ; SI: $vgpr0 = COPY killed %14 + ; SI: SI_RETURN_TO_EPILOG killed $vgpr0 +main_body: + %cc = icmp sgt i32 %z, 5 + br i1 %cc, label %if, label %else + +if: + %v.if = fmul float %v, 2.0 + br label %end + +else: + %v.else = fmul float %v, 3.0 + br label %end + +end: + %r0 = phi float [ %v.if, %if ], [ %v, %else ] + %r1 = phi float [ %v.if, %if ], [ %v.else, %else ] + %r2 = fadd float %r0, %r1 + ret float %r2 +} + +; if-else inside loop, %x can be optimized, but %v cannot be. +define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { + ; SI-LABEL: name: else3 + ; SI: bb.0.entry: + ; SI: successors: %bb.1(0x80000000) + ; SI: liveins: $vgpr0, $vgpr1, $sgpr0, $vgpr2 + ; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2 + ; SI: [[COPY1:%[0-9]+]]:sgpr_32 = COPY killed $sgpr0 + ; SI: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1 + ; SI: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 + ; SI: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 6, killed [[COPY3]], implicit $exec + ; SI: %1:vgpr_32 = nofpexcept V_MUL_F32_e32 1077936128, [[COPY2]], implicit $mode, implicit $exec + ; SI: %2:vgpr_32 = nofpexcept V_ADD_F32_e32 killed [[COPY2]], [[COPY2]], implicit $mode, implicit $exec + ; SI: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; SI: bb.1.for.body: + ; SI: successors: %bb.4(0x40000000), %bb.2(0x40000000) + ; SI: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %13, %bb.5 + ; SI: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %12, %bb.5 + ; SI: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_GT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: S_BRANCH %bb.4 + ; SI: bb.2.Flow: + ; SI: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; SI: [[PHI2:%[0-9]+]]:vgpr_32 = PHI undef %35:vgpr_32, %bb.1, %9, %bb.4 + ; SI: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, undef %38:vgpr_32, %bb.4 + ; SI: [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: S_BRANCH %bb.3 + ; SI: bb.3.if: + ; SI: successors: %bb.5(0x80000000) + ; SI: %8:vgpr_32, dead %31:sreg_64 = V_ADD_CO_U32_e64 1, killed [[PHI3]], 0, implicit $exec + ; SI: S_BRANCH %bb.5 + ; SI: bb.4.else: + ; SI: successors: %bb.2(0x80000000) + ; SI: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[PHI1]], 3, implicit $exec + ; SI: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[V_MUL_LO_U32_e64_]] + ; SI: S_BRANCH %bb.2 + ; SI: bb.5.if.end: + ; SI: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; SI: [[PHI4:%[0-9]+]]:vgpr_32 = PHI %1, %bb.2, %2, %bb.3 + ; SI: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI2]], %bb.2, %8, %bb.3 + ; SI: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; SI: %12:vgpr_32, dead %33:sreg_64 = V_ADD_CO_U32_e64 1, [[PHI5]], 0, implicit $exec + ; SI: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[PHI]], 1, implicit-def dead $scc + ; SI: S_CMP_LT_I32 [[S_ADD_I32_]], [[COPY1]], implicit-def $scc + ; 
SI: S_CBRANCH_SCC1 %bb.1, implicit killed $scc + ; SI: S_BRANCH %bb.6 + ; SI: bb.6.for.end: + ; SI: %34:vgpr_32 = nofpexcept V_ADD_F32_e32 killed [[PHI5]], killed [[PHI4]], implicit $mode, implicit $exec + ; SI: $vgpr0 = COPY killed %34 + ; SI: SI_RETURN_TO_EPILOG killed $vgpr0 +entry: +; %break = icmp sgt i32 %bound, 0 +; br i1 %break, label %for.body, label %for.end + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %if.end ] + %x = phi i32 [ %x0, %entry ], [ %xinc, %if.end ] + %cc = icmp sgt i32 %z, 5 + br i1 %cc, label %if, label %else + +if: + %v.if = fmul float %v, 2.0 + %x.if = add i32 %x, 1 + br label %if.end + +else: + %v.else = fmul float %v, 3.0 + %x.else = mul i32 %x, 3 + br label %if.end + +if.end: + %v.endif = phi float [ %v.if, %if ], [ %v.else, %else ] + %x.endif = phi i32 [ %x.if, %if ], [ %x.else, %else ] + + %xinc = add i32 %x.endif, 1 + %inc = add i32 %i, 1 + %cond = icmp slt i32 %inc, %bound + br i1 %cond, label %for.body, label %for.end + +for.end: + %x_float = bitcast i32 %x.endif to float + %r = fadd float %x_float, %v.endif + ret float %r +} + +attributes #0 = { nounwind }