diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -147,11 +147,23 @@
 /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
 /// instructions of the loop and loop safety information as
 /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when
+/// this function is called by \p sinkRegionForLoopNest.
 bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                 BlockFrequencyInfo *, TargetLibraryInfo *,
-                TargetTransformInfo *, Loop *, AliasSetTracker *,
+                TargetTransformInfo *, Loop *CurLoop, AliasSetTracker *,
                 MemorySSAUpdater *, ICFLoopSafetyInfo *,
-                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *,
+                Loop *OutermostLoop = nullptr);
+
+/// Call sinkRegion on loops contained within the specified loop
+/// in order from innermost to outermost.
+bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
+                           DominatorTree *, BlockFrequencyInfo *,
+                           TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+                           AliasSetTracker *, MemorySSAUpdater *,
+                           ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
+                           OptimizationRemarkEmitter *);
 
 /// Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -151,7 +151,8 @@
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                   const LoopSafetyInfo *SafetyInfo,
-                                  TargetTransformInfo *TTI, bool &FreeInLoop);
+                                  TargetTransformInfo *TTI, bool &FreeInLoop,
+                                  bool LoopNestMode);
 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                   MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
@@ -436,8 +437,13 @@
   // instructions, we perform another pass to hoist them out of the loop.
   if (L->hasDedicatedExits())
     Changed |=
-        sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
-                   CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
+        LoopNestMode
+            ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
+                                    BFI, TLI, TTI, L, CurAST.get(), MSSAU.get(),
+                                    &SafetyInfo, *Flags.get(), ORE)
+            : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI,
+                         L, CurAST.get(), MSSAU.get(), &SafetyInfo,
+                         *Flags.get(), ORE);
   Flags->setIsSink(false);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
@@ -555,7 +561,7 @@
                       Loop *CurLoop, AliasSetTracker *CurAST,
                       MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
                       SinkAndHoistLICMFlags &Flags,
-                      OptimizationRemarkEmitter *ORE) {
+                      OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
 
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
@@ -598,8 +604,10 @@
       // operands of the instruction are loop invariant.
       //
       bool FreeInLoop = false;
+      bool LoopNestMode = OutermostLoop != nullptr;
       if (!I.mayHaveSideEffects() &&
-          isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
+          isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+                                SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
                              ORE)) {
         if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
@@ -618,6 +626,26 @@
   return Changed;
 }
 
+bool llvm::sinkRegionForLoopNest(
+    DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
+    BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+    Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+    ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags,
+    OptimizationRemarkEmitter *ORE) {
+
+  bool Changed = false;
+  SmallPriorityWorklist<Loop *, 4> Worklist;
+  Worklist.insert(CurLoop);
+  appendLoopsToWorklist(*CurLoop, Worklist);
+  while (!Worklist.empty()) {
+    Loop *L = Worklist.pop_back_val();
+    Changed |=
+        sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
+                   CurAST, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
+  }
+  return Changed;
+}
+
 namespace {
 // This is a helper class for hoistRegion to make it able to hoist control flow
 // in order to be able to hoist phis. The way this works is that we initially
@@ -1443,7 +1471,8 @@
 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
                                   const LoopSafetyInfo *SafetyInfo,
-                                  TargetTransformInfo *TTI, bool &FreeInLoop) {
+                                  TargetTransformInfo *TTI, bool &FreeInLoop,
+                                  bool LoopNestMode) {
   const auto &BlockColors = SafetyInfo->getBlockColors();
   bool IsFree = isFreeInLoop(I, CurLoop, TTI);
   for (const User *U : I.users()) {
@@ -1460,6 +1489,15 @@
         if (!BlockColors.empty() &&
             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
           return false;
+
+      if (LoopNestMode) {
+        while (isa<PHINode>(UI) && UI->hasOneUser() &&
+               UI->getNumOperands() == 1) {
+          if (!CurLoop->contains(UI))
+            break;
+          UI = cast<Instruction>(U->user_back());
+        }
+      }
     }
 
     if (CurLoop->contains(UI)) {
diff --git a/llvm/test/Transforms/LICM/lnicm-sink.ll b/llvm/test/Transforms/LICM/lnicm-sink.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LICM/lnicm-sink.ll
@@ -0,0 +1,68 @@
+; RUN: opt -passes='loop(licm)' -S %s | FileCheck %s --check-prefixes CHECK,LICM
+; RUN: opt -passes='loop(lnicm)' -S %s | FileCheck %s --check-prefixes CHECK,LNICM
+
+; This test represents the following function:
+;
+; double sin(double);
+; int abs(int);
+; double test(double x, int y[10]) {
+;   double t = 0; int s = 0;
+;   for (int i = 0; i < 10; i++) {
+;     for (int j = 0; j < 10; j++) {
+;       t = sin(x);
+;       s = abs(i);
+;     }
+;     y[i] = s;
+;   }
+;   return t;
+; }
+;
+; We only want to sink the call of sin out of the loop nest.
+; LICM also sinks the call of abs out of j-loop, but LNICM doesn't do so
+; to try to make a perfect loop nest. (though y[i] = s; still prevents the 
+; loop nest from being a perfect loop nest in this test case)
+
+define dso_local double @test(double %x, i32* noalias %y) {
+entry:
+  br label %for.body
+
+for.body:
+  %i.02 = phi i32 [ 0, %entry ], [ %inc6, %for.end ]
+  br label %for.body3
+
+; CHECK: for.body3:
+; LNICM: call i32 @abs(i32 %i.02)
+; LICM-NOT: call i32 @abs(i32 %i.02)
+for.body3:
+  %j.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
+  %call = call double @sin(double %x)
+  %call4 = call i32 @abs(i32 %i.02)
+  %inc = add nsw i32 %j.01, 1
+  %cmp2 = icmp slt i32 %inc, 10
+  br i1 %cmp2, label %for.body3, label %for.end
+
+; CHECK: for.end:
+; LICM: call i32 @abs(i32 %i.02)
+; LNICM-NOT: call i32 @abs(i32 %i.02)
+for.end:
+  %s.1.lcssa = phi i32 [ %call4, %for.body3 ]
+  %t.1.lcssa = phi double [ %call, %for.body3 ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
+  store i32 %s.1.lcssa, i32* %arrayidx, align 4
+  %inc6 = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc6, 10
+  br i1 %cmp, label %for.body, label %for.end7
+
+; CHECK: for.end7:
+; CHECK: call double @sin(double %x)
+for.end7:
+  %t.0.lcssa = phi double [ %t.1.lcssa, %for.end ]
+  ret double %t.0.lcssa
+}
+
+declare dso_local double @sin(double) #0
+
+declare dso_local i32 @abs(i32) #0
+
+attributes #0 = { nounwind readnone willreturn }