Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -47,6 +47,11 @@
                          cl::desc("Enable unsafe double to float "
                                   "shrinking for math lib calls"));
 
+// Largest |n| for which pow(x, n) is expanded into fmuls (fast-math only).
+static cl::opt<unsigned> PowerOptThreshold(
+    "power-opt-threshold", cl::Hidden, cl::init(32),
+    cl::desc("Control the limit up to which pow(x, n) should be optimized"));
+
 
 //===----------------------------------------------------------------------===//
 // Helper Functions
@@ -1057,6 +1062,23 @@
   return Ret;
 }
 
+/// Emit Op1^Exp as a chain of fmuls via square-and-multiply, walking the bits
+/// of Exp from least to most significant. Returns nullptr when Exp is zero.
+static Value *getPow(Value *Op1, unsigned Exp, IRBuilder<> &B) {
+  Value *Result = nullptr;
+  // Square holds Op1^(2^i) at bit i: x^(2^i) = x^(2^(i-1)) * x^(2^(i-1)).
+  for (Value *Square = Op1; Exp != 0; Exp >>= 1) {
+    // A set bit contributes the current square; the first one seeds Result.
+    if (Exp & 1)
+      Result = Result ? B.CreateFMul(Result, Square) : Square;
+
+    // Square again only while higher bits remain to be processed.
+    if (Exp > 1)
+      Square = B.CreateFMul(Square, Square);
+  }
+  return Result;
+}
+
 Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
 
@@ -1124,6 +1146,24 @@
     return B.CreateFMul(Op1, Op1, "pow2");
   if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
     return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
+
+  // Only in fast-math mode, generate repeated fmul
+  // instead of generating pow(x, n).
+  Function *F = CI->getParent()->getParent();
+  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+  if (Attr.getValueAsString() == "true") {
+    if (PowerOptThreshold >= 2) {
+      for (unsigned Exp = 2; Exp <= PowerOptThreshold; ++Exp) {
+        if (Op2C->isExactlyValue(Exp) || Op2C->isExactlyValue((int)-Exp)) {
+          Value *FMul = getPow(Op1, Exp, B);
+          // For negative exponents simply divide by 1.0.
+          if (Op2C->isExactlyValue((int)-Exp))
+            FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul);
+          return FMul;
+        }
+      }
+    }
+  }
   return nullptr;
 }
 
Index: test/Transforms/InstCombine/pow-4.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/pow-4.ll
@@ -0,0 +1,139 @@
+; Test that, under unsafe-fp-math, pow(x, n) with a constant integral n is expanded into fmuls up to -power-opt-threshold.
+
+; RUN: opt -instcombine -S < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefix=CHECK
+; RUN: opt -instcombine -S < %s -mtriple=aarch64-linux-gnu -power-opt-threshold=33 | FileCheck %s --check-prefix=CHECK-THRESH
+; RUN: opt -instcombine -S < %s -mtriple=arm-linux-gnueabi | FileCheck %s --check-prefix=CHECK
+; RUN: opt -instcombine -S < %s -mtriple=arm-linux-gnueabi -power-opt-threshold=33 | FileCheck %s --check-prefix=CHECK-THRESH
+
+; Function Attrs: nounwind readnone
+declare double @llvm.pow.f64(double, double) #1
+declare float @llvm.pow.f32(float, float) #1
+
+; pow(x, 4.0f): float variant; expect two squarings, (x*x)*(x*x).
+define float @test_simplify_4f(float %x) #0 {
+; CHECK-LABEL: @test_simplify_4f(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul float %x, %x
+; CHECK-NEXT: %2 = fmul float %1, %1
+; CHECK-NEXT: ret float %2
+  %1 = tail call float @llvm.pow.f32(float %x, float 4.000000e+00)
+  ret float %1
+}
+
+; pow(x, 3.0): expect (x*x)*x.
+define double @test_simplify_3(double %x) #0 {
+; CHECK-LABEL: @test_simplify_3(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: ret double %2
+  %1 = tail call double @llvm.pow.f64(double %x, double 3.000000e+00)
+  ret double %1
+}
+
+; pow(x, 4.0): expect two squarings, (x*x)*(x*x).
+define double @test_simplify_4(double %x) #0 {
+; CHECK-LABEL: @test_simplify_4(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %1
+; CHECK-NEXT: ret double %2
+  %1 = tail call double @llvm.pow.f64(double %x, double 4.000000e+00)
+  ret double %1
+}
+
+; pow(x, 15.0): 15 = 0b1111, so every squaring is also folded into the product.
+define double @test_simplify_15(double %x) #0 {
+; CHECK-LABEL: @test_simplify_15(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: %3 = fmul double %1, %1
+; CHECK-NEXT: %4 = fmul double %2, %3
+; CHECK-NEXT: %5 = fmul double %3, %3
+; CHECK-NEXT: %6 = fmul double %4, %5
+; CHECK-NEXT: ret double %6
+  %1 = tail call double @llvm.pow.f64(double %x, double 1.500000e+01)
+  ret double %1
+}
+
+; pow(x, -7.0): expect x^7 built from fmuls, then a reciprocal via fdiv.
+define double @test_simplify_neg_7(double %x) #0 {
+; CHECK-LABEL: @test_simplify_neg_7(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: %3 = fmul double %1, %1
+; CHECK-NEXT: %4 = fmul double %2, %3
+; CHECK-NEXT: %5 = fdiv double 1.000000e+00, %4
+; CHECK-NEXT: ret double %5
+  %1 = tail call double @llvm.pow.f64(double %x, double -7.000000e+00)
+  ret double %1
+}
+
+; pow(x, -19.0): 19 = 0b10011; expect x^19 from fmuls, then a reciprocal via fdiv.
+define double @test_simplify_neg_19(double %x) #0 {
+; CHECK-LABEL: @test_simplify_neg_19(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: %3 = fmul double %1, %1
+; CHECK-NEXT: %4 = fmul double %3, %3
+; CHECK-NEXT: %5 = fmul double %4, %4
+; CHECK-NEXT: %6 = fmul double %2, %5
+; CHECK-NEXT: %7 = fdiv double 1.000000e+00, %6
+; CHECK-NEXT: ret double %7
+  %1 = tail call double @llvm.pow.f64(double %x, double -1.900000e+01)
+  ret double %1
+}
+
+; pow(x, 11.23): non-integral exponent, so the call must be left untouched.
+define double @test_simplify_11_23(double %x) #0 {
+; CHECK-LABEL: @test_simplify_11_23(
+; CHECK-NOT: fmul
+; CHECK-NEXT: %1 = tail call double @llvm.pow.f64(double %x, double 1.123000e+01)
+; CHECK-NEXT: ret double %1
+  %1 = tail call double @llvm.pow.f64(double %x, double 1.123000e+01)
+  ret double %1
+}
+
+; pow(x, 32.0), with default power-opt-threshold=32: exactly at the limit, so expanded.
+define double @test_simplify_32(double %x) #0 {
+; CHECK-LABEL: @test_simplify_32(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %1
+; CHECK-NEXT: %3 = fmul double %2, %2
+; CHECK-NEXT: %4 = fmul double %3, %3
+; CHECK-NEXT: %5 = fmul double %4, %4
+; CHECK-NEXT: ret double %5
+  %1 = tail call double @llvm.pow.f64(double %x, double 3.200000e+01)
+  ret double %1
+}
+
+; pow(x, 33.0), with default power-opt-threshold=32: above the limit, so the call stays.
+define double @test_simplify_33(double %x) #0 {
+; CHECK-LABEL: @test_simplify_33(
+; CHECK-NOT: fmul
+; CHECK-NEXT: %1 = tail call double @llvm.pow.f64(double %x, double 3.300000e+01)
+; CHECK-NEXT: ret double %1
+  %1 = tail call double @llvm.pow.f64(double %x, double 3.300000e+01)
+  ret double %1
+}
+
+; pow(x, 33.0), with power-opt-threshold=33: 33 = 0b100001, five squarings times x.
+define double @test_simplify_33_thresh(double %x) #0 {
+; CHECK-THRESH-LABEL: @test_simplify_33_thresh(
+; CHECK-THRESH-NOT: pow
+; CHECK-THRESH: %1 = fmul double %x, %x
+; CHECK-THRESH: %2 = fmul double %1, %1
+; CHECK-THRESH: %3 = fmul double %2, %2
+; CHECK-THRESH: %4 = fmul double %3, %3
+; CHECK-THRESH: %5 = fmul double %4, %4
+; CHECK-THRESH: %6 = fmul double %5, %x
+; CHECK-THRESH: ret double %6
+  %1 = tail call double @llvm.pow.f64(double %x, double 3.300000e+01)
+  ret double %1
+}
+
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="true" "use-soft-float"="false" }