Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -678,6 +678,7 @@ Value *SimplifyVectorOp(BinaryOperator &Inst); + Value *SimplifyTrigOp(Instruction &Inst); /// Given a binary operator, cast instruction, or select which has a PHI node /// as operand #0, see if we can fold the instruction into the PHI (which is Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -193,6 +193,9 @@ bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); @@ -613,6 +616,9 @@ bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); @@ -1167,6 +1173,9 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); @@ -1236,6 +1245,9 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); @@ -1347,6 +1359,9 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = 
SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -107,23 +107,23 @@ #define DEBUG_TYPE "instcombine" -STATISTIC(NumCombined , "Number of insts combined"); +STATISTIC(NumCombined, "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); -STATISTIC(NumDeadInst , "Number of dead inst eliminated"); -STATISTIC(NumSunkInst , "Number of instructions sunk"); -STATISTIC(NumExpand, "Number of expansions"); -STATISTIC(NumFactor , "Number of factorizations"); -STATISTIC(NumReassoc , "Number of reassociations"); +STATISTIC(NumDeadInst, "Number of dead inst eliminated"); +STATISTIC(NumSunkInst, "Number of instructions sunk"); +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor, "Number of factorizations"); +STATISTIC(NumReassoc, "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); static cl::opt -EnableExpensiveCombines("expensive-combines", - cl::desc("Enable expensive instruction combines")); + EnableExpensiveCombines("expensive-combines", + cl::desc("Enable expensive instruction combines")); -static cl::opt -MaxArraySize("instcombine-maxarray-size", cl::init(1024), - cl::desc("Maximum array size considered when doing a combine")); +static cl::opt MaxArraySize( + "instcombine-maxarray-size", cl::init(1024), + cl::desc("Maximum array size considered when doing a combine")); // FIXME: Remove this flag when it is no longer necessary to convert // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false @@ -289,8 +289,8 @@ // Order operands such that they are listed from right (least complex) to // left (most complex). 
This puts constants before unary operators before // binary operators. - if (I.isCommutative() && getComplexity(I.getOperand(0)) < - getComplexity(I.getOperand(1))) + if (I.isCommutative() && + getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) Changed = !I.swapOperands(); BinaryOperator *Op0 = dyn_cast(I.getOperand(0)); @@ -396,11 +396,10 @@ // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" // if C1 and C2 are constants. - if (Op0 && Op1 && - Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && - isa(Op0->getOperand(1)) && - isa(Op1->getOperand(1)) && - Op0->hasOneUse() && Op1->hasOneUse()) { + if (Op0 && Op1 && Op0->getOpcode() == Opcode && + Op1->getOpcode() == Opcode && isa(Op0->getOperand(1)) && + isa(Op1->getOperand(1)) && Op0->hasOneUse() && + Op1->hasOneUse()) { Value *A = Op0->getOperand(0); Constant *C1 = cast(Op0->getOperand(1)); Value *B = Op1->getOperand(0); @@ -664,16 +663,14 @@ // term. if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) - if (Value *V = - tryFactorization(I, LHSOpcode, A, B, RHS, Ident)) + if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". Try to factorize common // term. if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) - if (Value *V = - tryFactorization(I, RHSOpcode, LHS, Ident, C, D)) + if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D)) return V; } @@ -856,8 +853,8 @@ std::swap(Op0, Op1); auto *BO = cast(&I); - Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1, - SO->getName() + ".op"); + Value *RI = + Builder.CreateBinOp(BO->getOpcode(), Op0, Op1, SO->getName() + ".op"); auto *FPInst = dyn_cast(RI); if (FPInst && isa(FPInst)) FPInst->copyFastMathFlags(BO); @@ -965,8 +962,10 @@ if (isa(InVal) && !isa(InVal)) continue; - if (isa(InVal)) return nullptr; // Itself a phi. - if (NonConstBB) return nullptr; // More than one non-const value. 
+ if (isa(InVal)) + return nullptr; // Itself a phi. + if (NonConstBB) + return nullptr; // More than one non-const value. NonConstBB = PN->getIncomingBlock(i); @@ -989,7 +988,8 @@ // do this if the pred block is unconditionally branching into the phi block. if (NonConstBB != nullptr) { BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); - if (!BI || !BI->isUnconditional()) return nullptr; + if (!BI || !BI->isUnconditional()) + return nullptr; } // Okay, we can do the transformation: create the new PHI node. @@ -1045,17 +1045,17 @@ if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa(CI)) - InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), C, + "phitmp"); else - InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), C, + "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else if (auto *BO = dyn_cast(&I)) { for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), - Builder); + Value *InV = + foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), Builder); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { @@ -1074,7 +1074,8 @@ for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast(*UI++); - if (User == &I) continue; + if (User == &I) + continue; replaceInstUsesWith(*User, NewPN); eraseInstFromFunction(*User); } @@ -1110,8 +1111,8 @@ Type *IntPtrTy = DL.getIntPtrType(PtrTy); int64_t FirstIdx = 0; if (int64_t TySize = DL.getTypeAllocSize(Ty)) { - FirstIdx = Offset/TySize; - Offset -= FirstIdx*TySize; + FirstIdx = Offset / TySize; + Offset -= FirstIdx * TySize; // Handle hosts where % returns negative instead of values [0..TySize). 
if (Offset < 0) { @@ -1136,15 +1137,15 @@ "Offset must stay within the indexed type"); unsigned Elt = SL->getElementContainingOffset(Offset); - NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), - Elt)); + NewIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), Elt)); Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (ArrayType *AT = dyn_cast(Ty)) { uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); + NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset / EltSize)); Offset %= EltSize; Ty = AT->getElementType(); } else { @@ -1160,8 +1161,7 @@ // If this GEP has only 0 indices, it is the same pointer as // Src. If Src is not a trivial GEP too, don't combine // the indices. - if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && - !Src.hasOneUse()) + if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && !Src.hasOneUse()) return false; return true; } @@ -1171,7 +1171,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { assert(isa(Val->getType()) && "Can only descale integers!"); assert(cast(Val->getType())->getBitWidth() == - Scale.getBitWidth() && "Scale not compatible with value!"); + Scale.getBitWidth() && + "Scale not compatible with value!"); // If Val is zero or Scale is one then Val = Val * Scale. if (match(Val, m_Zero()) || Scale == 1) { @@ -1277,8 +1278,8 @@ return nullptr; Value *LHS = BO->getOperand(0); - int32_t Amt = cast(BO->getOperand(1))-> - getLimitedValue(Scale.getBitWidth()); + int32_t Amt = cast(BO->getOperand(1)) + ->getLimitedValue(Scale.getBitWidth()); // Op = LHS << Amt. if (Amt == logScale) { @@ -1419,12 +1420,144 @@ return BO; } +static bool isTrigLibCall(CallInst *CI) { + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. 
+  // We already checked the prototype.
+  return CI->hasFnAttr(Attribute::NoUnwind) &&
+         CI->hasFnAttr(Attribute::ReadNone);
+}
+
+/// \brief Replaces division or multiplication of trigonometric functions
+/// with an equivalent trigonometric operation.
+
+Value *InstCombiner::SimplifyTrigOp(Instruction &I) {
+
+  if (((I.getOpcode() == Instruction::FMul) ||
+       (I.getOpcode() == Instruction::Mul))) {
+    if (isa<CallInst>(I.getOperand(0)) && isa<CallInst>(I.getOperand(1))) {
+      CallInst *call1 = dyn_cast<CallInst>(I.getOperand(0));
+      CallInst *call2 = dyn_cast<CallInst>(I.getOperand(1));
+
+      // Return nullptr if the arguments to the calls are not the same.
+      if (!(call1->getOperand(0) == call2->getOperand(0)))
+        return nullptr;
+
+      // tan(x)*cos(x)=sin(x);
+
+      // Operand 1 of a one-argument CallInst is the callee; compare its name.
+      Value *Op1 = call1->getOperand(1);
+      Value *Op2 = call2->getOperand(1);
+
+      if ((isTrigLibCall(call1) &&
+           ((Op1->getName() == "tan") || (Op1->getName() == "tanf") ||
+            (Op1->getName() == "tanl")) &&
+           (Op2->getName().startswith("llvm.cos"))) ||
+          (isTrigLibCall(call2) && (Op1->getName().startswith("llvm.cos")) &&
+           ((Op2->getName() == "tan") || (Op2->getName() == "tanf") ||
+            (Op2->getName() == "tanl")))) {
+        IRBuilder<>::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Function *fun = Intrinsic::getDeclaration(I.getModule(), Intrinsic::sin,
+                                                  I.getType());
+        CallInst *SinCall = Builder.CreateCall(fun, call2->getOperand(0));
+
+        SinCall->takeName(&I);
+        SinCall->copyFastMathFlags(&I);
+
+        return SinCall;
+      }
+
+      // sin(x)*cos(x)=sin(2x)*0.5
+
+      else if (((call1->getOperand(1)->getName().startswith("llvm.sin")) &&
+                (call2->getOperand(1)->getName().startswith("llvm.cos"))) ||
+               ((call1->getOperand(1)->getName().startswith("llvm.cos")) &&
+                (call2->getOperand(1)->getName().startswith("llvm.sin")))) {
+        IRBuilder<>::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Function *fun = Intrinsic::getDeclaration(I.getModule(), Intrinsic::sin,
+                                                  I.getType());
+
+        Constant *ConstFloatTwo =
+            ConstantFP::get(I.getType(), 2.0);
+        // Hold as Value: the multiply may constant-fold, in which case a
+        // dyn_cast<Instruction> would yield null.
+        Value *MulVal =
+            Builder.CreateFMul(ConstFloatTwo, call1->getOperand(0));
+        CallInst *SinCall = Builder.CreateCall(fun, MulVal);
+        Instruction *DivIns =
+            dyn_cast<Instruction>(Builder.CreateFDiv(SinCall, ConstFloatTwo));
+        DivIns->takeName(&I);
+        DivIns->copyFastMathFlags(&I);
+
+        return DivIns;
+      }
+    }
+  }
+
+  else if ((I.getOpcode() == Instruction::FDiv) ||
+           (I.getOpcode() == Instruction::SDiv) ||
+           (I.getOpcode() == Instruction::UDiv)) {
+    if (isa<CallInst>(I.getOperand(0)) && isa<CallInst>(I.getOperand(1))) {
+      CallInst *call1 = dyn_cast<CallInst>(I.getOperand(0));
+      CallInst *call2 = dyn_cast<CallInst>(I.getOperand(1));
+
+      // Return nullptr if the arguments to the calls are not the same.
+
+      if (!(call1->getOperand(0) == call2->getOperand(0)))
+        return nullptr;
+
+      // Operand 1 of a one-argument CallInst is the callee; compare its name.
+      Value *Op1 = call1->getOperand(1);
+      Value *Op2 = call2->getOperand(1);
+
+      // sin/tan=cos;
+      if ((Op1->getName().startswith("llvm.sin")) &&
+
+          (isTrigLibCall(call2) &&
+           ((Op2->getName() == "tan") || (Op2->getName() == "tanf") ||
+            (Op2->getName() == "tanl")))) {
+        IRBuilder<>::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Function *fun = Intrinsic::getDeclaration(I.getModule(), Intrinsic::cos,
+                                                  I.getType());
+        CallInst *CosCall = Builder.CreateCall(fun, call1->getOperand(0));
+        CosCall->takeName(&I);
+        CosCall->copyFastMathFlags(&I);
+        return CosCall;
+      }
+
+      // tan/sin=1/cos;
+      else if ((Op2->getName().startswith("llvm.sin")) &&
+
+               (isTrigLibCall(call1) &&
+                ((Op1->getName() == "tan") || (Op1->getName() == "tanf") ||
+                 (Op1->getName() == "tanl")))) {
+        IRBuilder<>::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Function *fun = Intrinsic::getDeclaration(I.getModule(), Intrinsic::cos,
+                                                  I.getType());
+        CallInst *CosCall = Builder.CreateCall(fun, call1->getOperand(0));
+        Constant *ConstFloatOne = ConstantFP::get(I.getType(), 1.0);
+
+        Instruction *DivIns =
dyn_cast(Builder.CreateFDiv(ConstFloatOne, CosCall)); + DivIns->takeName(&I); + DivIns->copyFastMathFlags(&I); + + return DivIns; + } + } + } + + return nullptr; +} + /// \brief Makes transformation of binary operation specific for vector types. /// \param Inst Binary operator to transform. /// \return Pointer to node that must replace the original binary operator, or /// null pointer if no transformation was made. Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { - if (!Inst.getType()->isVectorTy()) return nullptr; + if (!Inst.getType()->isVectorTy()) + return nullptr; // It may not be safe to reorder shuffles and things like div, urem, etc. // because we may trap when executing those ops on unknown vector elements. @@ -1448,18 +1581,22 @@ LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) { Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), RShuf->getOperand(0), Builder); - return Builder.CreateShuffleVector( - NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask()); + return Builder.CreateShuffleVector(NewBO, UndefValue::get(NewBO->getType()), + LShuf->getMask()); } // If one argument is a shuffle within one vector, the other is a constant, // try moving the shuffle after the binary operation. ShuffleVectorInst *Shuffle = nullptr; Constant *C1 = nullptr; - if (isa(LHS)) Shuffle = cast(LHS); - if (isa(RHS)) Shuffle = cast(RHS); - if (isa(LHS)) C1 = cast(LHS); - if (isa(RHS)) C1 = cast(RHS); + if (isa(LHS)) + Shuffle = cast(LHS); + if (isa(RHS)) + Shuffle = cast(RHS); + if (isa(LHS)) + C1 = cast(LHS); + if (isa(RHS)) + C1 = cast(RHS); if (Shuffle && C1 && (isa(C1) || isa(C1)) && isa(Shuffle->getOperand(1)) && @@ -1469,8 +1606,8 @@ // shuffle(C2, ShMask) = C1 // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>) // reorder is not possible. 
- SmallVector C2M(VWidth, - UndefValue::get(C1->getType()->getScalarType())); + SmallVector C2M( + VWidth, UndefValue::get(C1->getType()->getScalarType())); bool MayChange = true; for (unsigned I = 0; I < VWidth; ++I) { if (ShMask[I] >= 0) { @@ -1487,8 +1624,8 @@ Value *NewLHS = isa(LHS) ? C2 : Shuffle->getOperand(0); Value *NewRHS = isa(LHS) ? Shuffle->getOperand(0) : C2; Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); - return Builder.CreateShuffleVector(NewBO, - UndefValue::get(Inst.getType()), Shuffle->getMask()); + return Builder.CreateShuffleVector(NewBO, UndefValue::get(Inst.getType()), + Shuffle->getMask()); } } @@ -1496,7 +1633,7 @@ } Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { - SmallVector Ops(GEP.op_begin(), GEP.op_end()); + SmallVector Ops(GEP.op_begin(), GEP.op_end()); if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ.getWithInstruction(&GEP))) @@ -1508,7 +1645,7 @@ // by multiples of a zero size type with zero. bool MadeChange = false; Type *IntPtrTy = - DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType()); + DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; @@ -1519,8 +1656,10 @@ // Index type should have the same width as IntPtr Type *IndexTy = (*I)->getType(); - Type *NewIndexType = IndexTy->isVectorTy() ? - VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy; + Type *NewIndexType = + IndexTy->isVectorTy() + ? VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) + : IntPtrTy; // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. 
@@ -1559,7 +1698,7 @@ int DI = -1; - for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { + for (auto I = PN->op_begin() + 1, E = PN->op_end(); I != E; ++I) { GetElementPtrInst *Op2 = dyn_cast(*I); if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) return nullptr; @@ -1659,12 +1798,11 @@ // Note that if our source is a gep chain itself then we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. - if (GEPOperator *SrcGEP = - dyn_cast(Src->getOperand(0))) + if (GEPOperator *SrcGEP = dyn_cast(Src->getOperand(0))) if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return nullptr; // Wait until our source is folded to completion. + return nullptr; // Wait until our source is folded to completion. - SmallVector Indices; + SmallVector Indices; // Find out whether the last index in the source GEP is a sequential idx. bool EndsWithSequential = false; @@ -1676,7 +1814,7 @@ if (EndsWithSequential) { // Replace: gep (gep %P, long B), long A, ... // With: T = long A+B; gep %P, T, ... 
- Value *SO1 = Src->getOperand(Src->getNumOperands()-1); + Value *SO1 = Src->getOperand(Src->getNumOperands() - 1); Value *GO1 = GEP.getOperand(1); // If they aren't the same type, then the input hasn't been processed @@ -1699,15 +1837,15 @@ GEP.setOperand(1, Sum); return &GEP; } - Indices.append(Src->op_begin()+1, Src->op_end()-1); + Indices.append(Src->op_begin() + 1, Src->op_end() - 1); Indices.push_back(Sum); - Indices.append(GEP.op_begin()+2, GEP.op_end()); + Indices.append(GEP.op_begin() + 2, GEP.op_end()); } else if (isa(*GEP.idx_begin()) && cast(*GEP.idx_begin())->isNullValue() && Src->getNumOperands() != 1) { // Otherwise we can do the fold if the first index of the GEP is a zero - Indices.append(Src->op_begin()+1, Src->op_end()); - Indices.append(GEP.idx_begin()+1, GEP.idx_end()); + Indices.append(Src->op_begin() + 1, Src->op_end()); + Indices.append(GEP.idx_begin() + 1, GEP.idx_end()); } if (!Indices.empty()) @@ -1787,12 +1925,11 @@ // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { - if (ArrayType *CATy = - dyn_cast(GEP.getSourceElementType())) { + if (ArrayType *CATy = dyn_cast(GEP.getSourceElementType())) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... - SmallVector Idx(GEP.idx_begin()+1, GEP.idx_end()); + SmallVector Idx(GEP.idx_begin() + 1, GEP.idx_end()); GetElementPtrInst *Res = GetElementPtrInst::Create( StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); @@ -1808,7 +1945,7 @@ } if (ArrayType *XATy = - dyn_cast(StrippedPtrTy->getElementType())){ + dyn_cast(StrippedPtrTy->getElementType())) { // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { // -> GEP [10 x i8]* X, i32 0, ... @@ -1830,7 +1967,7 @@ // -> // %0 = GEP [10 x i8] addrspace(1)* X, ... 
// addrspacecast i8 addrspace(1)* %0 to i8* - SmallVector Idx(GEP.idx_begin(), GEP.idx_end()); + SmallVector Idx(GEP.idx_begin(), GEP.idx_end()); Value *NewGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP( nullptr, StrippedPtr, Idx, GEP.getName()) @@ -1850,7 +1987,7 @@ DL.getTypeAllocSize(SrcElTy->getArrayElementType()) == DL.getTypeAllocSize(ResElTy)) { Type *IdxType = DL.getIntPtrType(GEP.getType()); - Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; + Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)}; Value *NewGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx, @@ -1985,7 +2122,8 @@ } } - if (Operand->getType()->getPointerAddressSpace() != GEP.getAddressSpace()) + if (Operand->getType()->getPointerAddressSpace() != + GEP.getAddressSpace()) return new AddrSpaceCastInst(Operand, GEP.getType()); return new BitCastInst(Operand, GEP.getType()); } @@ -1993,7 +2131,7 @@ // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. 
- SmallVector NewIndices; + SmallVector NewIndices; if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() @@ -2016,8 +2154,7 @@ DL.getPointerSizeInBits(PtrOp->getType()->getPointerAddressSpace()); APInt BasePtrOffset(PtrWidth, 0); Value *UnderlyingPtrOp = - PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, - BasePtrOffset); + PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset); if (auto *AI = dyn_cast(UnderlyingPtrOp)) { if (GEP.accumulateConstantOffset(DL, BasePtrOffset) && BasePtrOffset.isNonNegative()) { @@ -2050,7 +2187,7 @@ static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, const TargetLibraryInfo *TLI) { - SmallVector Worklist; + SmallVector Worklist; Worklist.push_back(AI); do { @@ -2148,7 +2285,7 @@ // Lowering all @llvm.objectsize calls first because they may // use a bitcast/GEP of the alloca we are removing. if (!Users[i]) - continue; + continue; Instruction *I = cast(&*Users[i]); @@ -2186,8 +2323,8 @@ // Replace invoke with a NOP intrinsic to maintain the original CFG Module *M = II->getModule(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); - InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), - None, "", II->getParent()); + InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), None, "", + II->getParent()); } for (auto *DII : DIIs) @@ -2213,8 +2350,7 @@ /// The profitability is out-of concern here and this function should /// be called only if the caller knows this transformation would be /// profitable (e.g., for code size). 
-static Instruction * -tryToMoveFreeBeforeNullTest(CallInst &FI) { +static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI) { Value *Op = FI.getArgOperand(0); BasicBlock *FreeInstrBB = FI.getParent(); BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); @@ -2368,7 +2504,8 @@ LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes()); } - unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes); + unsigned NewWidth = + Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes); // Shrink the condition operand if the new type is smaller than the old type. // This may produce a non-standard type for the switch, but that's ok because @@ -2402,10 +2539,9 @@ if (InsertValueInst *IV = dyn_cast(Agg)) { // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; - for (exti = EV.idx_begin(), insi = IV->idx_begin(), - exte = EV.idx_end(), inse = IV->idx_end(); - exti != exte && insi != inse; - ++exti, ++insi) { + for (exti = EV.idx_begin(), insi = IV->idx_begin(), exte = EV.idx_end(), + inse = IV->idx_end(); + exti != exte && insi != inse; ++exti, ++insi) { if (*insi != *exti) // The insert and extract both reference distinctly different elements. // This means the extract is not influenced by the insert, and we can @@ -2461,7 +2597,7 @@ switch (II->getIntrinsicID()) { case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. + if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); @@ -2478,7 +2614,7 @@ break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. + if (*EV.idx_begin() == 0) { // Normal result. 
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); @@ -2487,7 +2623,7 @@ break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. + if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); @@ -2507,11 +2643,11 @@ // don't want to do the transformation as it loses padding knowledge. if (L->isSimple() && L->hasOneUse()) { // extractvalue has integer indices, getelementptr has Value*s. Convert. - SmallVector Indices; + SmallVector Indices; // Prefix an i32 0 since we need the first element. Indices.push_back(Builder.getInt32(0)); for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); - I != E; ++I) + I != E; ++I) Indices.push_back(Builder.getInt32(*I)); // We need to insert these at the location of the old load, not at that of @@ -2568,10 +2704,8 @@ } static bool shorter_filter(const Value *LHS, const Value *RHS) { - return - cast(LHS->getType())->getNumElements() - < - cast(RHS->getType())->getNumElements(); + return cast(LHS->getType())->getNumElements() < + cast(RHS->getType())->getNumElements(); } Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { @@ -2585,7 +2719,7 @@ // (these are often created by inlining). bool MakeNewInstruction = false; // If true, recreate using the following: SmallVector NewClauses; // - Clauses for the new instruction; - bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. + bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. SmallPtrSet AlreadyCaught; // Typeinfos known caught already. for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { @@ -2637,13 +2771,13 @@ break; } - bool MakeNewFilter = false; // If true, make a new filter. 
+ bool MakeNewFilter = false; // If true, make a new filter. SmallVector NewFilterElts; // New elements. if (isa(FilterClause)) { // Not an empty filter - it contains at least one null typeinfo. assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); Constant *TypeInfo = - Constant::getNullValue(FilterType->getElementType()); + Constant::getNullValue(FilterType->getElementType()); // If this typeinfo is a catch-all then the filter can never match. if (isCatchAll(Personality, TypeInfo)) { // Throw the filter away. @@ -2708,8 +2842,8 @@ MakeNewFilter = true; } if (MakeNewFilter) { - FilterType = ArrayType::get(FilterType->getElementType(), - NewFilterElts.size()); + FilterType = + ArrayType::get(FilterType->getElementType(), NewFilterElts.size()); FilterClause = ConstantArray::get(FilterType, NewFilterElts); MakeNewInstruction = true; } @@ -2733,7 +2867,7 @@ // advantageous because shorter filters are more likely to match, speeding up // unwinding, but mostly because it increases the effectiveness of the other // filter optimizations below. - for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { + for (unsigned i = 0, e = NewClauses.size(); i + 1 < e;) { unsigned j; // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. for (j = i; j != e; ++j) @@ -2744,7 +2878,7 @@ // if sorting them is actually going to do anything so that we only make a // new landingpad instruction if it does. for (unsigned k = i; k + 1 < j; ++k) - if (shorter_filter(NewClauses[k+1], NewClauses[k])) { + if (shorter_filter(NewClauses[k + 1], NewClauses[k])) { // Not sorted, so sort the filters now. Doing an unstable sort would be // correct too but reordering filters pointlessly might confuse users. std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, @@ -2859,8 +2993,8 @@ // If we changed any of the clauses, replace the old landingpad instruction // with a new one. 
if (MakeNewInstruction) { - LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), - NewClauses.size()); + LandingPadInst *NLI = + LandingPadInst::Create(LI.getType(), NewClauses.size()); for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) NLI->addClause(NewClauses[i]); // A landing pad with no clauses must have the cleanup flag set. It is @@ -2896,8 +3030,8 @@ return false; // Do not sink alloca instructions out of the entry block. - if (isa(I) && I->getParent() == - &DestBlock->getParent()->getEntryBlock()) + if (isa(I) && + I->getParent() == &DestBlock->getParent()->getEntryBlock()) return false; // Do not sink into catchswitch blocks. @@ -2928,7 +3062,8 @@ bool InstCombiner::run() { while (!Worklist.isEmpty()) { Instruction *I = Worklist.RemoveOne(); - if (I == nullptr) continue; // skip null values. + if (I == nullptr) + continue; // skip null values. // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I, &TLI)) { @@ -2962,11 +3097,11 @@ // a value even when the operands are not all constants. Type *Ty = I->getType(); if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { - KnownBits Known = computeKnownBits(I, /*Depth*/0, I); + KnownBits Known = computeKnownBits(I, /*Depth*/ 0, I); if (Known.isConstant()) { Constant *C = ConstantInt::get(Ty, Known.getConstant()); - DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << - " from: " << *I << '\n'); + DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C + << " from: " << *I << '\n'); // Add operands to the worklist. 
replaceInstUsesWith(*I, C); @@ -3093,10 +3228,10 @@ InstCombineWorklist &ICWorklist, const TargetLibraryInfo *TLI) { bool MadeIRChange = false; - SmallVector Worklist; + SmallVector Worklist; Worklist.push_back(BB); - SmallVector InstrsForInstCombineWorklist; + SmallVector InstrsForInstCombineWorklist; DenseMap FoldedConstants; do { @@ -3106,7 +3241,7 @@ if (!Visited.insert(BB).second) continue; - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { Instruction *Inst = &*BBI++; // DCE instruction if trivially dead. @@ -3123,8 +3258,8 @@ if (!Inst->use_empty() && (Inst->getNumOperands() == 0 || isa(Inst->getOperand(0)))) if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) { - DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " - << *Inst << '\n'); + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst + << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; if (isInstructionTriviallyDead(Inst, TLI)) @@ -3147,15 +3282,16 @@ if (FoldRes != C) { DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst - << "\n Old = " << *C - << "\n New = " << *FoldRes << '\n'); + << "\n Old = " << *C << "\n New = " << *FoldRes + << '\n'); U = FoldRes; MadeIRChange = true; } } - // Skip processing debug intrinsics in InstCombine. Processing these call instructions - // consumes non-trivial amount of time and provides no value for the optimization. + // Skip processing debug intrinsics in InstCombine. Processing these call + // instructions consumes non-trivial amount of time and provides no value + // for the optimization. 
if (!isa(Inst)) InstrsForInstCombineWorklist.push_back(Inst); } Index: test/Transforms/InstCombine/trigonometric-optimizations.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/trigonometric-optimizations.ll @@ -0,0 +1,89 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare double @tan(double) +declare double @llvm.cos.f64(double) +declare double @llvm.sin.f64(double) + +define double @_Z11tan_mul_cosd(double) { + %2 = call fast double @tan(double %0) #8 + %3 = call fast double @llvm.cos.f64(double %0) + %4 = fmul fast double %2, %3 + ret double %4 + +; CHECK-LABEL:@_Z11tan_mul_cosd( +; CHECK-NEXT: %2 = call fast double @llvm.sin.f64(double %0) +; CHECK-NEXT: ret double %2 + +} + +define double @_Z11sin_mul_cosd(double) { + %2 = call fast double @llvm.sin.f64(double %0) + %3 = call fast double @llvm.cos.f64(double %0) + %4 = fmul fast double %2, %3 + ret double %4 + +; CHECK-LABEL: @_Z11sin_mul_cosd( +; CHECK-NEXT: %2 = fmul fast double %0, 2.000000e+00 +; CHECK-NEXT: %3 = call fast double @llvm.sin.f64(double %2) +; CHECK-NEXT: %4 = fmul fast double %3, 5.000000e-01 +; CHECK-NEXT: ret double %4 +} + +define double @_Z11sin_div_tand(double) { + %2 = call fast double @llvm.sin.f64(double %0) + %3 = call fast double @tan(double %0) #8 + %4 = fdiv fast double %2, %3 + ret double %4 + +; CHECK-LABEL: @_Z11sin_div_tand( +; CHECK-NEXT: %2 = call fast double @llvm.cos.f64(double %0) +; CHECK-NEXT: ret double %2 +} + +define double @_Z11tan_div_sind(double) { + %2 = call fast double @tan(double %0) #8 + %3 = call fast double @llvm.sin.f64(double %0) + %4 = fdiv fast double %2, %3 + ret double %4 + +; CHECK-LABEL: @_Z11tan_div_sind( +; CHECK-NEXT: %2 = call fast double @llvm.cos.f64(double %0) +; CHECK-NEXT: %3 = fdiv fast double 1.000000e+00, %2 +; CHECK-NEXT: ret double %3 +} + +define double @_Z8combinedd(double) { + %2 = call fast double @llvm.sin.f64(double %0) + %3 = call fast double 
@tan(double %0) #8 + %4 = fdiv fast double %2, %3 + %5 = call fast double @tan(double %0) #8 + %6 = call fast double @llvm.cos.f64(double %0) + %7 = fmul fast double %5, %6 + %8 = fmul fast double %4, %7 + ret double %8 + +; CHECK-LABEL: @_Z8combinedd( +; CHECK-NEXT: %2 = fmul fast double %0, 2.000000e+00 +; CHECK-NEXT: %3 = call fast double @llvm.sin.f64(double %2) +; CHECK-NEXT: %4 = fmul fast double %3, 5.000000e-01 +; CHECK-NEXT: ret double %4 +} + +define double @_Z19different_argumentsdd(double, double) { + %3 = call fast double @llvm.sin.f64(double %0) + %4 = call fast double @tan(double %0) #8 + %5 = fdiv fast double %3, %4 + %6 = call fast double @tan(double %1) #8 + %7 = call fast double @llvm.cos.f64(double %1) + %8 = fmul fast double %6, %7 + %9 = fmul fast double %5, %8 + ret double %9 + +; CHECK-LABEL: @_Z19different_argumentsdd( +; CHECK-NEXT: %3 = call fast double @llvm.cos.f64(double %0) +; CHECK-NEXT: %4 = call fast double @llvm.sin.f64(double %1) +; CHECK-NEXT: %5 = fmul fast double %3, %4 +; CHECK-NEXT: ret double %5 +} + +attributes #8 = { nounwind readnone }