Index: llvm/include/llvm/Analysis/ConstantFolding.h =================================================================== --- llvm/include/llvm/Analysis/ConstantFolding.h +++ llvm/include/llvm/Analysis/ConstantFolding.h @@ -31,6 +31,7 @@ class GlobalValue; class Instruction; class TargetLibraryInfo; +class TargetTransformInfo; class Type; /// If this constant is a constant offset from a global, return the global and @@ -47,7 +48,8 @@ /// this function can only fail when attempting to fold instructions like loads /// and stores, which have no constant expression form. Constant *ConstantFoldInstruction(Instruction *I, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr); + const TargetLibraryInfo *TLI = nullptr, + const TargetTransformInfo *TTI = nullptr); /// ConstantFoldConstant - Fold the constant using the specified DataLayout. /// This function always returns a non-null constant: Either the folding result, @@ -63,7 +65,8 @@ /// Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr); + const TargetLibraryInfo *TLI = nullptr, + const TargetTransformInfo *TTI = nullptr); /// ConstantFoldCompareInstOperands - Attempt to constant fold a compare /// instruction (icmp/fcmp) with the specified operands. If it fails, it Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -824,6 +824,10 @@ /// Return true if the hardware has a fast square-root instruction. 
bool haveFastSqrt(Type *Ty) const; + /// Return true if folding a floating-point instruction to a constant + /// should produce zero instead of a denormal + bool enableFPDenormalFlushToZero(const Instruction &Inst) const; + /// Return true if it is faster to check if a floating-point value is NaN /// (or not-NaN) versus a comparison against a constant FP zero value. /// Targets should override this if materializing a 0.0 for comparison is @@ -1597,6 +1601,7 @@ bool *Fast) = 0; virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; + virtual bool enableFPDenormalFlushToZero(const Instruction &Inst) = 0; virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; virtual InstructionCost getFPOpCost(Type *Ty) = 0; virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, @@ -2059,6 +2064,10 @@ } bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } + bool enableFPDenormalFlushToZero(const Instruction &Inst) override { + return Impl.enableFPDenormalFlushToZero(Inst); + } + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); } Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -361,6 +361,10 @@ bool haveFastSqrt(Type *Ty) const { return false; } + bool enableFPDenormalFlushToZero(const Instruction &Inst) const { + return false; + } + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; } InstructionCost getFPOpCost(Type *Ty) const { Index: llvm/include/llvm/Transforms/InstCombine/InstCombiner.h =================================================================== --- llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -43,10 +43,6 @@ /// This class provides both the 
logic to recursively visit instructions and /// combine them. class LLVM_LIBRARY_VISIBILITY InstCombiner { - /// Only used to call target specific intrinsic combining. - /// It must **NOT** be used for any other purpose, as InstCombine is a - /// target-independent canonicalization transform. - TargetTransformInfo &TTI; public: /// Maximum size of array considered when transforming. @@ -69,6 +65,10 @@ // Required analyses. AssumptionCache &AC; TargetLibraryInfo &TLI; + /// Only used to call target specific intrinsic combining. + /// It must **NOT** be used for any other purpose, as InstCombine is a + /// target-independent canonicalization transform. + TargetTransformInfo &TTI; DominatorTree &DT; const DataLayout &DL; const SimplifyQuery SQ; @@ -89,8 +89,8 @@ DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI) - : TTI(TTI), Builder(Builder), Worklist(Worklist), - MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), + : Builder(Builder), Worklist(Worklist), + MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), TTI(TTI), DT(DT), DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} virtual ~InstCombiner() {} Index: llvm/lib/Analysis/ConstantFolding.cpp =================================================================== --- llvm/lib/Analysis/ConstantFolding.cpp +++ llvm/lib/Analysis/ConstantFolding.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/config.h" @@ -1006,14 +1007,47 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, ArrayRef<Constant *> Ops, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI = nullptr) { Type 
*DestTy = InstOrCE->getType(); - if (Instruction::isUnaryOp(Opcode)) - return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL); + if (Instruction::isUnaryOp(Opcode)) { + Constant *C = ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL); + if (auto *CFP = dyn_cast_or_null<ConstantFP>(C)) { + // If folding produces a floating point denormal, check whether + // it should be forced to zero. + if (CFP->getValueAPF().isDenormal()) { + if (auto *I = dyn_cast<Instruction>(InstOrCE)) { + // If TTI is not available to determine support for flushing + // denormals to zero, do not fold the instruction. + if (!TTI) + return nullptr; + if (TTI->enableFPDenormalFlushToZero(*I)) + return Constant::getNullValue(C->getType()); + } + } + } + return C; + } - if (Instruction::isBinaryOp(Opcode)) - return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL); + if (Instruction::isBinaryOp(Opcode)) { + Constant *C = ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL); + if (auto *CFP = dyn_cast_or_null<ConstantFP>(C)) { + // If folding produces a floating point denormal, check whether + // it should be forced to zero. + if (CFP->getValueAPF().isDenormal()) { + if (auto *I = dyn_cast<Instruction>(InstOrCE)) { + // If TTI is not available to determine support for flushing + // denormals to zero, do not fold the instruction. + if (!TTI) + return nullptr; + if (TTI->enableFPDenormalFlushToZero(*I)) + return Constant::getNullValue(C->getType()); + } + } + } + return C; + } if (Instruction::isCast(Opcode)) return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL); @@ -1106,7 +1140,8 @@ } // end anonymous namespace Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI) { // Handle PHI nodes quickly here... 
if (auto *PN = dyn_cast<PHINode>(I)) { Constant *CommonValue = nullptr; @@ -1166,7 +1201,7 @@ if (auto *EVI = dyn_cast<ExtractValueInst>(I)) return ConstantExpr::getExtractValue(Ops[0], EVI->getIndices()); - return ConstantFoldInstOperands(I, Ops, DL, TLI); + return ConstantFoldInstOperands(I, Ops, DL, TLI, TTI); } Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL, @@ -1178,8 +1213,9 @@ Constant *llvm::ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI); + const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI) { + return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI, TTI); } Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate, Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -547,6 +547,11 @@ return TTIImpl->haveFastSqrt(Ty); } +bool TargetTransformInfo::enableFPDenormalFlushToZero( + const Instruction &Inst) const { + return TTIImpl->enableFPDenormalFlushToZero(Inst); +} + bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty); } Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -67,6 +67,8 @@ bool areInlineCompatible(const Function *Caller, const Function *Callee) const; + bool enableFPDenormalFlushToZero(const Instruction &Inst) const; + /// \name Scalar TTI Implementations /// @{ Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ 
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -50,6 +50,25 @@ return (CallerBits & CalleeBits) == CalleeBits; } +bool AArch64TTIImpl::enableFPDenormalFlushToZero( + const Instruction &Inst) const { + if (!Inst.isFast()) + return false; + + switch (Inst.getOpcode()) { + case Instruction::FNeg: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + return true; + default: + break; + } + return false; +} + /// Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -106,6 +106,8 @@ bool areInlineCompatible(const Function *Caller, const Function *Callee) const; + bool enableFPDenormalFlushToZero(const Instruction &Inst) const; + bool enableInterleavedAccessVectorization() { return true; } TTI::AddressingModeKind Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -100,6 +100,24 @@ return MatchExact && MatchSubset; } +bool ARMTTIImpl::enableFPDenormalFlushToZero(const Instruction &Inst) const { + if (!Inst.isFast()) + return false; + + switch (Inst.getOpcode()) { + case Instruction::FNeg: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + return true; + default: + break; + } + return false; +} + TTI::AddressingModeKind ARMTTIImpl::getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp 
=================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3992,7 +3992,7 @@ // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) { - if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) { + if (Constant *C = ConstantFoldInstruction(I, DL, &TLI, &TTI)) { LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); @@ -4199,6 +4199,7 @@ /// whose condition is a known constant, we only visit the reachable successors. static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, InstructionWorklist &ICWorklist) { bool MadeIRChange = false; SmallPtrSet<BasicBlock *, 32> Visited; @@ -4220,7 +4221,7 @@ // ConstantProp instruction if trivially constant. if (!Inst.use_empty() && (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0)))) - if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) { + if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI, TTI)) { LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst << '\n'); Inst.replaceAllUsesWith(C); @@ -4366,7 +4367,7 @@ LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); - MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); + MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, &TTI, Worklist); InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, ORE, BFI, PSI, DL, LI); Index: llvm/test/Transforms/InstCombine/AArch64/constant-fold-fp-denormal.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/AArch64/constant-fold-fp-denormal.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +target triple = "aarch64--linux-gnu" + +define 
float @test_float() { +; CHECK-LABEL: @test_float( +; CHECK-NEXT: ret float 0x3800000000000000 + %mul = fmul float 0x3810000000000000, 5.000000e-01 + ret float %mul +} + +define double @test_double() { +; CHECK-LABEL: @test_double( +; CHECK-NEXT: ret double 0x8000000000000 + %mul = fmul double 0x10000000000000, 5.000000e-01 + ret double %mul +} + +define float @test_float_fast() { +; CHECK-LABEL: @test_float_fast( +; CHECK-NEXT: ret float 0.000000e+00 + %mul = fmul fast float 0x3810000000000000, 5.000000e-01 + ret float %mul +} + +define double @test_double_fast() { +; CHECK-LABEL: @test_double_fast( +; CHECK-NEXT: ret double 0.000000e+00 + %mul = fmul fast double 0x10000000000000, 5.000000e-01 + ret double %mul +} Index: llvm/test/Transforms/InstCombine/ARM/constant-fold-fp-denormal.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/ARM/constant-fold-fp-denormal.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +target triple = "armv8-arm-none-eabi" + +define float @test_float() { +; CHECK-LABEL: @test_float( +; CHECK-NEXT: ret float 0x3800000000000000 + %mul = fmul float 0x3810000000000000, 5.000000e-01 + ret float %mul +} + +define double @test_double() { +; CHECK-LABEL: @test_double( +; CHECK-NEXT: ret double 0x8000000000000 + %mul = fmul double 0x10000000000000, 5.000000e-01 + ret double %mul +} + +define float @test_float_fast() { +; CHECK-LABEL: @test_float_fast( +; CHECK-NEXT: ret float 0.000000e+00 + %mul = fmul fast float 0x3810000000000000, 5.000000e-01 + ret float %mul +} + +define double @test_double_fast() { +; CHECK-LABEL: @test_double_fast( +; CHECK-NEXT: ret double 0.000000e+00 + %mul = fmul fast double 0x10000000000000, 5.000000e-01 + ret double %mul +}