diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -163,6 +163,18 @@
   return IC.replaceInstUsesWith(II, NewCall);
 }
 
+static bool isConstantFPZero(Value *V) {
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return C->isZero();
+  return false;
+}
+
+static bool isConstantFPFiniteNonZero(Value *V) {
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isZero() && !C->isInfinity() && !C->isNaN();
+  return false;
+}
+
 Optional<Instruction *>
 GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   Intrinsic::ID IID = II.getIntrinsicID();
@@ -823,6 +835,36 @@
     break;
   }
+  case Intrinsic::amdgcn_fmul_legacy: {
+    Value *Op0 = II.getArgOperand(0);
+    Value *Op1 = II.getArgOperand(1);
+
+    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN
+    // or infinity, gives +0.0.
+    // TODO: Move to InstSimplify?
+    if (isConstantFPZero(Op0) || isConstantFPZero(Op1))
+      return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
+
+    // If we can prove we don't have one of the special cases then we can use a
+    // normal fmul instruction instead.
+    auto *TLI = &IC.getTargetLibraryInfo();
+    bool CanSimplifyToMul = false;
+    if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
+        isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+      // Neither operand is infinity or NaN.
+      CanSimplifyToMul = true;
+    } else if (isConstantFPFiniteNonZero(Op0) ||
+               isConstantFPFiniteNonZero(Op1)) {
+      // One operand is not zero or infinity or NaN.
+      CanSimplifyToMul = true;
+    }
+    if (CanSimplifyToMul) {
+      auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
+      FMul->takeName(&II);
+      return IC.replaceInstUsesWith(II, FMul);
+    }
+    break;
+  }
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
+
+; Simplify to +0.0.
+define float @test_zero(float %x) {
+; CHECK-LABEL: @test_zero(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %call = call float @llvm.amdgcn.fmul.legacy(float %x, float 0.0)
+  ret float %call
+}
+
+; Simplify to +0.0.
+define float @test_negzero(float %y) {
+; CHECK-LABEL: @test_negzero(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %call = call float @llvm.amdgcn.fmul.legacy(float -0.0, float %y)
+  ret float %call
+}
+
+; Combine to fmul because the constant is finite and non-zero.
+define float @test_const(float %x) {
+; CHECK-LABEL: @test_const(
+; CHECK-NEXT:    [[CALL:%.*]] = fmul float [[X:%.*]], 9.950000e+01
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
+  ret float %call
+}
+
+; Combine to fmul because neither argument can be infinity or NaN.
+define float @test_finite(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_finite(
+; CHECK-NEXT:    [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
+; CHECK-NEXT:    [[CALL:%.*]] = fmul float [[XF]], [[YF]]
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %xf = sitofp i32 %x to float
+  %yf = sitofp i32 %y to float
+  %call = call float @llvm.amdgcn.fmul.legacy(float %xf, float %yf)
+  ret float %call
+}
+
+declare float @llvm.amdgcn.fmul.legacy(float, float)
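
For completeness, a negative test would pin down the case the guards exist for: when neither operand is a constant and neither can be proven finite, both the zero fold and the fmul combine must stay out of the way, and the intrinsic call survives unchanged. The sketch below is hypothetical and not part of the patch above; the function name @test_unknown and its CHECK lines are illustrative, not autogenerated output.

; Sketch of a possible negative test: either operand could be +/-0.0,
; infinity or NaN, so no simplification is legal and the call must remain.
define float @test_unknown(float %x, float %y) {
; CHECK-LABEL: @test_unknown(
; CHECK-NEXT:    [[CALL:%.*]] = call float @llvm.amdgcn.fmul.legacy(float [[X:%.*]], float [[Y:%.*]])
; CHECK-NEXT:    ret float [[CALL]]
;
  %call = call float @llvm.amdgcn.fmul.legacy(float %x, float %y)
  ret float %call
}

; Declaration repeated so the snippet stands alone.
declare float @llvm.amdgcn.fmul.legacy(float, float)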