diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -467,6 +467,8 @@
 /// propagate signaling NaN inputs per IEEE 754-2008 (AMDGPU Only)
 CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1)
 
+CODEGENOPT(GPUFastRelaxedMath, 1, 0)
+
 // Whether to emit Swift Async function extended frame information: auto,
 // never, always.
 ENUM_CODEGENOPT(SwiftAsyncFramePointer, SwiftAsyncFramePointerKind, 2,
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -964,6 +964,10 @@
   Alias<fgpu_flush_denormals_to_zero>;
 def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">,
   Alias<fno_gpu_flush_denormals_to_zero>;
+defm gpu_fast_relaxed_math : BoolFOption<"gpu-fast-relaxed-math",
+  CodeGenOpts<"GPUFastRelaxedMath">, DefaultFalse,
+  PosFlag<SetTrue, [CC1Option]>,
+  NegFlag<SetFalse>>;
 defm gpu_rdc : BoolFOption<"gpu-rdc",
   LangOpts<"GPURelocatableDeviceCode">, DefaultFalse,
   PosFlag<SetTrue, [CC1Option], "Generate relocatable device code, also known as separate compilation mode">,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -919,6 +919,7 @@
   if (getCodeGenOpts().SkipRaxSetup)
     getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1);
 
+  getTargetCodeGenInfo().emitTargetGlobals(*this);
   getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames);
 
   EmitBackendOptionsMetadata(getCodeGenOpts());
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -63,6 +63,9 @@
       CodeGen::CodeGenModule &CGM,
       const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {}
 
+  /// Provides a convenient hook to handle extra target-specific globals.
+  virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {}
+
   /// Any further codegen related checks that need to be done on a function call
   /// in a target specific manner.
   virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -34,6 +34,7 @@
 #include "llvm/IR/IntrinsicsS390.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetParser.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm> // std::sort
 
@@ -9307,6 +9308,8 @@
   void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                  CodeGenModule &CGM) const;
 
+  void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
+
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
@@ -9422,6 +9425,66 @@
   }
 }
 
+void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
+    CodeGen::CodeGenModule &CGM) const {
+  if (!CGM.getTriple().isAMDGCN())
+    return;
+
+  StringRef CPU = CGM.getTarget().getTargetOpts().CPU;
+  llvm::AMDGPU::GPUKind Kind = llvm::AMDGPU::parseArchAMDGCN(CPU);
+  if (Kind == llvm::AMDGPU::GK_NONE)
+    return;
+  unsigned Features = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+
+  // Parse the ISA version from the processor name, e.g. gfx90a -> 9.0.a.
+  unsigned Major;
+  unsigned Minor;
+  unsigned Stepping;
+  StringRef Identifier =
+      CPU.drop_while([](char C) { return !llvm::isDigit(C); });
+  if (Identifier.take_back(1).getAsInteger(16, Stepping) ||
+      Identifier.drop_back(1).take_back(1).getAsInteger(10, Minor) ||
+      Identifier.drop_back(2).getAsInteger(10, Major))
+    return;
+
+  auto AddGlobal = [&](StringRef Name, unsigned Value, unsigned Size = 8) {
+    if (CGM.getModule().getNamedGlobal(Name))
+      return;
+
+    auto *Type =
+        llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), Size);
+    auto *GV = new llvm::GlobalVariable(
+        CGM.getModule(), Type, /*isConstant=*/true,
+        llvm::GlobalValue::LinkageTypes::WeakODRLinkage,
+        llvm::ConstantInt::get(Type, Value), Name, nullptr,
+        llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
+        /*AddressSpace=*/4);
+    GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
+    GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
+    GV->setAlignment(CGM.getDataLayout().getABITypeAlign(Type));
+  };
+
+  // TODO: Add flags to toggle these as-needed.
+  bool DenormAtZero = !((Features & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
+                        (Features & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
+  bool Wavefront64 = !(Features & llvm::AMDGPU::FEATURE_WAVE32);
+  bool FastRelaxedMath = CGM.getCodeGenOpts().GPUFastRelaxedMath;
+  bool FiniteOnly = false;
+  bool UnsafeMath = false;
+  bool CorrectSqrt = true;
+
+  // Control constants for math operations.
+  AddGlobal("__oclc_daz_opt", DenormAtZero);
+  AddGlobal("__oclc_wavefrontsize64", Wavefront64);
+  AddGlobal("__oclc_finite_only_opt", FiniteOnly || FastRelaxedMath);
+  AddGlobal("__oclc_unsafe_math_opt", UnsafeMath || FastRelaxedMath);
+  AddGlobal("__oclc_correctly_rounded_sqrt32", CorrectSqrt);
+
+  // Control constants for the system.
+ AddGlobal("__oclc_ISA_version", Minor + Major * 1000, 32); + AddGlobal("__oclc_ABI_version", 400, 32); +} + void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (requiresAMDGPUProtectedVisibility(D, GV)) { diff --git a/clang/test/CodeGen/amdgcn-occl-constants.c b/clang/test/CodeGen/amdgcn-occl-constants.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/amdgcn-occl-constants.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -fgpu-fast-relaxed-math \ +// RUN: -S -emit-llvm -o - %s | FileCheck %s --check-prefix=FAST + +void foo() {} + +// CHECK: @__oclc_daz_opt = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// CHECK: @__oclc_wavefrontsize64 = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// CHECK: @__oclc_finite_only_opt = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// CHECK: @__oclc_unsafe_math_opt = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// CHECK: @__oclc_correctly_rounded_sqrt32 = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// CHECK: @__oclc_ISA_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 9010, align 4 +// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400, align 4 + +// FAST: @__oclc_daz_opt = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 0, align 1 +// FAST: @__oclc_wavefrontsize64 = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FAST: @__oclc_finite_only_opt = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FAST: @__oclc_unsafe_math_opt = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FAST: @__oclc_correctly_rounded_sqrt32 = weak_odr hidden local_unnamed_addr addrspace(4) constant i8 1, align 1 +// FAST: @__oclc_ISA_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 9010, align 4 +// FAST: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400, align 4