diff --git a/clang/docs/HLSL/EntryFunctions.rst b/clang/docs/HLSL/EntryFunctions.rst
--- a/clang/docs/HLSL/EntryFunctions.rst
+++ b/clang/docs/HLSL/EntryFunctions.rst
@@ -46,7 +46,9 @@
 their semantic values populated, and a call to the user-defined function.
 After the call instruction the return value (if any) is saved using a
 target-appropriate intrinsic for storing outputs (for DirectX, the
-``llvm.dx.store.output``). Global destructors are not supported in HLSL.
+``llvm.dx.store.output``). Lastly, any present global destructors will be called
+immediately before the return. HLSL does not support C++ ``atexit``
+registrations; instead, calls to global destructors are emitted at compile time.
 
 .. note::
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -46,7 +46,7 @@
   virtual ~CGHLSLRuntime() {}
 
   void annotateHLSLResource(const VarDecl *D, llvm::GlobalVariable *GV);
-  void generateGlobalCtorCalls();
+  void generateGlobalCtorDtorCalls();
 
   void finishCodeGen();
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -55,7 +55,7 @@
   if (T.getArch() == Triple::ArchType::dxil)
     addDxilValVersion(TargetOpts.DxilValidatorVersion, M);
 
-  generateGlobalCtorCalls();
+  generateGlobalCtorDtorCalls();
 }
 
 void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) {
@@ -146,12 +146,13 @@
   B.CreateRetVoid();
 }
 
-void CGHLSLRuntime::generateGlobalCtorCalls() {
-  llvm::Module &M = CGM.getModule();
-  const auto *GlobalCtors = M.getNamedGlobal("llvm.global_ctors");
-  if (!GlobalCtors)
+static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,
+                            bool CtorOrDtor) {
+  const auto *GV =
+      M.getNamedGlobal(CtorOrDtor ? "llvm.global_ctors" : "llvm.global_dtors");
+  if (!GV)
     return;
-  const auto *CA = dyn_cast<ConstantArray>(GlobalCtors->getInitializer());
+  const auto *CA = dyn_cast<ConstantArray>(GV->getInitializer());
   if (!CA)
     return;
   // The global_ctor array elements are a struct [Priority, Fn *, COMDat].
@@ -168,8 +169,16 @@
            "HLSL doesn't support setting priority for global ctors.");
     assert(isa<ConstantPointerNull>(CS->getOperand(2)) &&
            "HLSL doesn't support COMDat for global ctors.");
-    CtorFns.push_back(cast<Function>(CS->getOperand(1)));
+    Fns.push_back(cast<Function>(CS->getOperand(1)));
   }
+}
+
+void CGHLSLRuntime::generateGlobalCtorDtorCalls() {
+  llvm::Module &M = CGM.getModule();
+  SmallVector<Function *> CtorFns;
+  SmallVector<Function *> DtorFns;
+  gatherFunctions(CtorFns, M, true);
+  gatherFunctions(DtorFns, M, false);
 
   // Insert a call to the global constructor at the beginning of the entry block
   // to externally exported functions. This is a bit of a hack, but HLSL allows
@@ -180,5 +189,10 @@
     IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin());
     for (auto *Fn : CtorFns)
       B.CreateCall(FunctionCallee(Fn));
+
+    // Insert global dtors before the terminator of the last basic block.
+    B.SetInsertPoint(F.back().getTerminator());
+    for (auto *Fn : DtorFns)
+      B.CreateCall(FunctionCallee(Fn));
   }
 }
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -2348,6 +2348,10 @@
   if (D.getTLSKind())
     return emitGlobalDtorWithTLRegDtor(CGF, D, Dtor, Addr);
 
+  // HLSL doesn't support atexit.
+  if (CGM.getLangOpts().HLSL)
+    return CGM.AddCXXDtorEntry(Dtor, Addr);
+
   // The default behavior is to use atexit.
   CGF.registerGlobalDtorWithAtExit(D, Dtor, Addr);
 }
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -10,6 +10,10 @@
   i = 12;
 }
 
+__attribute__((destructor)) void call_me_last(void) {
+  i = 0; // Zeroes i; the expected IR has this call immediately before `ret void`.
+}
+
 [numthreads(1,1,1)]
 void main(unsigned GI : SV_GroupIndex) {}
 
@@ -19,4 +23,5 @@
 //CHECK-NEXT:   call void @"?then_call_me@@YAXXZ"()
 //CHECK-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
 //CHECK-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
+//CHECK-NEXT:   call void @"?call_me_last@@YAXXZ"()
 //CHECK-NEXT:   ret void
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -S -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+
+struct Tail { // Type with a user-provided ctor/dtor; used as a function-scope static in Wag().
+  Tail() {
+    add(1); // Construction bumps the counter.
+  }
+
+  ~Tail() {
+    add(-1); // Destruction undoes the constructor's increment.
+  }
+
+  void add(int V) {
+    static int Count = 0; // Function-local static counter, adjusted by V on each call.
+    Count += V;
+  }
+};
+
+struct Pupper { // Type with a user-provided ctor/dtor; instantiated as the global GlobalPup.
+  static int Count; // Declared here; defined at namespace scope later in this file.
+
+  Pupper() {
+    Count += 1; // :)
+  }
+
+  ~Pupper() {
+    Count -= 1; // :(
+  }
+} GlobalPup; // Global instance whose destructor call is expected in @_GLOBAL__D_a.
+
+void Wag() {
+  static Tail T; // Function-scope static with a user-provided destructor.
+  T.add(0); // Uses T; adding 0 leaves the counter unchanged.
+}
+
+int Pupper::Count = 0;
+
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+  Wag(); // Exercises the function-scope static Tail declared inside Wag().
+}
+
+//CHECK:      define void @main()
+//CHECK-NEXT: entry:
+//CHECK-NEXT:   call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
+//CHECK-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+//CHECK-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
+//CHECK-NEXT:   call void @_GLOBAL__D_a()
+//CHECK-NEXT:   ret void
+
+// This is really just a sanity check I needed for myself to verify that
+// function scope static variables also get destroyed properly.
+
+//CHECK: define internal void @_GLOBAL__D_a()
+//CHECK-NEXT: entry:
+//CHECK-NEXT:   call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
+//CHECK-NEXT:   call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
+//CHECK-NEXT:   ret void