diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -46,7 +46,6 @@ virtual ~CGHLSLRuntime() {} void annotateHLSLResource(const VarDecl *D, llvm::GlobalVariable *GV); - void generateGlobalCtorDtorCalls(); void finishCodeGen(); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -61,7 +61,6 @@ if (T.getArch() == Triple::ArchType::dxil) addDxilValVersion(TargetOpts.DxilValidatorVersion, M); - generateGlobalCtorDtorCalls(); if (CGM.getCodeGenOpts().OptimizationLevel == 0) addDisableOptimizations(M); } @@ -162,64 +161,3 @@ // See: https://github.com/llvm/llvm-project/issues/57875 B.CreateRetVoid(); } - -static void gatherFunctions(SmallVectorImpl &Fns, llvm::Module &M, - bool CtorOrDtor) { - const auto *GV = - M.getNamedGlobal(CtorOrDtor ? "llvm.global_ctors" : "llvm.global_dtors"); - if (!GV) - return; - const auto *CA = dyn_cast(GV->getInitializer()); - if (!CA) - return; - // The global_ctor array elements are a struct [Priority, Fn *, COMDat]. - // HLSL neither supports priorities or COMDat values, so we will check those - // in an assert but not handle them. 
- - llvm::SmallVector CtorFns; - for (const auto &Ctor : CA->operands()) { - if (isa(Ctor)) - continue; - ConstantStruct *CS = cast(Ctor); - - assert(cast(CS->getOperand(0))->getValue() == 65535 && - "HLSL doesn't support setting priority for global ctors."); - assert(isa(CS->getOperand(2)) && - "HLSL doesn't support COMDat for global ctors."); - Fns.push_back(cast(CS->getOperand(1))); - } -} - -void CGHLSLRuntime::generateGlobalCtorDtorCalls() { - llvm::Module &M = CGM.getModule(); - SmallVector CtorFns; - SmallVector DtorFns; - gatherFunctions(CtorFns, M, true); - gatherFunctions(DtorFns, M, false); - - // Insert a call to the global constructor at the beginning of the entry block - // to externally exported functions. This is a bit of a hack, but HLSL allows - // global constructors, but doesn't support driver initialization of globals. - for (auto &F : M.functions()) { - if (!F.hasFnAttribute("hlsl.shader")) - continue; - IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin()); - for (auto *Fn : CtorFns) - B.CreateCall(FunctionCallee(Fn)); - - // Insert global dtors before the terminator of the last instruction - B.SetInsertPoint(F.back().getTerminator()); - for (auto *Fn : DtorFns) - B.CreateCall(FunctionCallee(Fn)); - } - - // No need to keep global ctors/dtors for non-lib profile after call to - // ctors/dtors added for entry. 
- Triple T(M.getTargetTriple()); - if (T.getEnvironment() != Triple::EnvironmentType::Library) { - if (auto *GV = M.getNamedGlobal("llvm.global_ctors")) - GV->eraseFromParent(); - if (auto *GV = M.getNamedGlobal("llvm.global_dtors")) - GV->eraseFromParent(); - } -} diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl deleted file mode 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -S -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s - -int i; - -__attribute__((constructor)) void call_me_first(void) { - i = 12; -} - -__attribute__((constructor)) void then_call_me(void) { - i = 12; -} - -__attribute__((destructor)) void call_me_last(void) { - i = 0; -} - -[numthreads(1,1,1)] -void main(unsigned GI : SV_GroupIndex) {} - -// Make sure global variable for ctors/dtors removed. -// CHECK-NOT:@llvm.global_ctors -// CHECK-NOT:@llvm.global_dtors - -//CHECK: define void @main() -//CHECK-NEXT: entry: -//CHECK-NEXT: call void @"?call_me_first@@YAXXZ"() -//CHECK-NEXT: call void @"?then_call_me@@YAXXZ"() -//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() -//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) -//CHECK-NEXT: call void @"?call_me_last@@YAXXZ"( -//CHECK-NEXT: ret void diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl deleted file mode 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl +++ /dev/null @@ -1,23 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -S -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s - -// Make sure global variable for ctors exist for lib profile. 
-// CHECK:@llvm.global_ctors - -RWBuffer Buffer; - -[shader("compute")] -[numthreads(1,1,1)] -void FirstEntry() {} - -// CHECK: define void @FirstEntry() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() - -[shader("compute")] -[numthreads(1,1,1)] -void SecondEntry() {} - -// CHECK: define void @SecondEntry() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() -// CHECK-NEXT: call void @"?SecondEntry@@YAXXZ"() diff --git a/clang/test/CodeGenHLSL/GlobalConstructors.hlsl b/clang/test/CodeGenHLSL/GlobalConstructors.hlsl deleted file mode 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructors.hlsl +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -S -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s - -RWBuffer Buffer; - -[numthreads(1,1,1)] -void main(unsigned GI : SV_GroupIndex) {} - -// Make sure global variable for ctors/dtors removed. -// CHECK-NOT:@llvm.global_ctors -// CHECK-NOT:@llvm.global_dtors -//CHECK: define void @main() -//CHECK-NEXT: entry: -//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructors.hlsl() -//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() -//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) -//CHECK-NEXT: ret void diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl deleted file mode 100644 --- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl +++ /dev/null @@ -1,66 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -S -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -S -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK - -// Make sure global variable for dtors exist for lib profile. 
-// LIB:@llvm.global_dtors -// Make sure global variable for dtors removed for compute profile. -// CS-NOT:llvm.global_dtors - -struct Tail { - Tail() { - add(1); - } - - ~Tail() { - add(-1); - } - - void add(int V) { - static int Count = 0; - Count += V; - } -}; - -struct Pupper { - static int Count; - - Pupper() { - Count += 1; // :) - } - - ~Pupper() { - Count -= 1; // :( - } -} GlobalPup; - -void Wag() { - static Tail T; - T.add(0); -} - -int Pupper::Count = 0; - -[numthreads(1,1,1)] -void main(unsigned GI : SV_GroupIndex) { - Wag(); -} - -// Make sure global variable for ctors/dtors removed. -// CHECK-NOT:@llvm.global_ctors -// CHECK-NOT:@llvm.global_dtors -//CHECK: define void @main() -//CHECK-NEXT: entry: -//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl() -//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() -//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) -//CHECK-NEXT: call void @_GLOBAL__D_a() -//CHECK-NEXT: ret void - -// This is really just a sanity check I needed for myself to verify that -// function scope static variables also get destroyed properly. 
- -//CHECK: define internal void @_GLOBAL__D_a() -//CHECK-NEXT: entry: -//CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A") -//CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A") -//CHECK-NEXT: ret void diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt --- a/llvm/lib/Target/DirectX/CMakeLists.txt +++ b/llvm/lib/Target/DirectX/CMakeLists.txt @@ -24,6 +24,7 @@ DXILResource.cpp DXILResourceAnalysis.cpp DXILTranslateMetadata.cpp + GlobalCtorDtorCalls.cpp PointerTypeAnalysis.cpp LINK_COMPONENTS diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -17,6 +17,7 @@ #include "DirectX.h" #include "DirectXSubtarget.h" #include "DirectXTargetTransformInfo.h" +#include "GlobalCtorDtorCalls.h" #include "TargetInfo/DirectXTargetInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" @@ -103,12 +104,21 @@ PM.addPass(DXILResourcePrinterPass(dbgs())); return true; } + if (PassName == "global-ctor-dtor-calls") { + PM.addPass(GlobalCtorDtorCalls()); + return true; + } return false; }); PB.registerAnalysisRegistrationCallback([](ModuleAnalysisManager &MAM) { MAM.registerPass([&] { return DXILResourceAnalysis(); }); }); + + PB.registerPipelineEarlySimplificationEPCallback( + [](ModulePassManager &PM, OptimizationLevel Level) { + PM.addPass(GlobalCtorDtorCalls()); + }); } bool DirectXTargetMachine::addPassesToEmitFile( diff --git a/llvm/lib/Target/DirectX/GlobalCtorDtorCalls.h b/llvm/lib/Target/DirectX/GlobalCtorDtorCalls.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/DirectX/GlobalCtorDtorCalls.h @@ -0,0 +1,30 @@ +//===- Target/DirectX/GlobalCtorDtorCalls.h - calls on global ctor/dtor ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Transform pass to generate calls on global ctor and dtor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_DIRECTX_GLOBALCTORDTORCALLS_H
+#define LLVM_LIB_TARGET_DIRECTX_GLOBALCTORDTORCALLS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Module pass that calls the module's global ctors at the start, and its
+/// global dtors before each return, of every "hlsl.shader" function.
+class GlobalCtorDtorCalls : public PassInfoMixin<GlobalCtorDtorCalls> {
+public:
+  explicit GlobalCtorDtorCalls() {}
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_DIRECTX_GLOBALCTORDTORCALLS_H
diff --git a/llvm/lib/Target/DirectX/GlobalCtorDtorCalls.cpp b/llvm/lib/Target/DirectX/GlobalCtorDtorCalls.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/DirectX/GlobalCtorDtorCalls.cpp
@@ -0,0 +1,108 @@
+//===- Target/DirectX/GlobalCtorDtorCalls.cpp - calls on global ctor/dtor -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Transform pass to generate calls on global ctor and dtor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GlobalCtorDtorCalls.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+/// Append to \p Fns, in array order, the functions listed in the initializer of
+/// @llvm.global_ctors (\p CtorOrDtor true) or @llvm.global_dtors (false) in
+/// \p M. Appends nothing if that global is missing, has no initializer, or its
+/// initializer is not a ConstantArray.
+static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,
+                            bool CtorOrDtor) {
+  const auto *GV =
+      M.getNamedGlobal(CtorOrDtor ? "llvm.global_ctors" : "llvm.global_dtors");
+  if (!GV || !GV->hasInitializer())
+    return;
+  const auto *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA)
+    return;
+
+  // The global_ctor array elements are a struct [Priority, Fn *, COMDat].
+  // HLSL supports neither priorities nor COMDat values, so we will check those
+  // in an assert but not handle them.
+  for (const auto &Ctor : CA->operands()) {
+    if (isa<ConstantAggregateZero>(Ctor))
+      continue;
+    ConstantStruct *CS = cast<ConstantStruct>(Ctor);
+
+    assert(cast<ConstantInt>(CS->getOperand(0))->getValue() == 65535 &&
+           "HLSL doesn't support setting priority for global ctors.");
+    assert(isa<ConstantPointerNull>(CS->getOperand(2)) &&
+           "HLSL doesn't support COMDat for global ctors.");
+    Fns.push_back(cast<Function>(CS->getOperand(1)));
+  }
+}
+
+/// Insert calls to the module's global ctors at the top of the entry block,
+/// and calls to its global dtors before every return, of each defined function
+/// carrying the "hlsl.shader" attribute. Unless the target environment is
+/// Library, @llvm.global_ctors and @llvm.global_dtors are erased afterwards.
+PreservedAnalyses GlobalCtorDtorCalls::run(Module &M,
+                                           ModuleAnalysisManager &AM) {
+  SmallVector<Function *> CtorFns;
+  SmallVector<Function *> DtorFns;
+  gatherFunctions(CtorFns, M, true);
+  gatherFunctions(DtorFns, M, false);
+  bool Changed = false;
+
+  // Insert a call to the global constructor at the beginning of the entry block
+  // to externally exported functions. This is a bit of a hack, but HLSL allows
+  // global constructors, but doesn't support driver initialization of globals.
+  for (auto &F : M.functions()) {
+    // Declarations have no entry block to insert into.
+    if (F.isDeclaration() || !F.hasFnAttribute("hlsl.shader"))
+      continue;
+    IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin());
+    for (auto *Fn : CtorFns)
+      B.CreateCall(FunctionCallee(Fn));
+    Changed |= !CtorFns.empty();
+
+    // Insert global dtors before every return so they run on each exit path,
+    // not just the one that ends the function's last basic block.
+    for (auto &BB : F) {
+      auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator());
+      if (!Ret)
+        continue;
+      B.SetInsertPoint(Ret);
+      for (auto *Fn : DtorFns)
+        B.CreateCall(FunctionCallee(Fn));
+      Changed |= !DtorFns.empty();
+    }
+  }
+
+  // No need to keep global ctors/dtors for non-lib profile after call to
+  // ctors/dtors added for entry.
+  Triple T(M.getTargetTriple());
+  if (T.getEnvironment() != Triple::EnvironmentType::Library) {
+    if (auto *GV = M.getNamedGlobal("llvm.global_ctors")) {
+      GV->eraseFromParent();
+      Changed = true;
+    }
+    if (auto *GV = M.getNamedGlobal("llvm.global_dtors")) {
+      GV->eraseFromParent();
+      Changed = true;
+    }
+  }
+
+  // The module was modified unless nothing was inserted or erased.
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/test/CodeGen/DirectX/global_constructor_cs.ll b/llvm/test/CodeGen/DirectX/global_constructor_cs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/global_constructor_cs.ll
@@ -0,0 +1,85 @@
+; RUN: opt -S -passes="global-ctor-dtor-calls" < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-unknown-shadermodel6.0-compute"
+
+; Make sure global variable for ctors/dtors removed.
+; CHECK-NOT:@llvm.global_ctors
+; CHECK-NOT:@llvm.global_dtors
+
+
+%"class.hlsl::RWBuffer" = type { ptr }
+
+@"?Buffer@@3V?$RWBuffer@M@hlsl@@A" = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_GlobalConstructors.hlsl, ptr null }]
+
+; Function Attrs: nounwind
+define internal void @"??__EBuffer@@YAXXZ"() #0 {
+entry:
+  %call = call noundef ptr @"??0?$RWBuffer@M@hlsl@@QAA@XZ"(ptr noundef nonnull align 4 dereferenceable(4) @"?Buffer@@3V?$RWBuffer@M@hlsl@@A")
+  ret void
+}
+
+; Function Attrs: inlinehint nounwind
+define linkonce_odr noundef ptr @"??0?$RWBuffer@M@hlsl@@QAA@XZ"(ptr noundef nonnull returned align 4 dereferenceable(4) %this) unnamed_addr #1 align 2 {
+entry:
+  %this.addr = alloca ptr, align 4
+  store ptr %this, ptr %this.addr, align 4
+  %this1 = load ptr, ptr %this.addr, align 4
+  %0 = call ptr @llvm.dx.create.handle(i8 1)
+  %h = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %this1, i32 0, i32 0
+  store ptr %0, ptr %h, align 4
+  ret ptr %this1
+}
+
+; Function Attrs: norecurse nounwind
+define internal void @"?main@@YAXI@Z"(i32
noundef %GI) #2 { +entry: + %GI.addr = alloca i32, align 4 + store i32 %GI, ptr %GI.addr, align 4 + ret void +} + +;CHECK: define void @main() +;CHECK-NEXT: entry: +;CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructors.hlsl() +;CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +;CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +;CHECK-NEXT: ret void +; Function Attrs: norecurse +define void @main() #3 { +entry: + %0 = call i32 @llvm.dx.flattened.thread.id.in.group() + call void @"?main@@YAXI@Z"(i32 %0) + ret void +} + +; Function Attrs: nounwind readnone willreturn +declare i32 @llvm.dx.flattened.thread.id.in.group() #4 + +; Function Attrs: nounwind willreturn +declare ptr @llvm.dx.create.handle(i8) #5 + +; Function Attrs: nounwind +define internal void @_GLOBAL__sub_I_GlobalConstructors.hlsl() #0 { +entry: + call void @"??__EBuffer@@YAXXZ"() + ret void +} + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { inlinehint nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #4 = { nounwind readnone willreturn } +attributes #5 = { nounwind willreturn } + +!hlsl.uavs = !{!0} +!llvm.module.flags = !{!1, !2} +!dx.valver = !{!3} + + +!0 = !{ptr @"?Buffer@@3V?$RWBuffer@M@hlsl@@A", !"RWBuffer", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 1, i32 7} diff --git a/llvm/test/CodeGen/DirectX/global_constructor_function.ll b/llvm/test/CodeGen/DirectX/global_constructor_function.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/DirectX/global_constructor_function.ll @@ -0,0 +1,71 @@ +; RUN: opt -S -passes="global-ctor-dtor-calls" < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.0-compute" + +; Make sure global variable for ctors/dtors removed. +; CHECK-NOT:@llvm.global_ctors +; CHECK-NOT:@llvm.global_dtors + +@"?i@@3HA" = global i32 0, align 4 +@llvm.global_ctors = appending global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @"?call_me_first@@YAXXZ", ptr null }, { i32, ptr, ptr } { i32 65535, ptr @"?then_call_me@@YAXXZ", ptr null }] +@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @"?call_me_last@@YAXXZ", ptr null }] + +; Function Attrs: nounwind +define void @"?call_me_first@@YAXXZ"() #0 { +entry: + store i32 12, ptr @"?i@@3HA", align 4 + ret void +} + +; Function Attrs: nounwind +define void @"?then_call_me@@YAXXZ"() #0 { +entry: + store i32 12, ptr @"?i@@3HA", align 4 + ret void +} + +; Function Attrs: nounwind +define void @"?call_me_last@@YAXXZ"() #0 { +entry: + store i32 0, ptr @"?i@@3HA", align 4 + ret void +} + +; Function Attrs: norecurse nounwind +define internal void @"?main@@YAXI@Z"(i32 noundef %GI) #1 { +entry: + %GI.addr = alloca i32, align 4 + store i32 %GI, ptr %GI.addr, align 4 + ret void +} +;CHECK: define void @main() +;CHECK-NEXT: entry: +;CHECK-NEXT: call void @"?call_me_first@@YAXXZ"() +;CHECK-NEXT: call void @"?then_call_me@@YAXXZ"() +;CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +;CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +;CHECK-NEXT: call void @"?call_me_last@@YAXXZ"( +;CHECK-NEXT: ret void +; Function Attrs: norecurse +define void @main() #2 { +entry: + %0 = call i32 @llvm.dx.flattened.thread.id.in.group() + call void @"?main@@YAXI@Z"(i32 %0) + ret void +} + +; Function Attrs: nounwind readnone willreturn +declare i32 
@llvm.dx.flattened.thread.id.in.group() #3 + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { nounwind readnone willreturn } + +!llvm.module.flags = !{!0, !1} +!dx.valver = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 7} diff --git a/llvm/test/CodeGen/DirectX/global_constructor_lib.ll b/llvm/test/CodeGen/DirectX/global_constructor_lib.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/global_constructor_lib.ll @@ -0,0 +1,84 @@ +; RUN: opt -S -passes="global-ctor-dtor-calls" < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.3-library" + +; Make sure global variable for ctors exist for lib profile. 
+; CHECK:@llvm.global_ctors + +%"class.hlsl::RWBuffer" = type { ptr } + +@"?Buffer@@3V?$RWBuffer@M@hlsl@@A" = global %"class.hlsl::RWBuffer" zeroinitializer, align 4 +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_GlobalConstructorLib.hlsl, ptr null }] + +; Function Attrs: nounwind +define internal void @"??__EBuffer@@YAXXZ"() #0 { +entry: + %call = call noundef ptr @"??0?$RWBuffer@M@hlsl@@QAA@XZ"(ptr noundef nonnull align 4 dereferenceable(4) @"?Buffer@@3V?$RWBuffer@M@hlsl@@A") + ret void +} + +; Function Attrs: inlinehint nounwind +define linkonce_odr noundef ptr @"??0?$RWBuffer@M@hlsl@@QAA@XZ"(ptr noundef nonnull returned align 4 dereferenceable(4) %this) unnamed_addr #1 align 2 { +entry: + %this.addr = alloca ptr, align 4 + store ptr %this, ptr %this.addr, align 4 + %this1 = load ptr, ptr %this.addr, align 4 + %0 = call ptr @llvm.dx.create.handle(i8 1) + %h = getelementptr inbounds %"class.hlsl::RWBuffer", ptr %this1, i32 0, i32 0 + store ptr %0, ptr %h, align 4 + ret ptr %this1 +} + +; Function Attrs: nounwind +define internal void @"?FirstEntry@@YAXXZ"() #0 { +entry: + ret void +} +; CHECK: define void @FirstEntry() +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() +define void @FirstEntry() #2 { +entry: + call void @"?FirstEntry@@YAXXZ"() + ret void +} + +; Function Attrs: nounwind +define internal void @"?SecondEntry@@YAXXZ"() #0 { +entry: + ret void +} +; CHECK: define void @SecondEntry() +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() +; CHECK-NEXT: call void @"?SecondEntry@@YAXXZ"() +define void @SecondEntry() #2 { +entry: + call void @"?SecondEntry@@YAXXZ"() + ret void +} + +; Function Attrs: nounwind willreturn +declare ptr @llvm.dx.create.handle(i8) #3 + +; Function Attrs: nounwind +define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() #0 { +entry: + call void @"??__EBuffer@@YAXXZ"() + ret 
void +} + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { inlinehint nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { nounwind willreturn } + +!hlsl.uavs = !{!0} +!llvm.module.flags = !{!1, !2} +!dx.valver = !{!3} + +!0 = !{ptr @"?Buffer@@3V?$RWBuffer@M@hlsl@@A", !"RWBuffer", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 1, i32 7} diff --git a/llvm/test/CodeGen/DirectX/global_destructor_cs.ll b/llvm/test/CodeGen/DirectX/global_destructor_cs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/global_destructor_cs.ll @@ -0,0 +1,185 @@ +; RUN: opt -S -passes="global-ctor-dtor-calls" < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.0-compute" + +; Make sure global variable for ctors/dtors removed. +; CHECK-NOT:@llvm.global_ctors +; CHECK-NOT:@llvm.global_dtors + +;CHECK: define void @main() +;CHECK-NEXT: entry: +;CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl() +;CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +;CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +;CHECK-NEXT: call void @_GLOBAL__D_a() +;CHECK-NEXT: ret void + +; This is really just a sanity check I needed for myself to verify that +; function scope static variables also get destroyed properly. 
+ +;CHECK: define internal void @_GLOBAL__D_a() +;CHECK-NEXT: entry: +;CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A") +;CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A") +;CHECK-NEXT: ret void + +%struct.Pupper = type { i8 } +%struct.Tail = type { i8 } + +$"??1Pupper@@QAA@XZ" = comdat any + +$"??1Tail@@QAA@XZ" = comdat any + +@"?GlobalPup@@3UPupper@@A" = global %struct.Pupper zeroinitializer, align 1 +@"?T@?1??Wag@@YAXXZ@4UTail@@A" = internal global %struct.Tail zeroinitializer, align 1 +@"?$TSS0@?1??Wag@@YAXXZ@4HA" = internal global i32 0, align 4 +@_Init_thread_epoch = external thread_local global i32, align 4 +@"?Count@Pupper@@2HA" = global i32 0, align 4 +@"?Count@?1??add@Tail@@QAAXH@Z@4HA" = linkonce_odr global i32 0, align 4 +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_GlobalDestructors.hlsl, ptr null }] +@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__D_a, ptr null }] + +; Function Attrs: nounwind +define internal void @"??__EGlobalPup@@YAXXZ"() #0 { +entry: + %call = call noundef ptr @"??0Pupper@@QAA@XZ"(ptr noundef nonnull align 1 dereferenceable(1) @"?GlobalPup@@3UPupper@@A") + ret void +} + +; Function Attrs: nounwind +define linkonce_odr noundef ptr @"??0Pupper@@QAA@XZ"(ptr noundef nonnull returned align 1 dereferenceable(1) %this) unnamed_addr #0 align 2 { +entry: + %this.addr = alloca ptr, align 4 + store ptr %this, ptr %this.addr, align 4 + %this1 = load ptr, ptr %this.addr, align 4 + %0 = load i32, ptr @"?Count@Pupper@@2HA", align 4 + %add = add nsw i32 %0, 1 + store i32 %add, ptr @"?Count@Pupper@@2HA", align 4 + ret ptr %this1 +} + +; Function Attrs: nounwind +define linkonce_odr void @"??1Pupper@@QAA@XZ"(ptr noundef nonnull align 1 dereferenceable(1) %this) unnamed_addr #0 comdat align 2 { +entry: + %this.addr = alloca ptr, align 4 + store ptr %this, ptr %this.addr, 
align 4 + %this1 = load ptr, ptr %this.addr, align 4 + %0 = load i32, ptr @"?Count@Pupper@@2HA", align 4 + %sub = sub nsw i32 %0, 1 + store i32 %sub, ptr @"?Count@Pupper@@2HA", align 4 + ret void +} + +; Function Attrs: nounwind +define void @"?Wag@@YAXXZ"() #0 { +entry: + %0 = load atomic i32, ptr @"?$TSS0@?1??Wag@@YAXXZ@4HA" unordered, align 4 + %1 = load i32, ptr @_Init_thread_epoch, align 4 + %2 = icmp sgt i32 %0, %1 + br i1 %2, label %init.attempt, label %init.end + +init.attempt: ; preds = %entry + call void @_Init_thread_header(ptr @"?$TSS0@?1??Wag@@YAXXZ@4HA") #1 + %3 = load atomic i32, ptr @"?$TSS0@?1??Wag@@YAXXZ@4HA" unordered, align 4 + %4 = icmp eq i32 %3, -1 + br i1 %4, label %init, label %init.end + +init: ; preds = %init.attempt + %call = call noundef ptr @"??0Tail@@QAA@XZ"(ptr noundef nonnull align 1 dereferenceable(1) @"?T@?1??Wag@@YAXXZ@4UTail@@A") + call void @_Init_thread_footer(ptr @"?$TSS0@?1??Wag@@YAXXZ@4HA") #1 + br label %init.end + +init.end: ; preds = %init, %init.attempt, %entry + call void @"?add@Tail@@QAAXH@Z"(ptr noundef nonnull align 1 dereferenceable(1) @"?T@?1??Wag@@YAXXZ@4UTail@@A", i32 noundef 0) + ret void +} + +; Function Attrs: nounwind +declare void @_Init_thread_header(ptr) #1 + +; Function Attrs: nounwind +define linkonce_odr noundef ptr @"??0Tail@@QAA@XZ"(ptr noundef nonnull returned align 1 dereferenceable(1) %this) unnamed_addr #0 align 2 { +entry: + %this.addr = alloca ptr, align 4 + store ptr %this, ptr %this.addr, align 4 + %this1 = load ptr, ptr %this.addr, align 4 + call void @"?add@Tail@@QAAXH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this1, i32 noundef 1) + ret ptr %this1 +} + +; Function Attrs: nounwind +define linkonce_odr void @"??1Tail@@QAA@XZ"(ptr noundef nonnull align 1 dereferenceable(1) %this) unnamed_addr #0 comdat align 2 { +entry: + %this.addr = alloca ptr, align 4 + store ptr %this, ptr %this.addr, align 4 + %this1 = load ptr, ptr %this.addr, align 4 + call void @"?add@Tail@@QAAXH@Z"(ptr 
noundef nonnull align 1 dereferenceable(1) %this1, i32 noundef -1) + ret void +} + +; Function Attrs: nounwind +declare void @_Init_thread_footer(ptr) #1 + +; Function Attrs: nounwind +define linkonce_odr void @"?add@Tail@@QAAXH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this, i32 noundef %V) #0 align 2 { +entry: + %V.addr = alloca i32, align 4 + %this.addr = alloca ptr, align 4 + store i32 %V, ptr %V.addr, align 4 + store ptr %this, ptr %this.addr, align 4 + %this1 = load ptr, ptr %this.addr, align 4 + %0 = load i32, ptr %V.addr, align 4 + %1 = load i32, ptr @"?Count@?1??add@Tail@@QAAXH@Z@4HA", align 4 + %add = add nsw i32 %1, %0 + store i32 %add, ptr @"?Count@?1??add@Tail@@QAAXH@Z@4HA", align 4 + ret void +} + +; Function Attrs: norecurse nounwind +define internal void @"?main@@YAXI@Z"(i32 noundef %GI) #2 { +entry: + %GI.addr = alloca i32, align 4 + store i32 %GI, ptr %GI.addr, align 4 + call void @"?Wag@@YAXXZ"() + ret void +} + +; Function Attrs: norecurse +define void @main() #3 { +entry: + %0 = call i32 @llvm.dx.flattened.thread.id.in.group() + call void @"?main@@YAXI@Z"(i32 %0) + ret void +} + +; Function Attrs: nounwind readnone willreturn +declare i32 @llvm.dx.flattened.thread.id.in.group() #4 + +; Function Attrs: nounwind +define internal void @_GLOBAL__sub_I_GlobalDestructors.hlsl() #0 { +entry: + call void @"??__EGlobalPup@@YAXXZ"() + ret void +} + +; Function Attrs: nounwind +define internal void @_GLOBAL__D_a() #0 { +entry: + call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A") + call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A") + ret void +} + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nounwind } +attributes #2 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { norecurse "frame-pointer"="all" 
"hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #4 = { nounwind readnone willreturn } + +!llvm.module.flags = !{!0, !1} +!dx.valver = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 7} diff --git a/llvm/test/CodeGen/DirectX/global_destructor_lib.ll b/llvm/test/CodeGen/DirectX/global_destructor_lib.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/global_destructor_lib.ll @@ -0,0 +1,186 @@ +; RUN: opt -S -passes="global-ctor-dtor-calls" < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.3-library" + +; Make sure global variable for ctors/dtors exist. +; CHECK:@llvm.global_ctors +; CHECK:@llvm.global_dtors + +;CHECK: define void @main() +;CHECK-NEXT: entry: +;CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl() +;CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +;CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +;CHECK-NEXT: call void @_GLOBAL__D_a() +;CHECK-NEXT: ret void + +; This is really just a sanity check I needed for myself to verify that +; function scope static variables also get destroyed properly. 
+
+; FileCheck expectations for the tool output (the lit invocation line is
+; outside this excerpt): @_GLOBAL__D_a must still consist of exactly the Tail
+; destructor call on the object that @"?Wag@@YAXXZ" lazily constructs,
+; followed by the Pupper destructor call on the global, then a return.
+;CHECK: define internal void @_GLOBAL__D_a()
+;CHECK-NEXT: entry:
+;CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
+;CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
+;CHECK-NEXT: ret void
+
+; Both record types are a single byte; the counters they touch live in
+; separate i32 globals declared below.
+%struct.Pupper = type { i8 }
+%struct.Tail = type { i8 }
+
+; The two destructors are the only functions placed in comdats.
+$"??1Pupper@@QAA@XZ" = comdat any
+
+$"??1Tail@@QAA@XZ" = comdat any
+
+; Objects under test, the guard word and epoch used by @"?Wag@@YAXXZ", and the
+; two i32 counters updated by the Pupper and Tail member functions.
+@"?GlobalPup@@3UPupper@@A" = global %struct.Pupper zeroinitializer, align 1
+@"?T@?1??Wag@@YAXXZ@4UTail@@A" = internal global %struct.Tail zeroinitializer, align 1
+@"?$TSS0@?1??Wag@@YAXXZ@4HA" = internal global i32 0, align 4
+@_Init_thread_epoch = external thread_local global i32, align 4
+@"?Count@Pupper@@2HA" = global i32 0, align 4
+@"?Count@?1??add@Tail@@QAAXH@Z@4HA" = linkonce_odr global i32 0, align 4
+; One entry per list, each with priority 65535 and a null third field: the
+; ctor list names @_GLOBAL__sub_I_GlobalDestructors.hlsl and the dtor list
+; names @_GLOBAL__D_a.
+@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_GlobalDestructors.hlsl, ptr null }]
+@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__D_a, ptr null }]
+
+; Initializer for the global Pupper: runs its constructor on
+; @"?GlobalPup@@3UPupper@@A" and discards the returned pointer.
+; Function Attrs: nounwind
+define internal void @"??__EGlobalPup@@YAXXZ"() #0 {
+entry:
+  %call = call noundef ptr @"??0Pupper@@QAA@XZ"(ptr noundef nonnull align 1 dereferenceable(1) @"?GlobalPup@@3UPupper@@A")
+  ret void
+}
+
+; Pupper constructor: adds 1 to @"?Count@Pupper@@2HA" and returns %this.
+; Function Attrs: nounwind
+define linkonce_odr noundef ptr @"??0Pupper@@QAA@XZ"(ptr noundef nonnull returned align 1 dereferenceable(1) %this) unnamed_addr #0 align 2 {
+entry:
+  %this.addr = alloca ptr, align 4
+  store ptr %this, ptr %this.addr, align 4
+  %this1 = load ptr, ptr %this.addr, align 4
+  %0 = load i32, ptr @"?Count@Pupper@@2HA", align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, ptr @"?Count@Pupper@@2HA", align 4
+  ret ptr %this1
+}
+
+; Pupper destructor: subtracts 1 from @"?Count@Pupper@@2HA".
+; Function Attrs: nounwind
+define linkonce_odr void @"??1Pupper@@QAA@XZ"(ptr noundef nonnull align 1 dereferenceable(1) %this) unnamed_addr #0 comdat align 2 {
+entry:
+  %this.addr = alloca ptr, align 4
+  store ptr %this, ptr %this.addr, align 4
+  %this1 = load ptr, ptr %this.addr, align 4
+  %0 = load i32, ptr @"?Count@Pupper@@2HA", align 4
+  %sub = sub nsw i32 %0, 1
+  store i32 %sub, ptr @"?Count@Pupper@@2HA", align 4
+  ret void
+}
+
+; Guarded one-time construction of the Tail object followed by add(0):
+;   entry        - compares the guard word against @_Init_thread_epoch and
+;                  only attempts initialization when the guard is greater.
+;   init.attempt - calls @_Init_thread_header, reloads the guard, and goes on
+;                  to construct only if the guard now reads -1.
+;   init         - runs the Tail constructor, then @_Init_thread_footer.
+;   init.end     - reached on every path; calls Tail::add on the object with 0.
+; Function Attrs: nounwind
+define void @"?Wag@@YAXXZ"() #0 {
+entry:
+  %0 = load atomic i32, ptr @"?$TSS0@?1??Wag@@YAXXZ@4HA" unordered, align 4
+  %1 = load i32, ptr @_Init_thread_epoch, align 4
+  %2 = icmp sgt i32 %0, %1
+  br i1 %2, label %init.attempt, label %init.end
+
+init.attempt: ; preds = %entry
+  call void @_Init_thread_header(ptr @"?$TSS0@?1??Wag@@YAXXZ@4HA") #1
+  %3 = load atomic i32, ptr @"?$TSS0@?1??Wag@@YAXXZ@4HA" unordered, align 4
+  %4 = icmp eq i32 %3, -1
+  br i1 %4, label %init, label %init.end
+
+init: ; preds = %init.attempt
+  %call = call noundef ptr @"??0Tail@@QAA@XZ"(ptr noundef nonnull align 1 dereferenceable(1) @"?T@?1??Wag@@YAXXZ@4UTail@@A")
+  call void @_Init_thread_footer(ptr @"?$TSS0@?1??Wag@@YAXXZ@4HA") #1
+  br label %init.end
+
+init.end: ; preds = %init, %init.attempt, %entry
+  call void @"?add@Tail@@QAAXH@Z"(ptr noundef nonnull align 1 dereferenceable(1) @"?T@?1??Wag@@YAXXZ@4UTail@@A", i32 noundef 0)
+  ret void
+}
+
+; External helper called with the guard word before the guarded construction.
+; Function Attrs: nounwind
+declare void @_Init_thread_header(ptr) #1
+
+; Tail constructor: calls Tail::add(%this, 1) and returns %this.
+; Function Attrs: nounwind
+define linkonce_odr noundef ptr @"??0Tail@@QAA@XZ"(ptr noundef nonnull returned align 1 dereferenceable(1) %this) unnamed_addr #0 align 2 {
+entry:
+  %this.addr = alloca ptr, align 4
+  store ptr %this, ptr %this.addr, align 4
+  %this1 = load ptr, ptr %this.addr, align 4
+  call void @"?add@Tail@@QAAXH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this1, i32 noundef 1)
+  ret ptr %this1
+}
+
+; Tail destructor: calls Tail::add(%this, -1).
+; Function Attrs: nounwind
+define linkonce_odr void @"??1Tail@@QAA@XZ"(ptr noundef nonnull align 1 dereferenceable(1) %this) unnamed_addr #0 comdat align 2 {
+entry:
+  %this.addr = alloca ptr, align 4
+  store ptr %this, ptr %this.addr, align 4
+  %this1 = load ptr, ptr %this.addr, align 4
+  call void @"?add@Tail@@QAAXH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this1, i32 noundef -1)
+  ret void
+}
+
+; External helper called with the guard word after the guarded construction.
+; Function Attrs: nounwind
+declare void @_Init_thread_footer(ptr) #1
+
+; Tail::add: adds %V to the counter @"?Count@?1??add@Tail@@QAAXH@Z@4HA".
+; %this is spilled and reloaded but not otherwise used.
+; Function Attrs: nounwind
+define linkonce_odr void @"?add@Tail@@QAAXH@Z"(ptr noundef nonnull align 1 dereferenceable(1) %this, i32 noundef %V) #0 align 2 {
+entry:
+  %V.addr = alloca i32, align 4
+  %this.addr = alloca ptr, align 4
+  store i32 %V, ptr %V.addr, align 4
+  store ptr %this, ptr %this.addr, align 4
+  %this1 = load ptr, ptr %this.addr, align 4
+  %0 = load i32, ptr %V.addr, align 4
+  %1 = load i32, ptr @"?Count@?1??add@Tail@@QAAXH@Z@4HA", align 4
+  %add = add nsw i32 %1, %0
+  store i32 %add, ptr @"?Count@?1??add@Tail@@QAAXH@Z@4HA", align 4
+  ret void
+}
+
+; Internal main body: stores %GI to a stack slot and calls @"?Wag@@YAXXZ".
+; Function Attrs: norecurse nounwind
+define internal void @"?main@@YAXI@Z"(i32 noundef %GI) #2 {
+entry:
+  %GI.addr = alloca i32, align 4
+  store i32 %GI, ptr %GI.addr, align 4
+  call void @"?Wag@@YAXXZ"()
+  ret void
+}
+
+; Exported wrapper carrying the "hlsl.shader" attribute (group #3): fetches the
+; flattened thread id in group and forwards it to @"?main@@YAXI@Z". Neither
+; the ctor nor the dtor function is called from here in this input.
+; Function Attrs: norecurse
+define void @main() #3 {
+entry:
+  %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+  call void @"?main@@YAXI@Z"(i32 %0)
+  ret void
+}
+
+; Function Attrs: nounwind readnone willreturn
+declare i32 @llvm.dx.flattened.thread.id.in.group() #4
+
+; Sole @llvm.global_ctors entry: runs the GlobalPup initializer.
+; Function Attrs: nounwind
+define internal void @_GLOBAL__sub_I_GlobalDestructors.hlsl() #0 {
+entry:
+  call void @"??__EGlobalPup@@YAXXZ"()
+  ret void
+}
+
+; Sole @llvm.global_dtors entry and the function the FileCheck lines at the
+; top describe: destroys the Tail object first, then GlobalPup.
+; Function Attrs: nounwind
+define internal void @_GLOBAL__D_a() #0 {
+entry:
+  call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
+  call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
+  ret void
+}
+
+; Attribute groups. #3 is the one attached to @main and is the only group
+; carrying "hlsl.shader" and "hlsl.numthreads".
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nounwind }
+attributes #2 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="library" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #4 = { nounwind readnone willreturn }
+
+; NOTE(review): !dx.valver -> !2 = {1, 7} is presumably the DXIL validator
+; version 1.7 - confirm against the front end that emitted this module.
+!llvm.module.flags = !{!0, !1}
+!dx.valver = !{!2}
+
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{i32 1, i32 7}