diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -15,6 +15,7 @@
 #include "DirectXIRPasses/PointerTypeAnalysis.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
@@ -79,6 +80,65 @@
                       Attr);
 }
 
+/// Add every string attribute in \p AS whose key is not in \p LiveKeys to
+/// \p DeadAttrs. Attributes that are not string attributes are skipped.
+static void collectDeadStringAttrs(AttributeMask &DeadAttrs, AttributeSet &&AS,
+                                   const StringSet<> &LiveKeys) {
+  for (auto &Attr : AS) {
+    if (!Attr.isStringAttribute())
+      continue;
+    StringRef Key = Attr.getKindAsString();
+    if (LiveKeys.contains(Key))
+      continue;
+    DeadAttrs.addAttribute(Key);
+  }
+}
+
+/// Remove the string attributes found in the function and return attribute
+/// sets of \p F, except for the keys listed in LiveKeys.
+static void removeStringFunctionAttributes(Function &F) {
+  AttributeList Attrs = F.getAttributes();
+  // Each key must be its own element: adjacent string literals without a
+  // separating comma are concatenated into a single key.
+  const StringSet<> LiveKeys = {"waveops-include-helper-lanes",
+                                "fp32-denorm-mode"};
+  // Collect the dead keys found in the function and return attributes.
+  AttributeMask DeadAttrs;
+  collectDeadStringAttrs(DeadAttrs, Attrs.getFnAttrs(), LiveKeys);
+  collectDeadStringAttrs(DeadAttrs, Attrs.getRetAttrs(), LiveKeys);
+
+  F.removeFnAttrs(DeadAttrs);
+  F.removeRetAttrs(DeadAttrs);
+}
+
+/// Drop the module flags listed in DeadKeys from \p M by erasing the module
+/// flags metadata and re-adding every flag that is not dead. Does nothing
+/// when none of the dead keys is present.
+static void cleanModuleFlags(Module &M) {
+  constexpr StringLiteral DeadKeys[] = {"frame-pointer"};
+  // Collect the DeadKeys that are present in the module flags.
+  StringSet<> DeadKeySet;
+  for (auto &Key : DeadKeys) {
+    if (M.getModuleFlag(Key))
+      DeadKeySet.insert(Key);
+  }
+  if (DeadKeySet.empty())
+    return;
+
+  // Snapshot the current flags before erasing the named metadata node.
+  SmallVector<Module::ModuleFlagEntry> ModuleFlags;
+  M.getModuleFlagsMetadata(ModuleFlags);
+  NamedMDNode *MDFlags = M.getModuleFlagsMetadata();
+  MDFlags->eraseFromParent();
+  // Re-add the module flags which are not dead.
+  for (auto &Flag : ModuleFlags) {
+    StringRef Key = Flag.Key->getString();
+    if (DeadKeySet.contains(Key))
+      continue;
+    M.addModuleFlag(Flag.Behavior, Key, Flag.Val);
+  }
+}
+
 class DXILPrepareModule : public ModulePass {
 
   static Value *maybeGenerateBitcast(IRBuilder<> &Builder,
@@ -112,6 +172,7 @@
     for (auto &F : M.functions()) {
       F.removeFnAttrs(AttrMask);
       F.removeRetAttrs(AttrMask);
+      removeStringFunctionAttributes(F);
       for (size_t Idx = 0, End = F.arg_size(); Idx < End; ++Idx)
         F.removeParamAttrs(Idx, AttrMask);
 
@@ -170,6 +231,8 @@
         }
       }
     }
+    // Remove flags not in llvm3.7.
+    cleanModuleFlags(M);
     return true;
   }
 
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -76,8 +76,8 @@
   FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
   void addCodeGenPrepare() override {
     addPass(createDXILOpLoweringLegacyPass());
-    addPass(createDXILPrepareModulePass());
     addPass(createDXILTranslateMetadataPass());
+    addPass(createDXILPrepareModulePass());
   }
 };
 
diff --git a/llvm/test/CodeGen/DirectX/strip-string-fn-attrs-module-flags.ll b/llvm/test/CodeGen/DirectX/strip-string-fn-attrs-module-flags.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/strip-string-fn-attrs-module-flags.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -dxil-prepare < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-unknown-shadermodel6.0-compute"
+
+; make sure string function attribute and module flags are clear for llvm3.7.
+
+; CHECK-LABEL:define void @CSMain()
+; CHECK:attributes #0 = { nounwind memory(readwrite, inaccessiblemem: read) }
+; CHECK:!llvm.module.flags = !{!0}
+; CHECK:!0 = !{i32 1, !"wchar_size", i32 4}
+
+%"class.hlsl::RWBuffer" = type { ptr }
+%dx.types.Handle = type { ptr }
+%dx.types.ResRet.i32 = type { i32, i32, i32, i32, i32 }
+
+@"?srcBuffer@@3V?$RWBuffer@T?$__vector@H$01@__clang@@@hlsl@@A" = local_unnamed_addr global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+@"?dstBuffer@@3V?$RWBuffer@T?$__vector@H$01@__clang@@@hlsl@@A" = local_unnamed_addr global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+
+; Function Attrs: mustprogress nofree nounwind willreturn memory(readwrite, inaccessiblemem: read)
+define void @CSMain() local_unnamed_addr #0 {
+entry:
+  %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 0, i1 false)
+  %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 1, i1 false)
+  %2 = call i32 @dx.op.threadId.i32(i32 93, i32 0)
+  %3 = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %1, i32 %2, i32 poison)
+  %4 = extractvalue %dx.types.ResRet.i32 %3, 0
+  %5 = extractvalue %dx.types.ResRet.i32 %3, 1
+  %add.i.i0 = add i32 %4, 10
+  %add.i.i1 = add i32 %5, 10
+  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %0, i32 %2, i32 poison, i32 %add.i.i0, i32 %add.i.i1, i32 %add.i.i0, i32 %add.i.i0, i8 15)
+  ret void
+}
+
+declare %dx.types.Handle @dx.op.createHandle(i32 %0, i8 %1, i32 %2, i32 %3, i1 %4)
+
+declare %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 %0, %dx.types.Handle %1, i32 %2, i32 %3)
+
+declare void @dx.op.bufferStore.i32(i32 %0, %dx.types.Handle %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i8 %8)
+
+declare i32 @dx.op.threadId.i32(i32 %0, i32 %1)
+
+attributes #0 = { mustprogress nofree nounwind willreturn memory(readwrite, inaccessiblemem: read) "frame-pointer"="all" "hlsl.numthreads"="1024,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}