diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -31,7 +31,7 @@
   std::unique_ptr<TargetLoweringObjectFile> TLOF;
 
   StringRef getGPUName(const Function &F) const;
-  StringRef getFeatureString(const Function &F) const;
+  std::string getFeatureString(const Function &F) const;
 
 public:
   static bool EnableLateStructurizeCFG;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -571,11 +571,33 @@
   return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
 }
 
-StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
+/// Return the subtarget feature string to use for \p F.
+///
+/// A valid "target-features" attribute on \p F replaces the target machine's
+/// feature string, except when it is exactly "+extended-image-insts": then
+/// the target machine's features are appended to it (see below).
+std::string AMDGPUTargetMachine::getFeatureString(const Function &F) const {
   Attribute FSAttr = F.getFnAttribute("target-features");
+  StringRef TargetFS = getTargetFeatureString();
 
-  return FSAttr.isValid() ? FSAttr.getValueAsString()
-                          : getTargetFeatureString();
+  if (!FSAttr.isValid())
+    return TargetFS.str();
+
+  StringRef FunctionFS = FSAttr.getValueAsString();
+
+  // Functions from extended-image-intrinsics.ll from device_libs have the
+  // attribute "target-features"="+extended-image-insts". When compiling in
+  // wave64 on a gpu that defaults to wave32, dropping the TargetFS string
+  // makes those functions be compiled in wave32.
+  bool EnableExtendedImageInstsForFunction =
+      FunctionFS == "+extended-image-insts" &&
+      !TargetFS.contains("-extended-image-insts");
+  // Skip the join when there is nothing to append, so the result never ends
+  // in a dangling ",".
+  if (EnableExtendedImageInstsForFunction && !TargetFS.empty())
+    return (FunctionFS + "," + TargetFS).str();
+
+  return FunctionFS.str();
 }
 
 /// Predicate for Internalize pass.
@@ -829,7 +851,7 @@
 const TargetSubtargetInfo *
 GCNTargetMachine::getSubtargetImpl(const Function &F) const {
   StringRef GPU = getGPUName(F);
-  StringRef FS = getFeatureString(F);
+  std::string FS = getFeatureString(F);
 
   SmallString<128> SubtargetKey(GPU);
   SubtargetKey.append(FS);
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
@@ -66,7 +66,7 @@
 const TargetSubtargetInfo *
 R600TargetMachine::getSubtargetImpl(const Function &F) const {
   StringRef GPU = getGPUName(F);
-  StringRef FS = getFeatureString(F);
+  std::string FS = getFeatureString(F);
 
   SmallString<128> SubtargetKey(GPU);
   SubtargetKey.append(FS);
diff --git a/llvm/test/CodeGen/AMDGPU/extended-image-insts-wave32-wave64.ll b/llvm/test/CodeGen/AMDGPU/extended-image-insts-wave32-wave64.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/extended-image-insts-wave32-wave64.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize64" -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s
+
+; Check that a function carrying "target-features"="+extended-image-insts"
+; is still compiled with the wavefront size selected by -mcpu/-mattr.
+
+; GCN-LABEL: has_extended_image_insts
+; WAVE32: s_and_b32 vcc_lo, exec_lo, {{.*}}
+; WAVE64: s_and_b64 vcc, exec, {{.*}}
+; WAVE32: amdhsa_wavefront_size32 1
+; WAVE64: amdhsa_wavefront_size32 0
+
+define amdgpu_kernel void @has_extended_image_insts(float %arg10) #0 {
+.entry:
+  %tmp100 = fcmp ogt float %arg10, 0.25
+  br i1 %tmp100, label %if, label %endif
+if:
+  %tmp101 = fadd float %arg10, 0.125
+  br label %endif
+endif:
+  %tmp102 = phi float [ %arg10, %.entry ], [ %tmp101, %if ]
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp102, float %tmp102, float %tmp102, float %tmp102, i1 true, i1 true)
+  ret void
+}
+
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
+
+attributes #0 = { nounwind "target-features"="+extended-image-insts" }