Index: llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -107,6 +107,22 @@
   }
   return Result;
 }
+
+/// Emit an optimization remark reporting that \p F is being removed because
+/// subtarget feature \p Feature is not supported on the current target.
+void reportFunctionRemoved(Function &F, unsigned Feature) {
+  OptimizationRemarkEmitter ORE(&F);
+  ORE.emit([&]() {
+    // Note: we print the function name as part of the diagnostic because if
+    // debug info is not present, users get "<unknown>:0:0" as the debug
+    // loc. If we didn't print the function name there would be no way to
+    // tell which function got removed.
+    return OptimizationRemark(DEBUG_TYPE, "AMDGPUIncompatibleFnRemoved", &F)
+           << "removing function '" << F.getName() << "': +"
+           << getFeatureName(Feature)
+           << " is not supported on the current target";
+  });
+}
 } // end anonymous namespace
 
 bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
@@ -143,21 +159,20 @@
   // GPU's feature set. We only check a predetermined set of features.
   for (unsigned Feature : FeaturesToCheck) {
     if (ST->hasFeature(Feature) && !GPUFeatureBits.test(Feature)) {
-      OptimizationRemarkEmitter ORE(&F);
-      ORE.emit([&]() {
-        // Note: we print the function name as part of the diagnostic because if
-        // debug info is not present, users get "<unknown>:0:0" as the debug
-        // loc. If we didn't print the function name there would be no way to
-        // tell which function got removed.
-        return OptimizationRemark(DEBUG_TYPE, "AMDGPUIncompatibleFnRemoved", &F)
-               << "removing function '" << F.getName() << "': +"
-               << getFeatureName(Feature)
-               << " is not supported on the current target";
-      });
+      reportFunctionRemoved(F, Feature);
       return true;
     }
   }
 
+  // Delete FeatureWavefrontSize32 functions for
+  // gfx9 and below targets that don't support the mode.
+  // gfx10+ is implied to support both wave32 and 64 features.
+  // They are not in the feature set. So, we need a separate check.
+  if (ST->getGeneration() < AMDGPUSubtarget::GFX10 &&
+      ST->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+    reportFunctionRemoved(F, AMDGPU::FeatureWavefrontSize32);
+    return true;
+  }
   return false;
 }
 
Index: llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+wavefrontsize64 -stop-after=amdgpu-remove-incompatible-functions\
+; RUN:   -pass-remarks=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX906 %s
+; RUN: FileCheck --check-prefix=WARN-GFX906 %s < %t
+; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+wavefrontsize64 -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx90a -mattr=+wavefrontsize64 -stop-after=amdgpu-remove-incompatible-functions\
+; RUN:   -pass-remarks=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX90A %s
+; RUN: FileCheck --check-prefix=WARN-GFX90A %s < %t
+; RUN: llc -march=amdgcn -mcpu=gfx90a -mattr=+wavefrontsize64 -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=+wavefrontsize64 -stop-after=amdgpu-remove-incompatible-functions\
+; RUN:   -pass-remarks=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=+wavefrontsize64 -verify-machineinstrs < %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -stop-after=amdgpu-remove-incompatible-functions\
+; RUN:   -pass-remarks=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s
+
+; WARN-GFX906: removing function 'needs_wavefrontsize32': +wavefrontsize32 is not supported on the current target
+; WARN-GFX906-NOT: not supported
+
+; WARN-GFX90A: removing function 'needs_wavefrontsize32': +wavefrontsize32 is not supported on the current target
+; WARN-GFX90A-NOT: not supported
+
+define void @needs_wavefrontsize32(ptr %out) #0 {
+; GFX906-NOT: @needs_wavefrontsize32
+; GFX90A-NOT: @needs_wavefrontsize32
+; GFX10: define void @needs_wavefrontsize32(
+; GFX11: define void @needs_wavefrontsize32(
+  %1 = tail call i32 @llvm.read_register.i32(metadata !0)
+  %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
+  store i32 %2, ptr %out, align 4
+  ret void
+}
+
+define void @caller(ptr %out) {
+  ; GFX906: call void null(
+  ; GFX90A: call void null(
+  ; GFX10: call void @needs_wavefrontsize32(
+  ; GFX11: call void @needs_wavefrontsize32(
+  call void @needs_wavefrontsize32(ptr %out)
+  ret void
+}
+
+declare i32 @llvm.read_register.i32(metadata)
+declare i32 @llvm.ctpop.i32(i32)
+
+!0 = !{!"exec_lo"}
+
+attributes #0 = { "target-features"="+wavefrontsize32" }