diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -222,17 +222,16 @@ return false; }); - // FIXME: these passes are causing numerical discrepancies, investigate and - // re-enable. - - // PB.registerPipelineStartEPCallback( - // [this, DebugPassManager](ModulePassManager &PM, - // PassBuilder::OptimizationLevel Level) { - // FunctionPassManager FPM(DebugPassManager); - // FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion())); - // FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion())); - // PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - // }); + PB.registerPipelineStartEPCallback( + [this, DebugPassManager](ModulePassManager &PM, + PassBuilder::OptimizationLevel Level) { + FunctionPassManager FPM(DebugPassManager); + FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion())); + // FIXME: NVVMIntrRangePass is causing numerical discrepancies, + // investigate and re-enable. + // FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion())); + PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + }); } TargetTransformInfo diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll @@ -1,10 +1,9 @@ ; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type. ; Verify that __nvvm_reflect() is replaced with an appropriate value. ; -; FIXME: fix pass and re-enable under new PM -; RUN: opt %s -S -nvvm-reflect -O2 -enable-new-pm=0 -mtriple=nvptx64 \ +; RUN: opt %s -S -nvvm-reflect -O2 -mtriple=nvptx64 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM20 -; RUN: opt %s -S -nvvm-reflect -O2 -enable-new-pm=0 -mtriple=nvptx64 -mcpu=sm_35 \ +; RUN: opt %s -S -nvvm-reflect -O2 -mtriple=nvptx64 -mcpu=sm_35 \ ; RUN: | FileCheck %s --check-prefixes=COMMON,SM35 @"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"