Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -697,6 +697,8 @@ HelpText<"Emit Clang AST files for source inputs">; def emit_llvm : Flag<["-"], "emit-llvm">, Flags<[CC1Option]>, Group, HelpText<"Use the LLVM representation for assembler and object files">; +def emit_pch : Flag<["-"], "emit-pch">, Flags<[CC1Option]>, Group, + HelpText<"Generate pre-compiled header file">; def emit_interface_stubs : Flag<["-"], "emit-interface-stubs">, Flags<[CC1Option]>, Group, HelpText<"Generate Interface Stub Files.">; def emit_merged_ifs : Flag<["-"], "emit-merged-ifs">, @@ -4113,8 +4115,6 @@ HelpText<"Generate pre-compiled module file from a C++ module interface">; def emit_header_module : Flag<["-"], "emit-header-module">, HelpText<"Generate pre-compiled module file from a set of header files">; -def emit_pch : Flag<["-"], "emit-pch">, - HelpText<"Generate pre-compiled header file">; def emit_llvm_bc : Flag<["-"], "emit-llvm-bc">, HelpText<"Build ASTs then convert to LLVM, emit .bc file">; def emit_llvm_only : Flag<["-"], "emit-llvm-only">, Index: clang/include/clang/Driver/Types.def =================================================================== --- clang/include/clang/Driver/Types.def +++ clang/include/clang/Driver/Types.def @@ -41,9 +41,9 @@ TYPE("cuda-cpp-output", PP_CUDA, INVALID, "cui", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("cuda", CUDA, PP_CUDA, "cu", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("cuda", CUDA_DEVICE, PP_CUDA, "cu", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) -TYPE("hip-cpp-output", PP_HIP, INVALID, "cui", phases::Compile, phases::Backend, phases::Assemble, phases::Link) -TYPE("hip", HIP, PP_HIP, "cu", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, 
phases::Link) -TYPE("hip", HIP_DEVICE, PP_HIP, "cu", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("hip-cpp-output", PP_HIP, INVALID, "cui", phases::Precompile, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("hip", HIP, PP_HIP, "cu", phases::Preprocess, phases::Precompile, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("hip", HIP_DEVICE, PP_HIP, "cu", phases::Preprocess, phases::Precompile, phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("objective-c-cpp-output", PP_ObjC, INVALID, "mi", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("objc-cpp-output", PP_ObjC_Alias, INVALID, "mi", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("objective-c", ObjC, PP_ObjC, "m", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -292,7 +292,8 @@ FinalPhase = phases::Preprocess; // --precompile only runs up to precompilation. - } else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) { + } else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile)) || + (PhaseArg = DAL.getLastArg(options::OPT_emit_pch))) { FinalPhase = phases::Precompile; // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. @@ -2394,6 +2395,7 @@ bool CompileDeviceOnly = false; bool EmitLLVM = false; bool EmitAsm = false; + bool EmitPCH = false; /// ID to identify each device compilation. For CUDA it is simply the /// GPU arch string. 
For HIP it is either the GPU arch string or GPU @@ -2584,6 +2586,7 @@ options::OPT_cuda_device_only); EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); + EmitPCH = Args.getLastArg(options::OPT_emit_pch); // Collect all cuda_gpu_arch parameters, removing duplicates. std::set GpuArchs; @@ -2806,6 +2809,24 @@ if (CudaDeviceActions.empty()) return ABRT_Success; + // In EmitPCH mode, construct Phase Actions up to Precompile, + // and ignore phases coming afterwards. + // Otherwise, ignore the Precompile phase. + if (EmitPCH) { + if (CurPhase == phases::Precompile) { + for (Action *&A : CudaDeviceActions) { + A = C.getDriver().ConstructPhaseAction( + C, Args, phases::Precompile, A, AssociatedOffloadKind); + } + return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; + } + if (CurPhase >= phases::Compile) + return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; + } else { + if (CurPhase == phases::Precompile) + return ABRT_Success; + } + assert(((CurPhase == phases::Link && Relocatable) || CudaDeviceActions.size() == GpuArchList.size()) && "Expecting one action per GPU architecture."); @@ -3597,6 +3618,13 @@ break; } + // Skip HIP Precompile step for Host Compilation. + if (Phase == phases::Precompile && + InputType == types::TY_HIP) { + continue; + } + + // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? 
Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6318,6 +6318,7 @@ // Disable warnings for clang -E -emit-llvm foo.c Args.ClaimAllArgs(options::OPT_emit_llvm); + Args.ClaimAllArgs(options::OPT_emit_pch); } Clang::Clang(const ToolChain &TC) Index: clang/lib/Driver/Types.cpp =================================================================== --- clang/lib/Driver/Types.cpp +++ clang/lib/Driver/Types.cpp @@ -171,9 +171,7 @@ case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: case TY_CXXModule: case TY_PP_CXXModule: case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE: - case TY_HIP: - case TY_PP_HIP: - case TY_HIP_DEVICE: + case TY_HIP: case TY_PP_HIP: case TY_HIP_DEVICE: return true; } } @@ -333,7 +331,8 @@ // --precompile only runs up to precompilation. // This is a clang extension and is not compatible with GCC. - else if (DAL.getLastArg(options::OPT__precompile)) + else if (DAL.getLastArg(options::OPT__precompile) || + DAL.getLastArg(options::OPT_emit_pch)) LastPhase = phases::Precompile; // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. 
Index: clang/test/Driver/hip-binding.hip =================================================================== --- clang/test/Driver/hip-binding.hip +++ clang/test/Driver/hip-binding.hip @@ -20,6 +20,14 @@ // RDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]"], output: "[[HOSTOBJ:.*o]]" // RDCS: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[BC1]]", "[[BC2]]", "[[HOSTOBJ]]"], output: "{{.*}}" +// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: -emit-pch 2>&1 | FileCheck -check-prefix=PCH %s +// PCH: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[PCH1:.*gch]]" +// PCH: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN]]"], output: "[[PCH2:.*gch]]" +// PCH: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]"], output: "[[CUI:.*cui]]" +// PCH: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[PCH1]]", "[[PCH2]]", "[[CUI]]"], output: "{{.*}}" + // RUN: touch %t.o // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\ Index: clang/test/Driver/hip-device-compile.hip =================================================================== --- clang/test/Driver/hip-device-compile.hip +++ clang/test/Driver/hip-device-compile.hip @@ -26,11 +26,19 @@ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM %s +// RUN: %clang -emit-pch --cuda-device-only -### -target x86_64-linux-gnu \ +// RUN: -o a.cu.pch -x hip --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,PCH %s + // CHECK: {{".*clang.*"}} "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" // BC-SAME: "-emit-llvm-bc" // LL-SAME: 
"-emit-llvm" // ASM-NOT: "-emit-llvm" +// PCH-SAME: "-emit-pch" // CHECK-SAME: "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" // CHECK-SAME: {{".*lib1.bc"}} @@ -38,6 +46,7 @@ // BC-SAME: "-o" "a.bc" // LL-SAME: "-o" "a.ll" // ASM-SAME: "-o" "a.s" +// PCH-SAME: "-o" "a.cu.pch" // CHECK-SAME: {{".*a.cu"}} // CHECK-NOT: {{"*.llvm-link"}} Index: clang/test/Driver/hip-phases.hip =================================================================== --- clang/test/Driver/hip-phases.hip +++ clang/test/Driver/hip-phases.hip @@ -231,6 +231,16 @@ // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) // DASM-NOT: host +// +// Test single gpu architecture up to the precompile phase in device-only mode. +// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -emit-pch 2>&1 \ +// RUN: | FileCheck -check-prefixes=PCH %s +// PCH-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// PCH-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// PCH-DAG: [[P2:[0-9]+]]: precompiler, {[[P1]]}, precompiled-header, (device-[[T]], [[ARCH]]) +// PCH-NOT: host // // Test two gpu architectures with complete compilation in device-only @@ -261,6 +271,20 @@ // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) // DASM2-NOT: host +// +// Test two gpu architecture up to the precompile phase in device-only mode. 
+// +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu \ +// RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: --cuda-device-only -emit-pch 2>&1 \ +// RUN: | FileCheck -check-prefixes=PCH2 %s +// PCH2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) +// PCH2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// PCH2-DAG: [[P2:[0-9]+]]: precompiler, {[[P1]]}, precompiled-header, (device-[[T]], [[ARCH]]) +// PCH2-DAG: [[P4:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx900]]) +// PCH2-DAG: [[P5:[0-9]+]]: preprocessor, {[[P4]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) +// PCH2-DAG: [[P6:[0-9]+]]: precompiler, {[[P5]]}, precompiled-header, (device-[[T]], [[ARCH]]) +// PCH2-NOT: host // // Test linking two objects with two gpu architectures.