Index: lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -154,6 +154,7 @@
 char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
                                        std::vector<WorkItem> &Worklist) {
   char GlobalFlags = 0;
+  bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
 
   for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {
     MachineBasicBlock &MBB = *BI;
@@ -168,6 +169,13 @@
       } else if (TII->get(Opcode).mayStore() &&
                  (MI.getDesc().TSFlags & SIInstrFlags::VM_CNT)) {
         Flags = StateExact;
+      } else if (WQMOutputs && Opcode == AMDGPU::COPY) {
+        // Under "amdgpu-ps-wqm-outputs", a COPY whose operand 0 has a VGPR
+        // register class is marked WQM; any other COPY is skipped here.
+        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)))
+          Flags = StateWQM;
+        else
+          continue;
       } else {
         // Handle export instructions with the exec mask valid flag set
         if (Opcode == AMDGPU::EXP) {
@@ -184,6 +192,13 @@
       Worklist.push_back(&MI);
       GlobalFlags |= Flags;
     }
+
+    if (WQMOutputs && MBB.succ_empty()) {
+      // This is a prolog shader. Make sure we go back to exact mode at the end.
+      Blocks[&MBB].OutNeeds = StateExact;
+      Worklist.push_back(&MBB);
+      GlobalFlags |= StateExact;
+    }
   }
 
   return GlobalFlags;
Index: test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- test/CodeGen/AMDGPU/wqm.ll
+++ test/CodeGen/AMDGPU/wqm.ll
@@ -332,6 +332,20 @@
   ret <4 x float> %tex
 }
 
+; Check prolog shaders: the returned value is computed in WQM and exec is
+; restored from the saved mask afterwards.
+;
+; CHECK-LABEL: {{^}}test_prolog_1:
+; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: v_add_f32_e32 v0,
+; CHECK: s_and_b64 exec, exec, [[ORIG]]
+define amdgpu_ps float @test_prolog_1(float %a, float %b) #4 {
+main_body:
+  %s = fadd float %a, %b
+  ret float %s
+}
+
 declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
 
 declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
@@ -345,3 +359,4 @@
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readonly }
 attributes #3 = { nounwind readnone }
+attributes #4 = { "amdgpu-ps-wqm-outputs" }