Index: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -325,9 +325,7 @@ unsigned Opcode = MI.getOpcode(); char Flags = 0; - if (TII->isDS(Opcode) && CallingConv == CallingConv::AMDGPU_PS) { - Flags = StateWQM; - } else if (TII->isWQM(Opcode)) { + if (TII->isWQM(Opcode)) { // Sampling instructions don't need to produce results for all pixels // in a quad, they just require all inputs of a quad to have been // computed for derivatives. Index: llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -355,7 +355,7 @@ ; GCN: v_mov_b32_e32 v0, 2.0 ; GCN: s_or_b64 exec, exec -; GCN: s_and_b64 exec, exec +; GCN-NOT: s_and_b64 exec, exec ; GCN: v_mov_b32_e32 v0, 1.0 ; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock Index: llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll +++ llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll @@ -95,7 +95,8 @@ if: ; preds = %main_body %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0 - %lds_data = load float, float addrspace(3)* %lds_ptr + %lds_data_ = load float, float addrspace(3)* %lds_ptr + %lds_data = call float @llvm.amdgcn.wqm.f32(float %lds_data_) br label %endif else: ; preds = %main_body @@ -208,6 +209,7 @@ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 +declare float @llvm.amdgcn.wqm.f32(float) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone }