Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1493,14 +1493,31 @@ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled. // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be // enabled too. - if (CallConv == CallingConv::AMDGPU_PS && - ((Info->getPSInputAddr() & 0x7F) == 0 || - ((Info->getPSInputAddr() & 0xF) == 0 && - Info->isPSInputAllocated(11)))) { - CCInfo.AllocateReg(AMDGPU::VGPR0); - CCInfo.AllocateReg(AMDGPU::VGPR1); - Info->markPSInputAllocated(0); - Info->markPSInputEnabled(0); + if (CallConv == CallingConv::AMDGPU_PS) { + if ((Info->getPSInputAddr() & 0x7F) == 0 || + ((Info->getPSInputAddr() & 0xF) == 0 && + Info->isPSInputAllocated(11))) { + CCInfo.AllocateReg(AMDGPU::VGPR0); + CCInfo.AllocateReg(AMDGPU::VGPR1); + Info->markPSInputAllocated(0); + Info->markPSInputEnabled(0); + } + if (Subtarget->isAmdPalOS()) { + // For isAmdPalOS, the user does not enable some bits after compilation + // based on run-time states; the register values being generated here are + // the final ones set in hardware. Therefore we need to apply the + // workaround to PSInputAddr and PSInputEnable together. (The case where + // a bit is set in PSInputAddr but not PSInputEnable is where the + // frontend set up an input arg for a particular interpolation mode, but + // nothing uses that input arg. Really we should have an earlier pass + // that removes such an arg.) + unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable(); + if ((PsInputBits & 0x7F) == 0 || + ((PsInputBits & 0xF) == 0 && + (PsInputBits >> 11 & 1))) + Info->markPSInputEnabled( + countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined)); + } } assert(!Info->hasDispatchPtr() && Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal-psenable.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal-psenable.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal-psenable.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; This pixel shader does not use the result of its interpolation, so it would +; end up with an interpolation mode set in PSAddr but not PSEnable. This test tests +; the workaround that ensures that an interpolation mode is also set in PSEnable. +; GCN-LABEL: {{^}}amdpal_psenable: +; GCN: .amd_amdgpu_pal_metadata{{.*}}0xa1b3,0x2,0xa1b4,0x2, +define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <2 x float> %pos) #6 { + %inst23 = extractelement <2 x float> %pos, i32 0 + %inst24 = extractelement <2 x float> %pos, i32 1 + %inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0) + %inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0) + ret void +} + +declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2 +declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2 + +attributes #6 = { nounwind "InitialPSInputAddr"="2" }