Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -821,19 +821,17 @@ CCInfo.AllocateReg(Reg); } - if (Info->hasPrivateSegmentWaveByteOffset()) { - // Scratch wave offset passed in system SGPR. - unsigned PrivateSegmentWaveByteOffsetReg; + // Scratch wave offset passed in system SGPR. + unsigned PrivateSegmentWaveByteOffsetReg; - if (AMDGPU::isShader(CallConv)) { - PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); - Info->setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); - } else - PrivateSegmentWaveByteOffsetReg = Info->addPrivateSegmentWaveByteOffset(); + if (AMDGPU::isShader(CallConv)) { + PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); + Info->setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); + } else + PrivateSegmentWaveByteOffsetReg = Info->addPrivateSegmentWaveByteOffset(); - MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass); - CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg); - } + MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass); + CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg); // Now that we've figured out where the scratch register inputs are, see if // should reserve the arguments and use them directly. Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -96,7 +96,6 @@ bool WorkGroupIDY : 1; bool WorkGroupIDZ : 1; bool WorkGroupInfo : 1; - bool PrivateSegmentWaveByteOffset : 1; bool WorkItemIDX : 1; // Always initialized. bool WorkItemIDY : 1; @@ -225,10 +224,6 @@ return WorkGroupInfo; } - bool hasPrivateSegmentWaveByteOffset() const { - return PrivateSegmentWaveByteOffset; - } - bool hasWorkItemIDX() const { return WorkItemIDX; } Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -71,7 +71,6 @@ WorkGroupIDY(false), WorkGroupIDZ(false), WorkGroupInfo(false), - PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), WorkItemIDZ(false) { @@ -105,15 +104,10 @@ if (WorkItemIDZ) WorkItemIDY = true; - bool MaySpill = ST.isVGPRSpillingEnabled(*F); bool HasStackObjects = FrameInfo->hasStackObjects(); - if (HasStackObjects || MaySpill) - PrivateSegmentWaveByteOffset = true; - if (ST.isAmdHsaOS()) { - if (HasStackObjects || MaySpill) - PrivateSegmentBuffer = true; + PrivateSegmentBuffer = true; if (F->hasFnAttribute("amdgpu-dispatch-ptr")) DispatchPtr = true; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -283,11 +283,7 @@ return; } - MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - - BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg) - .addReg(UnusedCarry, RegState::Define | RegState::Dead) + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg) .addImm(Offset) .addFrameIndex(FrameIdx); } @@ -335,13 +331,10 @@ assert(Offset != 0 && "Non-zero offset expected"); - unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - // In the case the instruction already had an immediate offset, here only // the requested new offset is added because we are leaving the original // immediate in place. - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), NewReg) - .addReg(UnusedCarry, RegState::Define | RegState::Dead) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e32), NewReg) .addImm(Offset) .addReg(BaseReg); Index: test/CodeGen/AMDGPU/local-stack-slot-bug.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/local-stack-slot-bug.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s + +; This used to fail due to a v_add_i32 instruction with an illegal immediate +; operand that was created during Local Stack Slot Allocation. Test case derived +; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 +; +; CHECK-LABEL: {{^}}main: +define amdgpu_ps float @main(i32 %idx) { +main_body: + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %r = fadd float %v1, %v2 + ret float %r +}