Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -285,10 +285,13 @@ MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) + .addImm(Offset); BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead) - .addImm(Offset) + .addReg(OffsetReg, RegState::Kill) .addFrameIndex(FrameIdx); } @@ -335,13 +338,16 @@ assert(Offset != 0 && "Non-zero offset expected"); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); // In the case the instruction already had an immediate offset, here only // the requested new offset is added because we are leaving the original // immediate in place. + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) + .addImm(Offset); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), NewReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead) - .addImm(Offset) + .addReg(OffsetReg, RegState::Kill) .addReg(BaseReg); FIOp->ChangeToRegister(NewReg, false); Index: llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll +++ llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s + +; This used to fail due to a v_add_i32 instruction with an illegal immediate +; operand that was created during Local Stack Slot Allocation. Test case derived +; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 +; +; CHECK-LABEL: {{^}}main: +; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 +; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200 +; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0 +; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]] +; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]] +; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen +; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen +define amdgpu_ps float @main(i32 %idx) { +main_body: + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} Index: llvm/trunk/test/CodeGen/AMDGPU/selected-stack-object.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/selected-stack-object.ll +++ llvm/trunk/test/CodeGen/AMDGPU/selected-stack-object.ll @@ -0,0 +1,12 @@ +; XFAIL: * +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s + +; See also local-stack-slot-bug.ll +; This fails because a stack object is created during instruction selection. + +; CHECK-LABEL: {{^}}main: +define amdgpu_ps float @main(i32 %idx) { +main_body: + %v1 = extractelement <81 x float> , i32 %idx + ret float %v1 +}