Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -479,17 +479,16 @@ if (LoadStoreOp == -1) return false; - unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg(); - + const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) - .addReg(Reg, getDefRegState(!IsStore)) - .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)) - .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)) - .addImm(Offset) - .addImm(0) // glc - .addImm(0) // slc - .addImm(0) // tfe - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + .add(*Reg) + .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)) + .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)) + .addImm(Offset) + .addImm(0) // glc + .addImm(0) // slc + .addImm(0) // tfe + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); return true; } Index: test/CodeGen/AMDGPU/frame-index-elimination.ll =================================================================== --- test/CodeGen/AMDGPU/frame-index-elimination.ll +++ test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-function-calls -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Test that non-entry function frame indices are expanded properly to ; give an index relative to the scratch wave offset register @@ -165,4 +165,28 @@ ret void } +declare void @func(<4 x float>* nocapture) #0 + +; Check that the undef flag is preserved by eliminateFrameIndex when handling +; the stores in the middle block (bb4). 
+ +; GCN-LABEL: {{^}}undefined_stack_store_reg: +define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 { +bb: + %tmp = alloca <4 x float>, align 16 + %tmp2 = insertelement <4 x float> undef, float %arg, i32 0 + store <4 x float> %tmp2, <4 x float>* undef + %tmp3 = icmp eq i32 %arg1, 0 + br i1 %tmp3, label %bb4, label %bb5 + +bb4: + call void @func(<4 x float>* nonnull undef) + store <4 x float> %tmp2, <4 x float>* %tmp, align 16 + call void @func(<4 x float>* nonnull %tmp) + br label %bb5 + +bb5: + ret void +} + attributes #0 = { nounwind }