Summary of this patch (free-form text ahead of the first "diff --git" header;
patch tools skip it):

  * SIPeepholeSDWA.cpp: just before the final "return true" of the function
    being patched (its signature is outside this hunk), two checks are added.
    For each of the named operands src0 and src1, if the instruction has that
    operand and it is neither a register nor an immediate, the function now
    returns false.

  * sdwa-stack.mir (new test): runs the si-peephole-sdwa pass for gfx1010 on
    two functions in which a V_ADD_U32_e64 takes %stack.0 as src0 and as src1
    respectively, with the other source produced by "V_AND_B32_e32 255, ...".
    The GCN check lines expect the V_ADD_U32_e64 to still be printed with the
    %stack.0 operand after the pass, matching the test's own note:
    "Do not fold stack objects into SDWA."

diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -992,6 +992,16 @@
   if (Opc == AMDGPU::V_CNDMASK_B32_e32)
     return false;
 
+  if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
+    if (!Src0->isReg() && !Src0->isImm())
+      return false;
+  }
+
+  if (MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1)) {
+    if (!Src1->isReg() && !Src1->isImm())
+      return false;
+  }
+
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir b/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-stack.mir
@@ -0,0 +1,32 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+# Do not fold stack objects into SDWA.
+
+---
+# GCN-LABEL: name: sdwa_stack_object_src0
+# GCN: V_ADD_U32_e64 %stack.0, killed %1
+name: sdwa_stack_object_src0
+stack:
+  - { id: 0, type: default, offset: 0, size: 32, alignment: 4, stack-id: default }
+body: |
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:vgpr_32 = V_AND_B32_e32 255, %0, implicit $exec
+    %2:vgpr_32 = V_ADD_U32_e64 %stack.0, killed %1, 0, implicit $exec
+    S_ENDPGM 0, implicit %2
+
+...
+---
+name: sdwa_stack_object_src1
+# GCN-LABEL: name: sdwa_stack_object_src1
+# GCN: V_ADD_U32_e64 killed %1, %stack.0
+stack:
+  - { id: 0, type: default, offset: 0, size: 32, alignment: 4, stack-id: default }
+body: |
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:vgpr_32 = V_AND_B32_e32 255, %0, implicit $exec
+    %2:vgpr_32 = V_ADD_U32_e64 killed %1, %stack.0, 0, implicit $exec
+    S_ENDPGM 0, implicit %2
+
+...