diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1709,6 +1709,9 @@ if (!I->isCopy() && !I->isRegSequence()) return false; Register DstReg = I->getOperand(0).getReg(); + // Physical registers may have more than one defining instruction. + if (DstReg.isPhysical()) + return false; if (TRI->isAGPR(*MRI, DstReg)) continue; MoveRegs.push_back(DstReg); diff --git a/llvm/test/CodeGen/AMDGPU/swdev373493.ll b/llvm/test/CodeGen/AMDGPU/swdev373493.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/swdev373493.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -O3 < %s +@a0000000000000000000000000000000004040 = external protected addrspace(4) externally_initialized global [4096 x i64], align 16 + +; Function Attrs: noinline +define hidden fastcc void @func1(ptr %a015194, ptr %a015195, ptr %a14919, ptr %a14642, ptr %a11358, ptr %a10670) unnamed_addr #0 align 2 { +entry: + switch i32 undef, label %sw.epilog [ + i32 3, label %sw.bb8 + i32 1, label %sw.bb2 + ] + +sw.bb2: ; preds = %entry + %a015194.val = load ptr, ptr undef, align 8 + tail call fastcc void @func2(ptr noundef addrspacecast (ptr addrspace(4) getelementptr inbounds ([4096 x i64], ptr addrspace(4) @a0000000000000000000000000000000004040, i64 0, i64 243) to ptr), ptr %a015194.val, ptr undef, ptr noundef nonnull align 8 dereferenceable(24) %a015195, ptr noundef %a14919, ptr noundef %a14642, ptr noundef %a11358) #2 + br label %sw.epilog + +sw.bb8: ; preds = %entry + tail call fastcc void @func3(ptr noundef nonnull align 8 dereferenceable(24) %a015194, ptr noundef nonnull align 8 dereferenceable(24) %a015195, ptr noundef %a14919, ptr noundef %a14642, ptr noundef %a11358, ptr noundef nonnull align 8 dereferenceable(8) %a10670) #2 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb8, %sw.bb2, %entry + ret void +} + +declare dso_local 
fastcc void @func2(ptr, ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr #1 align 2 + +declare dso_local fastcc void @func3(ptr, ptr, ptr, ptr, ptr, ptr) unnamed_addr #1 align 2 + +attributes #0 = { noinline } +attributes #1 = { "target-features"="+16-bit-insts,+add-no-carry-insts,+aperture-regs,+atomic-fadd-no-rtn-insts,+atomic-fadd-rtn-insts,+atomic-pk-fadd-no-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+dpp-64bit,+enable-ds128,+enable-prt-strict-null,+fast-denormal-f32,+fast-fmaf,+flat-address-space,+flat-for-global,+flat-global-insts,+flat-inst-offsets,+flat-scratch-insts,+fma-mix-insts,+fp64,+full-rate-64-ops,+gcn3-encoding,+gfx7-gfx8-gfx9-insts,+gfx8-insts,+gfx9,+gfx9-insts,+gfx90a-insts,+image-insts,+int-clamp-insts,+inv-2pi-inline-imm,+ldsbankcount32,+load-store-opt,+localmemorysize65536,+mad-mac-f32-insts,+mai-insts,+negative-scratch-offset-bug,+packed-fp32-ops,+packed-tid,+pk-fmac-f16-inst,+promote-alloca,+r128-a16,+s-memrealtime,+s-memtime-inst,+scalar-atomics,+scalar-flat-scratch-insts,+scalar-stores,+sdwa,+sdwa-omod,+sdwa-scalar,+sdwa-sdst,+sramecc-support,+trap-handler,+unaligned-access-mode,+unaligned-buffer-access,+unaligned-ds-access,+vgpr-index-mode,+vop3p,-wavefrontsize16,-wavefrontsize32,+wavefrontsize64,+xnack-support" } +attributes #2 = { convergent nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"AMD clang version 15.0.0 (ssh://gerritgit/lightning/ec/llvm-project amd-mainline-open 22343 2c8ca8227af55c93a857bb3b4d8b118c917fca7c)"}