diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10353,12 +10353,28 @@ case Intrinsic::ppc_vsx_disassemble_pair: { int NumVecs = 2; SDValue WideVec = Op.getOperand(1); + + bool FromAssemble = false; + if (auto *VecIntrinsic = dyn_cast<ConstantSDNode>(WideVec.getOperand(0))) + if (VecIntrinsic->getZExtValue() == Intrinsic::ppc_mma_assemble_acc || + VecIntrinsic->getZExtValue() == Intrinsic::ppc_vsx_assemble_pair) + FromAssemble = true; + if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) { NumVecs = 4; WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec); } SmallVector<SDValue, 4> RetOps; for (int VecNo = 0; VecNo < NumVecs; VecNo++) { + // Propagate undef values if the source is just an assemble, otherwise + // we'll get incorrect code for subreg tracking. + if (FromAssemble) { + SDValue Assembler = (NumVecs == 4) ? WideVec.getOperand(0) : WideVec; + if (Assembler.getOperand(VecNo + 1).isUndef()) { + RetOps.push_back(DAG.getUNDEF(MVT::v16i8)); + continue; + } + } SDValue Extract = DAG.getNode( PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec, DAG.getConstant(Subtarget.isLittleEndian() ? 
NumVecs - 1 - VecNo diff --git a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc-bugfix.ll b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc-bugfix.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc-bugfix.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc-bugfix.ll @@ -1,20 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \ -; RUN: | FileCheck %s +; RUN: -ppc-track-subreg-liveness | FileCheck %s define void @copy_novsrp() local_unnamed_addr { ; CHECK-LABEL: copy_novsrp: ; CHECK: # %bb.0: # %dmblvi_entry -; CHECK-NEXT: xxlxor v2, v2, v2 +; CHECK-NEXT: xxlxor vs3, vs3, vs3 ; CHECK-NEXT: xxlxor vs0, vs0, vs0 -; CHECK-NEXT: xxlor vs3, v2, v2 -; CHECK-NEXT: stxv vs1, 0(0) +; CHECK-NEXT: stxv vs3, 0(0) dmblvi_entry: %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> undef, <16 x i8> undef, <16 x i8> zeroinitializer) %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0) %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 + %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 store <16 x i8> %2, <16 x i8>* null, align 1 + store <16 x i8> %3, <16 x i8>* null, align 1 unreachable }