diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5170,6 +5170,10 @@
     MachineInstr *MI = nullptr;
     if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
       MI = MRI.getUniqueVRegDef(MO.getReg());
+    // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
+    if (MI && MI->getOpcode() == TargetOpcode::COPY &&
+        MI->getOperand(1).getReg().isVirtual())
+      MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
     if (MI && MI->getOpcode() == Opcode) {
       Patterns.push_back(Pattern);
       return true;
@@ -5441,6 +5445,9 @@
   MachineInstr *Dup =
       MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
 
+  if (Dup->getOpcode() == TargetOpcode::COPY)
+    Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
+
   Register DupSrcReg = Dup->getOperand(1).getReg();
   MRI.clearKillFlags(DupSrcReg);
   MRI.constrainRegClass(DupSrcReg, RC);
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
--- a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
@@ -18,16 +18,15 @@
 ; CHECK-NEXT:    add x10, x1, #16
 ; CHECK-NEXT:    add x11, x0, #16
 ; CHECK-NEXT:    mov x12, x9
-; CHECK-NEXT:    dup v1.8h, v0.h[0]
 ; CHECK-NEXT:  .LBB0_4: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldp q2, q3, [x11, #-16]
+; CHECK-NEXT:    ldp q1, q2, [x11, #-16]
 ; CHECK-NEXT:    subs x12, x12, #16
 ; CHECK-NEXT:    add x11, x11, #32
-; CHECK-NEXT:    ldp q4, q5, [x10, #-16]
-; CHECK-NEXT:    fmla v4.8h, v2.8h, v1.8h
-; CHECK-NEXT:    fmla v5.8h, v3.8h, v0.h[0]
-; CHECK-NEXT:    stp q4, q5, [x10, #-16]
+; CHECK-NEXT:    ldp q3, q4, [x10, #-16]
+; CHECK-NEXT:    fmla v3.8h, v1.8h, v0.h[0]
+; CHECK-NEXT:    fmla v4.8h, v2.8h, v0.h[0]
+; CHECK-NEXT:    stp q3, q4, [x10, #-16]
 ; CHECK-NEXT:    add x10, x10, #32
 ; CHECK-NEXT:    b.ne .LBB0_4
 ; CHECK-NEXT:  // %bb.5: // %middle.block
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
--- a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
@@ -588,12 +588,12 @@
   ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub
   ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr64 = COPY [[COPY1]]
   ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr64 = COPY [[COPY2]]
-  ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane killed [[INSERT_SUBREG]], 0
+  ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
   ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr64 = COPY [[DUPv2i32lane]]
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT: [[FMULv2f32_:%[0-9]+]]:fpr64 = FMULv2f32 [[COPY5]], [[COPY6]]
-  ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2f32_]], [[COPY4]]
+  ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0
+  ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]]
   ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64), align 16)
   ; CHECK-NEXT: B %bb.1
  bb.0:
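
For reference, a minimal IR sketch of the shape this patch targets: a scalar that is splatted and multiplied into a vector, which after ISel can appear as FMUL(COPY(DUP(..))) when the splat is loop-invariant and a cross-block COPY sits between the DUP and its use. This is a hypothetical reduced example, not the test added in this patch; it assumes an AArch64 target with +fullfp16 so the .8h forms are selected.

  ; Hypothetical reduced input. With the combiner now looking through the
  ; COPY, the multiply can be folded into an indexed FMLA
  ; (fmla v.8h, v.8h, v0.h[0]) instead of materialising a separate dup.
  define <8 x half> @fmla_splat(<8 x half> %acc, <8 x half> %x, half %s) {
  entry:
    %ins = insertelement <8 x half> poison, half %s, i64 0
    %splat = shufflevector <8 x half> %ins, <8 x half> poison, <8 x i32> zeroinitializer
    %mul = fmul fast <8 x half> %x, %splat
    %add = fadd fast <8 x half> %mul, %acc
    ret <8 x half> %add
  }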