Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -190,17 +190,6 @@
                            unsigned char OpFlags) const;
 
   // Optimization methods.
-
-  // Helper function to check if a reg def is an MI with a given opcode and
-  // returns it if so.
-  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
-                              MachineIRBuilder &MIB) const {
-    auto *Def = MIB.getMRI()->getVRegDef(Reg);
-    if (!Def || Def->getOpcode() != Opc)
-      return nullptr;
-    return Def;
-  }
-
   bool tryOptVectorShuffle(MachineInstr &I) const;
   bool tryOptVectorDup(MachineInstr &MI) const;
   bool tryOptSelect(MachineInstr &MI) const;
@@ -3342,12 +3331,12 @@
 
   // Begin matching the insert.
   auto *InsMI =
-      findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
+      getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
   if (!InsMI)
     return false;
   // Match the undef vector operand.
   auto *UndefMI =
-      findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
+      getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
   if (!UndefMI)
     return false;
   // Match the scalar being splatted.
@@ -3359,7 +3348,7 @@
     return false;
 
   // The shuffle's second operand doesn't matter if the mask is all zero.
-  auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
+  auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
   if (!ZeroVec)
     return false;
   int64_t Zero = 0;
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-shuffle-splat.mir
@@ -108,3 +108,25 @@
     RET_ReallyLR implicit $q0
 
 ...
+---
+name:            splat_2xf64_copies
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d0
+
+    ; This test is exactly the same as splat_2xf64, except it adds two copies.
+    ; These copies shouldn't get in the way of matching the dup pattern.
+    %0:fpr(s64) = COPY $d0
+    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
+    %6:fpr(<2 x s64>) = COPY %2
+    %3:gpr(s32) = G_CONSTANT i32 0
+    %5:fpr(<2 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32)
+    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s32)
+    %7:fpr(<2 x s64>) = COPY %1
+    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, %5(<2 x s32>)
+    $q0 = COPY %4(<2 x s64>)
+    RET_ReallyLR implicit $q0
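
Note: getOpcodeDef is the existing GlobalISel utility declared in
llvm/include/llvm/CodeGen/GlobalISel/Utils.h. The practical difference
from the removed findMIFromReg helper is that it looks through COPY
instructions before checking the defining opcode, which is what lets the
new splat_2xf64_copies test (whose G_INSERT_VECTOR_ELT and
G_SHUFFLE_VECTOR operands are hidden behind copies) still match the dup
pattern. A minimal sketch of that behavior follows; the function name is
hypothetical and the body is simplified from the upstream implementation:

  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/TargetRegisterInfo.h"
  using namespace llvm;

  // Sketch only: walk through COPYs of virtual registers, then check
  // whether the underlying def has the requested opcode.
  static MachineInstr *getOpcodeDefSketch(unsigned Opcode, unsigned Reg,
                                          const MachineRegisterInfo &MRI) {
    MachineInstr *Def = MRI.getVRegDef(Reg);
    while (Def && Def->getOpcode() == TargetOpcode::COPY) {
      unsigned Src = Def->getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Src))
        return nullptr; // Physreg copies can't be chased through MRI.
      Def = MRI.getVRegDef(Src);
    }
    return (Def && Def->getOpcode() == Opcode) ? Def : nullptr;
  }

Because tryOptVectorDup only reads the matched instructions (it never
rewrites through the copies), looking through COPYs here is safe: the
selector still emits the dup from the original scalar operand.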