diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -3194,14 +3194,18 @@
     // difficult because at RBS we may end up pessimizing the fpr case if we
     // decided to add an anyextend to fix this. Manual selection is the most
     // robust solution for now.
-    Register SrcReg = I.getOperand(1).getReg();
-    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
+    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
+        AArch64::GPRRegBankID)
       return false; // We expect the fpr regbank case to be imported.
-    LLT SrcTy = MRI.getType(SrcReg);
-    if (SrcTy.getSizeInBits() == 16)
-      I.setDesc(TII.get(AArch64::DUPv8i16gpr));
-    else if (SrcTy.getSizeInBits() == 8)
+    LLT VecTy = MRI.getType(I.getOperand(0).getReg());
+    if (VecTy == LLT::vector(8, 8))
+      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
+    else if (VecTy == LLT::vector(16, 8))
       I.setDesc(TII.get(AArch64::DUPv16i8gpr));
+    else if (VecTy == LLT::vector(4, 16))
+      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
+    else if (VecTy == LLT::vector(8, 16))
+      I.setDesc(TII.get(AArch64::DUPv8i16gpr));
     else
       return false;
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -246,6 +246,28 @@
     $q0 = COPY %dup(<8 x s16>)
     RET_ReallyLR implicit $q0
 
+...
+---
+name: DUPv4s16gpr_s16_src
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv4s16gpr_s16_src
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr64 = DUPv4i16gpr %copy
+    ; CHECK: $d0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:gpr(s32) = COPY $w0
+    %trunc:gpr(s16) = G_TRUNC %copy
+    %dup:fpr(<4 x s16>) = G_DUP %trunc(s16)
+    $d0 = COPY %dup(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
 ...
 ---
 name: DUPv8i8gpr
@@ -267,6 +289,28 @@
     $d0 = COPY %dup(<8 x s8>)
     RET_ReallyLR implicit $d0
 
+...
+---
+name: DUPv8i8gpr_s8_src
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv8i8gpr_s8_src
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr64 = DUPv8i8gpr %copy
+    ; CHECK: $d0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:gpr(s32) = COPY $w0
+    %trunc:gpr(s8) = G_TRUNC %copy(s32)
+    %dup:fpr(<8 x s8>) = G_DUP %trunc(s8)
+    $d0 = COPY %dup(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
 ...
 ---
 name: DUPv16i8gpr