Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -12653,13 +12653,13 @@ } } - // Let's try to generate two DUPs and VECTOR_SHUFFLE. For example, + // Let's try to generate VECTOR_SHUFFLE. For example, // // t24: v8i8 = BUILD_VECTOR t25, t25, t25, t25, t26, t26, t26, t26 // ==> - // t28: v8i8 = AArch64ISD::DUP t25 - // t30: v8i8 = AArch64ISD::DUP t26 - // t31: v8i8 = vector_shuffle<0,0,0,0,8,8,8,8> t28, t30 + // t27: v8i8 = BUILD_VECTOR t25, t25, t25, t25, t25, t25, t25, t25 + // t28: v8i8 = BUILD_VECTOR t26, t26, t26, t26, t26, t26, t26, t26 + // t29: v8i8 = vector_shuffle<0,1,2,3,12,13,14,15> t27, t28 if (NumElts >= 8) { SmallVector MaskVec; // Build mask for VECTOR_SHUFLLE. @@ -12667,17 +12667,17 @@ for (unsigned i = 0; i < NumElts; ++i) { SDValue Val = Op.getOperand(i); if (FirstLaneVal == Val) - MaskVec.push_back(0); + MaskVec.push_back(i); else - MaskVec.push_back(NumElts); + MaskVec.push_back(i + NumElts); } SmallVector Ops1(NumElts, Vals[0]); SmallVector Ops2(NumElts, Vals[1]); - SDValue DUP1 = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops1), DAG); - SDValue DUP2 = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops2), DAG); + SDValue VEC1 = DAG.getBuildVector(VT, dl, Ops1); + SDValue VEC2 = DAG.getBuildVector(VT, dl, Ops2); SDValue VECTOR_SHUFFLE = - DAG.getVectorShuffle(VT, dl, DUP1, DUP2, MaskVec); + DAG.getVectorShuffle(VT, dl, VEC1, VEC2, MaskVec); return VECTOR_SHUFFLE; } } Index: llvm/test/CodeGen/AArch64/build-vector-two-dup.ll =================================================================== --- llvm/test/CodeGen/AArch64/build-vector-two-dup.ll +++ llvm/test/CodeGen/AArch64/build-vector-two-dup.ll @@ -98,12 +98,10 @@ define <8 x i8> @test6(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; CHECK-LABEL: test6: ; CHECK: // %bb.0: // %entry -; 
CHECK-NEXT: ld1r { v0.8b }, [x1] -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: ld1r { v1.8b }, [x0] -; CHECK-NEXT: mov v1.d[1], v0.d[0] -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: tbl v0.8b, { v1.16b }, v0.8b +; CHECK-NEXT: ld1r { v1.8b }, [x1] +; CHECK-NEXT: ld1r { v0.8b }, [x0] +; CHECK-NEXT: mov v0.s[1], v1.s[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %0 = load i8, ptr %a, align 1 @@ -119,12 +117,10 @@ define <8 x i8> @test7(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; CHECK-LABEL: test7: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ld1r { v0.8b }, [x0] -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: ld1r { v1.8b }, [x1] -; CHECK-NEXT: mov v1.d[1], v0.d[0] -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: tbl v0.8b, { v1.16b }, v0.8b +; CHECK-NEXT: ld1r { v1.8b }, [x0] +; CHECK-NEXT: ld1r { v0.8b }, [x1] +; CHECK-NEXT: mov v0.s[1], v1.s[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %0 = load i8, ptr %a, align 1