Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12213,6 +12213,7 @@
   unsigned NumUndefLanes = 0;
   SDValue Value;
   SDValue ConstantValue;
+  SmallMapVector<SDNode *, unsigned, 16> DifferentValueMap;
   for (unsigned i = 0; i < NumElts; ++i) {
     SDValue V = Op.getOperand(i);
     if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
@@ -12240,6 +12241,9 @@
       usesOnlyOneValue = false;
       ++NumDifferentLanes;
     }
+
+    if (!DifferentValueMap.contains(V.getNode()))
+      DifferentValueMap[V.getNode()] = i;
   }
 
   if (!Value.getNode()) {
@@ -12431,6 +12435,35 @@
     return Shuffle;
   }
 
+  // If vector has two different values and it can be split into two sub
+  // vectors with same length, generate two DUP and CONCAT_VECTORS with them.
+  // For example,
+  //
+  //   t22: v16i8 = BUILD_VECTOR t23, t23, t23, t23, t23, t23, t23, t23,
+  //                             t24, t24, t24, t24, t24, t24, t24, t24
+  //   ==>
+  //   t26: v8i8 = AArch64ISD::DUP t23
+  //   t28: v8i8 = AArch64ISD::DUP t24
+  //   t29: v16i8 = concat_vectors t26, t28
+  if (DifferentValueMap.size() == 2 &&
+      DifferentValueMap[Op.getOperand(NumElts / 2).getNode()] ==
+          (NumElts / 2)) {
+    EVT SubVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+    if (isTypeLegal(SubVT) && SubVT.isVector() &&
+        SubVT.getVectorNumElements() > 1) {
+      unsigned SubNumElts = NumElts / 2;
+      SmallVector<SDValue> Ops1(SubNumElts, Op.getOperand(0));
+      SmallVector<SDValue> Ops2(SubNumElts, Op.getOperand(SubNumElts));
+      SDValue SubVector1 =
+          LowerBUILD_VECTOR(DAG.getBuildVector(SubVT, dl, Ops1), DAG);
+      SDValue SubVector2 =
+          LowerBUILD_VECTOR(DAG.getBuildVector(SubVT, dl, Ops2), DAG);
+      SDValue ConcatVector =
+          DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SubVector1, SubVector2);
+      return ConcatVector;
+    }
+  }
+
   if (PreferDUPAndInsert) {
     // First, build a constant vector with the common element.
     SmallVector<SDValue, 8> Ops(NumElts, Value);
Index: llvm/test/CodeGen/AArch64/arm64-tbl.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-tbl.ll
+++ llvm/test/CodeGen/AArch64/arm64-tbl.ll
@@ -160,7 +160,7 @@
 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s4, w0
-; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    mov w8, #32 // =0x20
 ; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    mov.b v4[1], w0
@@ -173,19 +173,19 @@
 ; CHECK-NEXT:    mov.b v4[6], w0
 ; CHECK-NEXT:    mov.b v4[7], w0
 ; CHECK-NEXT:    mov.b v4[8], w8
-; CHECK-NEXT:    mov w8, #36
+; CHECK-NEXT:    mov w8, #36 // =0x24
 ; CHECK-NEXT:    mov.b v4[9], w8
-; CHECK-NEXT:    mov w8, #40
+; CHECK-NEXT:    mov w8, #40 // =0x28
 ; CHECK-NEXT:    mov.b v4[10], w8
-; CHECK-NEXT:    mov w8, #44
+; CHECK-NEXT:    mov w8, #44 // =0x2c
 ; CHECK-NEXT:    mov.b v4[11], w8
-; CHECK-NEXT:    mov w8, #48
+; CHECK-NEXT:    mov w8, #48 // =0x30
 ; CHECK-NEXT:    mov.b v4[12], w8
-; CHECK-NEXT:    mov w8, #52
+; CHECK-NEXT:    mov w8, #52 // =0x34
 ; CHECK-NEXT:    mov.b v4[13], w8
-; CHECK-NEXT:    mov w8, #56
+; CHECK-NEXT:    mov w8, #56 // =0x38
 ; CHECK-NEXT:    mov.b v4[14], w8
-; CHECK-NEXT:    mov w8, #60
+; CHECK-NEXT:    mov w8, #60 // =0x3c
 ; CHECK-NEXT:    mov.b v4[15], w8
 ; CHECK-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-NEXT:    ret
@@ -214,7 +214,7 @@
 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
@@ -226,22 +226,22 @@
 ; CHECK-NEXT:    mov.b v4[4], w8
 ; CHECK-NEXT:    mov.b v4[5], w8
 ; CHECK-NEXT:    mov.b v4[6], w8
-; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    mov w8, #32 // =0x20
 ; CHECK-NEXT:    mov.b v4[7], w0
 ; CHECK-NEXT:    mov.b v4[8], w8
-; CHECK-NEXT:    mov w8, #36
+; CHECK-NEXT:    mov w8, #36 // =0x24
 ; CHECK-NEXT:    mov.b v4[9], w8
-; CHECK-NEXT:    mov w8, #40
+; CHECK-NEXT:    mov w8, #40 // =0x28
 ; CHECK-NEXT:    mov.b v4[10], w8
-; CHECK-NEXT:    mov w8, #44
+; CHECK-NEXT:    mov w8, #44 // =0x2c
 ; CHECK-NEXT:    mov.b v4[11], w8
-; CHECK-NEXT:    mov w8, #48
+; CHECK-NEXT:    mov w8, #48 // =0x30
 ; CHECK-NEXT:    mov.b v4[12], w8
-; CHECK-NEXT:    mov w8, #52
+; CHECK-NEXT:    mov w8, #52 // =0x34
 ; CHECK-NEXT:    mov.b v4[13], w8
-; CHECK-NEXT:    mov w8, #56
+; CHECK-NEXT:    mov w8, #56 // =0x38
 ; CHECK-NEXT:    mov.b v4[14], w8
-; CHECK-NEXT:    mov w8, #31
+; CHECK-NEXT:    mov w8, #31 // =0x1f
 ; CHECK-NEXT:    mov.b v4[15], w8
 ; CHECK-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-NEXT:    ret
@@ -315,23 +315,18 @@
 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
 ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #255
-; CHECK-NEXT:    dup.16b v4, w0
-; CHECK-NEXT:    adrp x9, .LCPI13_0
+; CHECK-NEXT:    movi.2d v4, #0xffffffffffffffff
+; CHECK-NEXT:    adrp x8, .LCPI13_0
+; CHECK-NEXT:    dup.8b v5, w0
 ; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT:    mov.b v4[8], w8
-; CHECK-NEXT:    ldr q5, [x9, :lo12:.LCPI13_0]
-; CHECK-NEXT:    mov.b v4[9], w8
-; CHECK-NEXT:    tbl.16b v2, { v2, v3 }, v5
-; CHECK-NEXT:    mov.b v4[10], w8
-; CHECK-NEXT:    mov.b v4[11], w8
-; CHECK-NEXT:    mov.b v4[12], w8
-; CHECK-NEXT:    mov.b v4[13], w8
+; CHECK-NEXT:    ldr q6, [x8, :lo12:.LCPI13_0]
 ; CHECK-NEXT:    adrp x8, .LCPI13_1
-; CHECK-NEXT:    tbl.16b v3, { v0, v1 }, v4
+; CHECK-NEXT:    mov.d v5[1], v4[0]
+; CHECK-NEXT:    tbl.16b v2, { v2, v3 }, v6
+; CHECK-NEXT:    tbl.16b v3, { v0, v1 }, v5
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI13_1]
 ; CHECK-NEXT:    tbl.16b v0, { v2, v3 }, v0
 ; CHECK-NEXT:    ret