Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6706,16 +6706,20 @@
   //             select the values we'll be overwriting for the non-constant
   //             lanes such that we can directly materialize the vector
   //             some other way (MOVI, e.g.), we can be sneaky.
+  //   5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
   unsigned NumElts = VT.getVectorNumElements();
   bool isOnlyLowElement = true;
   bool usesOnlyOneValue = true;
   bool usesOnlyOneConstantValue = true;
   bool isConstant = true;
+  bool AllLanesExtractElt = true;
   unsigned NumConstantLanes = 0;
   SDValue Value;
   SDValue ConstantValue;
   for (unsigned i = 0; i < NumElts; ++i) {
     SDValue V = Op.getOperand(i);
+    if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      AllLanesExtractElt = false;
     if (V.isUndef())
       continue;
     if (i > 0)
@@ -6748,6 +6752,74 @@
     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
   }
 
+  // If every lane is an EXTRACT_VECTOR_ELT taking the even (<0,2,4,...>) or
+  // the odd (<1,3,5,...>) elements of one source vector, lower to a single
+  // UZP1/UZP2 of the two halves of that source.
+  if (AllLanesExtractElt) {
+    SDNode *Vector = nullptr;
+    bool Even = false;
+    bool Odd = false;
+    // Check whether the extract elements match the Even pattern <0,2,4,...> or
+    // the Odd pattern <1,3,5,...>.
+    for (unsigned i = 0; i < NumElts; ++i) {
+      SDValue V = Op.getOperand(i);
+      const SDNode *N = V.getNode();
+      // A non-constant lane index cannot match either pattern. Clear the
+      // flags so lanes matched earlier do not trigger the lowering.
+      if (!isa<ConstantSDNode>(N->getOperand(1))) {
+        Odd = false;
+        Even = false;
+        break;
+      }
+      SDValue N0 = N->getOperand(0);
+
+      // All elements are extracted from the same vector.
+      if (!Vector) {
+        Vector = N0.getNode();
+        // The source element type must match VT's element type, otherwise
+        // the EXTRACT_SUBVECTORs below cannot yield VT-typed halves.
+        if (VT.getVectorElementType() !=
+            N0.getValueType().getVectorElementType())
+          break;
+      } else if (Vector != N0.getNode()) {
+        Odd = false;
+        Even = false;
+        break;
+      }
+
+      // Extracted values are either at Even indices <0,2,4,...> or at Odd
+      // indices <1,3,5,...>.
+      uint64_t Val = N->getConstantOperandVal(1);
+      if (Val == 2 * i) {
+        Even = true;
+        continue;
+      }
+      if (Val == 2 * i + 1) {
+        Odd = true;
+        continue;
+      }
+
+      // Something does not match: abort.
+      Odd = false;
+      Even = false;
+      break;
+    }
+    // Lower only when every lane matched the same pattern; a mix of even and
+    // odd lanes leaves both flags set and is not a UZP.
+    if (Even != Odd) {
+      SDValue LHS =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
+                      DAG.getConstant(0, dl, MVT::i64));
+      SDValue RHS =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
+                      DAG.getConstant(NumElts, dl, MVT::i64));
+
+      // NOTE(review): UZP1/UZP2 are treated as single-result nodes here, so
+      // VT is passed instead of a two-entry VT list - confirm against the
+      // other UZP users in this file.
+      unsigned Opc = Even ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
+      return DAG.getNode(Opc, dl, VT, LHS, RHS);
+    }
+  }
+
   // Use DUP for non-constant splats. For f32 constant splats, reduce to
   // i32 and try again.
   if (usesOnlyOneValue) {
Index: llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-vuzp.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+; NOTE(review): the tbl1 table operand below was reconstructed and should be
+; confirmed; the uzp1/uzp2 checks depend on the even/odd shufflevector masks.
+
+; CHECK-LABEL: fun1:
+; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NOT: mov
+define i32 @fun1() {
+entry:
+  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71>, <16 x i8> undef)
+  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
+  store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
+  ret i32 undef
+}
+
+; CHECK-LABEL: fun2:
+; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+; CHECK-NOT: mov
+define i32 @fun2() {
+entry:
+  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71, i8 -71>, <16 x i8> undef)
+  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
+  store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
+  ret i32 undef
+}
+
+declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)