diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -16209,6 +16209,33 @@ vector index constant type (for most targets this will be an integer pointer type). +'``llvm.experimental.vector.reverse``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a) + declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a) + +Overview: +""""""""" + +The '``llvm.experimental.vector.reverse.*``' intrinsics reverse a vector. +The intrinsic takes a single vector and returns a vector of matching type but +with the original lane order reversed. These intrinsics work for both fixed +and scalable vectors. While this intrinsic is marked as experimental, the +recommended way to express reverse operations for fixed-width vectors is still +to use a shufflevector, as that may allow for more optimization opportunities. + +Arguments: +"""""""""" + +The argument to this intrinsic must be a vector. + Matrix Intrinsics ----------------- diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -540,6 +540,11 @@ /// vector, but not the other way around. EXTRACT_SUBVECTOR, + /// VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, + /// whose elements are shuffled using the following algorithm: + /// RESULT[i] = VECTOR[VECTOR.ElementCount - 1 - i] + VECTOR_REVERSE, + /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as /// VEC1/VEC2.
A VECTOR_SHUFFLE node also contains an array of constant int /// values that indicate which value (or undef) each result element will diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1622,6 +1622,12 @@ ImmArg>, ImmArg>]>; +//===------------ Intrinsics to perform common vector shuffles ------------===// + +def int_experimental_vector_reverse : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>], + [IntrNoMem]>; + //===---------- Intrinsics to query properties of scalable vectors --------===// def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -254,6 +254,9 @@ SDTCisFP<0>, SDTCisVec<1> ]>; +def SDTVecReverse : SDTypeProfile<1, 1, [ // vector reverse + SDTCisVec<0>, SDTCisSameAs<0,1> +]>; def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract SDTCisSubVecOfVec<0,1>, SDTCisInt<2> @@ -651,6 +654,7 @@ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>; +def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>; def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>; def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>; def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>, diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5313,6 +5313,12 @@ return Op0; break; } + case Intrinsic::experimental_vector_reverse: + // experimental.vector.reverse(experimental.vector.reverse(x)) -> x + if (match(Op0, + 
m_Intrinsic(m_Value(X)))) + return X; + break; default: break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -96,6 +96,8 @@ case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::VECTOR_REVERSE: + Res = PromoteIntRes_VECTOR_REVERSE(N); break; case ISD::VECTOR_SHUFFLE: Res = PromoteIntRes_VECTOR_SHUFFLE(N); break; case ISD::INSERT_VECTOR_ELT: @@ -4648,6 +4650,14 @@ return DAG.getBuildVector(NOutVT, dl, Ops); } +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) { + SDLoc dl(N); + + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + EVT OutVT = V0.getValueType(); + + return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0); +} SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *SV = cast(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -298,6 +298,7 @@ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N); SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); @@ -833,6 +834,7 @@ void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi); void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VAARG(SDNode 
*N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -930,6 +930,9 @@ case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; + case ISD::VECTOR_REVERSE: + SplitVecRes_VECTOR_REVERSE(N, Lo, Hi); + break; case ISD::VECTOR_SHUFFLE: SplitVecRes_VECTOR_SHUFFLE(cast(N), Lo, Hi); break; @@ -5492,3 +5495,13 @@ Ops[Idx] = FillVal; return DAG.getBuildVector(NVT, dl, Ops); } + +void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue InLo, InHi; + GetSplitVector(N->getOperand(0), InLo, InHi); + SDLoc DL(N); + + Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi); + Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -773,6 +773,7 @@ void visitGCResult(const GCResultInst &I); void visitVectorReduce(const CallInst &I, unsigned Intrinsic); + void visitVectorReverse(const CallInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6998,6 +6998,9 @@ setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); return; } + case Intrinsic::experimental_vector_reverse: + visitVectorReverse(I); + return; } } @@ -10805,6 +10808,29 @@ } } +void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) { + const 
TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + SDLoc DL = getCurSDLoc(); + SDValue V = getValue(I.getOperand(0)); + assert(VT == V.getValueType() && "Malformed vector.reverse!"); + + if (VT.isScalableVector()) { + setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V)); + return; + } + + // Use VECTOR_SHUFFLE for the fixed-length vector + // to maintain existing behavior. + SmallVector Mask; + unsigned NumElts = VT.getVectorMinNumElements(); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(NumElts - 1 - i); + + setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask)); +} + void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { SmallVector ValueVTs; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -289,6 +289,8 @@ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; case ISD::SPLAT_VECTOR: return "splat_vector"; + case ISD::VECTOR_REVERSE: + return "vector_reverse"; case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -3894,7 +3894,7 @@ return false; // Vectors (of > 1 lane) in big endian need tricky handling. 
- if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 && + if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && !Subtarget->isLittleEndian()) return false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -292,7 +292,6 @@ CLASTB_N, LASTA, LASTB, - REV, TBL, // Floating-point reductions. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1856,7 +1856,6 @@ MAKE_CASE(AArch64ISD::CLASTB_N) MAKE_CASE(AArch64ISD::LASTA) MAKE_CASE(AArch64ISD::LASTB) - MAKE_CASE(AArch64ISD::REV) MAKE_CASE(AArch64ISD::REINTERPRET_CAST) MAKE_CASE(AArch64ISD::TBL) MAKE_CASE(AArch64ISD::FADD_PRED) @@ -3596,7 +3595,7 @@ return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::aarch64_sve_rev: - return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(), + return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(), Op.getOperand(1)); case Intrinsic::aarch64_sve_tbl: return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -249,9 +249,6 @@ def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; -def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>; - def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def AArch64ptest : SDNode<"AArch64ISD::PTEST", 
SDT_AArch64PTest>; @@ -587,8 +584,8 @@ defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>; defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>; - defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>; - defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>; + defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>; + defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>; defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>; defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>; diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll @@ -0,0 +1,230 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG %s +; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s + +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
+; WARN-NOT: warning + + +target triple = "aarch64-unknown-linux-gnu" + +; +; VECTOR_REVERSE +; + +define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 { +; CHECK-LABEL: .LCPI0_0: +; CHECK: .byte 15 // 0xf +; CHECK-NEXT: .byte 14 // 0xe +; CHECK-NEXT: .byte 13 // 0xd +; CHECK-NEXT: .byte 12 // 0xc +; CHECK-NEXT: .byte 11 // 0xb +; CHECK-NEXT: .byte 10 // 0xa +; CHECK-NEXT: .byte 9 // 0x9 +; CHECK-NEXT: .byte 8 // 0x8 +; CHECK-NEXT: .byte 7 // 0x7 +; CHECK-NEXT: .byte 6 // 0x6 +; CHECK-NEXT: .byte 5 // 0x5 +; CHECK-NEXT: .byte 4 // 0x4 +; CHECK-NEXT: .byte 3 // 0x3 +; CHECK-NEXT: .byte 2 // 0x2 +; CHECK-NEXT: .byte 1 // 0x1 +; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-LABEL: reverse_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: ret + + %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a) + ret <16 x i8> %res +} + +define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 { +; CHECK-LABEL: .LCPI1_0: +; CHECK: .byte 14 // 0xe +; CHECK-NEXT: .byte 15 // 0xf +; CHECK-NEXT: .byte 12 // 0xc +; CHECK-NEXT: .byte 13 // 0xd +; CHECK-NEXT: .byte 10 // 0xa +; CHECK-NEXT: .byte 11 // 0xb +; CHECK-NEXT: .byte 8 // 0x8 +; CHECK-NEXT: .byte 9 // 0x9 +; CHECK-NEXT: .byte 6 // 0x6 +; CHECK-NEXT: .byte 7 // 0x7 +; CHECK-NEXT: .byte 4 // 0x4 +; CHECK-NEXT: .byte 5 // 0x5 +; CHECK-NEXT: .byte 2 // 0x2 +; CHECK-NEXT: .byte 3 // 0x3 +; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-NEXT: .byte 1 // 0x1 +; CHECK-LABEL: reverse_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: ret + + %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a) + ret <8 x i16> %res +} + +define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 { +; CHECK-LABEL: reverse_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: rev64 v0.4s, v0.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; 
CHECK-NEXT: ret + + %res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a) + ret <4 x i32> %res +} + +define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 { +; CHECK-LABEL: reverse_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ret + + %res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a) + ret <2 x i64> %res +} + +define <8 x half> @reverse_v8f16(<8 x half> %a) #0 { +; CHECK-LABEL: .LCPI4_0: +; CHECK: .byte 14 // 0xe +; CHECK-NEXT: .byte 15 // 0xf +; CHECK-NEXT: .byte 12 // 0xc +; CHECK-NEXT: .byte 13 // 0xd +; CHECK-NEXT: .byte 10 // 0xa +; CHECK-NEXT: .byte 11 // 0xb +; CHECK-NEXT: .byte 8 // 0x8 +; CHECK-NEXT: .byte 9 // 0x9 +; CHECK-NEXT: .byte 6 // 0x6 +; CHECK-NEXT: .byte 7 // 0x7 +; CHECK-NEXT: .byte 4 // 0x4 +; CHECK-NEXT: .byte 5 // 0x5 +; CHECK-NEXT: .byte 2 // 0x2 +; CHECK-NEXT: .byte 3 // 0x3 +; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-NEXT: .byte 1 // 0x1 +; CHECK-LABEL: reverse_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: ret + + %res = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> %a) + ret <8 x half> %res +} + +define <4 x float> @reverse_v4f32(<4 x float> %a) #0 { +; CHECK-LABEL: reverse_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: rev64 v0.4s, v0.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ret + + %res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a) + ret <4 x float> %res +} + +define <2 x double> @reverse_v2f64(<2 x double> %a) #0 { +; CHECK-LABEL: reverse_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ret + + %res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a) + ret <2 x double> %res +} + +; Verify promote type legalisation works as expected. 
+define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 { +; CHECK-LABEL: reverse_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: rev64 v0.2s, v0.2s +; CHECK-NEXT: ret + + %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a) + ret <2 x i8> %res +} + +; Verify splitvec type legalisation works as expected. +define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 { +; CHECK-LABEL: reverse_v8i32: +; CHECK-SELDAG: // %bb.0: +; CHECK-SELDAG-NEXT: rev64 v1.4s, v1.4s +; CHECK-SELDAG-NEXT: rev64 v2.4s, v0.4s +; CHECK-SELDAG-NEXT: ext v0.16b, v1.16b, v1.16b, #8 +; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8 +; CHECK-SELDAG-NEXT: ret +; CHECK-FASTISEL: // %bb.0: +; CHECK-FASTISEL-NEXT: sub sp, sp, #16 +; CHECK-FASTISEL-NEXT: str q1, [sp] +; CHECK-FASTISEL-NEXT: mov v1.16b, v0.16b +; CHECK-FASTISEL-NEXT: ldr q0, [sp] +; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s +; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s +; CHECK-FASTISEL-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-FASTISEL-NEXT: add sp, sp, #16 +; CHECK-FASTISEL-NEXT: ret + + %res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a) + ret <8 x i32> %res +} + +; Verify splitvec type legalisation works as expected. 
+define <16 x float> @reverse_v16f32(<16 x float> %a) #0 { +; CHECK-LABEL: reverse_v16f32: +; CHECK-SELDAG: // %bb.0: +; CHECK-SELDAG-NEXT: rev64 v3.4s, v3.4s +; CHECK-SELDAG-NEXT: rev64 v2.4s, v2.4s +; CHECK-SELDAG-NEXT: rev64 v4.4s, v1.4s +; CHECK-SELDAG-NEXT: rev64 v5.4s, v0.4s +; CHECK-SELDAG-NEXT: ext v0.16b, v3.16b, v3.16b, #8 +; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8 +; CHECK-SELDAG-NEXT: ext v2.16b, v4.16b, v4.16b, #8 +; CHECK-SELDAG-NEXT: ext v3.16b, v5.16b, v5.16b, #8 +; CHECK-SELDAG-NEXT: ret +; CHECK-FASTISEL: // %bb.0: +; CHECK-FASTISEL-NEXT: sub sp, sp, #32 +; CHECK-FASTISEL-NEXT: str q3, [sp, #16] +; CHECK-FASTISEL-NEXT: str q2, [sp] +; CHECK-FASTISEL-NEXT: mov v2.16b, v1.16b +; CHECK-FASTISEL-NEXT: ldr q1, [sp] +; CHECK-FASTISEL-NEXT: mov v3.16b, v0.16b +; CHECK-FASTISEL-NEXT: ldr q0, [sp, #16] +; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s +; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s +; CHECK-FASTISEL-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-FASTISEL-NEXT: rev64 v2.4s, v2.4s +; CHECK-FASTISEL-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-FASTISEL-NEXT: rev64 v3.4s, v3.4s +; CHECK-FASTISEL-NEXT: ext v3.16b, v3.16b, v3.16b, #8 +; CHECK-FASTISEL-NEXT: add sp, sp, #32 +; CHECK-FASTISEL-NEXT: ret + + %res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a) + ret <16 x float> %res +} + + +declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>) +declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>) +declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>) +declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>) +declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>) +declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>) +declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>) +declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>) +declare <16 x float> 
@llvm.experimental.vector.reverse.v16f32(<16 x float>) +declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>) + +attributes #0 = { nounwind "target-features"="+neon" } diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll @@ -0,0 +1,238 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG %s +; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s + +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning + +target triple = "aarch64-unknown-linux-gnu" + +; +; VECTOR_REVERSE - PPR +; + +define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) #0 { +; CHECK-LABEL: reverse_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: rev p0.d, p0.d +; CHECK-NEXT: ret + + %res = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %a) + ret <vscale x 2 x i1> %res +} + +define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) #0 { +; CHECK-LABEL: reverse_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: rev p0.s, p0.s +; CHECK-NEXT: ret + + %res = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %a) + ret <vscale x 4 x i1> %res +} + +define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) #0 { +; CHECK-LABEL: reverse_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: rev p0.h, p0.h +; CHECK-NEXT: ret + + %res = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %a) + ret <vscale x 8 x i1> %res +} + +define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) #0 { +; CHECK-LABEL: reverse_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: rev p0.b, p0.b +; CHECK-NEXT: ret + + %res = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %a) + ret <vscale x 16 x i1> %res +} + +; Verify splitvec type legalisation works as expected.
+define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) #0 { +; CHECK-LABEL: reverse_nxv32i1: +; CHECK-SELDAG: // %bb.0: +; CHECK-SELDAG-NEXT: rev p2.b, p1.b +; CHECK-SELDAG-NEXT: rev p1.b, p0.b +; CHECK-SELDAG-NEXT: mov p0.b, p2.b +; CHECK-SELDAG-NEXT: ret +; CHECK-FASTISEL: // %bb.0: +; CHECK-FASTISEL-NEXT: str x29, [sp, #-16] +; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1 +; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl] +; CHECK-FASTISEL-NEXT: mov p1.b, p0.b +; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl] +; CHECK-FASTISEL-NEXT: rev p0.b, p0.b +; CHECK-FASTISEL-NEXT: rev p1.b, p1.b +; CHECK-FASTISEL-NEXT: addvl sp, sp, #1 +; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 +; CHECK-FASTISEL-NEXT: ret + + %res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a) + ret <vscale x 32 x i1> %res +} + +; +; VECTOR_REVERSE - ZPR +; + +define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) #0 { +; CHECK-LABEL: reverse_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: rev z0.b, z0.b +; CHECK-NEXT: ret + + %res = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> %a) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) #0 { +; CHECK-LABEL: reverse_nxv8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: rev z0.h, z0.h +; CHECK-NEXT: ret + + %res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a) + ret <vscale x 8 x i16> %res +} + +define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) #0 { +; CHECK-LABEL: reverse_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: rev z0.s, z0.s +; CHECK-NEXT: ret + + %res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a) + ret <vscale x 4 x i32> %res +} + +define <vscale x 2 x i64> @reverse_nxv2i64(<vscale x 2 x i64> %a) #0 { +; CHECK-LABEL: reverse_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: rev z0.d, z0.d +; CHECK-NEXT: ret + + %res = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> %a) + ret <vscale x 2 x i64> %res +} + +define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) #0 { +; CHECK-LABEL: reverse_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: rev z0.h, z0.h +; CHECK-NEXT: ret + + %res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) #0 { +; CHECK-LABEL: reverse_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: rev z0.s, z0.s +; CHECK-NEXT:
ret + + %res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @reverse_nxv2f64(<vscale x 2 x double> %a) #0 { +; CHECK-LABEL: reverse_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: rev z0.d, z0.d +; CHECK-NEXT: ret + + %res = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %a) + ret <vscale x 2 x double> %res +} + +; Verify promote type legalisation works as expected. +define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) #0 { +; CHECK-LABEL: reverse_nxv2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: rev z0.d, z0.d +; CHECK-NEXT: ret + + %res = call <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8> %a) + ret <vscale x 2 x i8> %res +} + +; Verify splitvec type legalisation works as expected. +define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 { +; CHECK-LABEL: reverse_nxv8i32: +; CHECK-SELDAG: // %bb.0: +; CHECK-SELDAG-NEXT: rev z2.s, z1.s +; CHECK-SELDAG-NEXT: rev z1.s, z0.s +; CHECK-SELDAG-NEXT: mov z0.d, z2.d +; CHECK-SELDAG-NEXT: ret +; CHECK-FASTISEL: // %bb.0: +; CHECK-FASTISEL-NEXT: str x29, [sp, #-16] +; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1 +; CHECK-FASTISEL-NEXT: str z1, [sp] +; CHECK-FASTISEL-NEXT: mov z1.d, z0.d +; CHECK-FASTISEL-NEXT: ldr z0, [sp] +; CHECK-FASTISEL-NEXT: rev z0.s, z0.s +; CHECK-FASTISEL-NEXT: rev z1.s, z1.s +; CHECK-FASTISEL-NEXT: addvl sp, sp, #1 +; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 +; CHECK-FASTISEL-NEXT: ret + + %res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a) + ret <vscale x 8 x i32> %res +} + +; Verify splitvec type legalisation works as expected.
+define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 { +; CHECK-LABEL: reverse_nxv16f32: +; CHECK-SELDAG: // %bb.0: +; CHECK-SELDAG-NEXT: rev z5.s, z3.s +; CHECK-SELDAG-NEXT: rev z4.s, z2.s +; CHECK-SELDAG-NEXT: rev z2.s, z1.s +; CHECK-SELDAG-NEXT: rev z3.s, z0.s +; CHECK-SELDAG-NEXT: mov z0.d, z5.d +; CHECK-SELDAG-NEXT: mov z1.d, z4.d +; CHECK-SELDAG-NEXT: ret +; CHECK-FASTISEL: // %bb.0: +; CHECK-FASTISEL-NEXT: str x29, [sp, #-16] +; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2 +; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl] +; CHECK-FASTISEL-NEXT: str z2, [sp] +; CHECK-FASTISEL-NEXT: mov z2.d, z1.d +; CHECK-FASTISEL-NEXT: ldr z1, [sp] +; CHECK-FASTISEL-NEXT: mov z3.d, z0.d +; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl] +; CHECK-FASTISEL-NEXT: rev z0.s, z0.s +; CHECK-FASTISEL-NEXT: rev z1.s, z1.s +; CHECK-FASTISEL-NEXT: rev z2.s, z2.s +; CHECK-FASTISEL-NEXT: rev z3.s, z3.s +; CHECK-FASTISEL-NEXT: addvl sp, sp, #2 +; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16 +; CHECK-FASTISEL-NEXT: ret + + %res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a) + ret <vscale x 16 x float> %res +} + + +declare <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1>) +declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>) +declare <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1>) +declare <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1>) +declare <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1>) +declare <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8>) +declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>) +declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>) +declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>) +declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>) +declare <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float>) +declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>) + + +attributes #0 = { nounwind "target-features"="+sve" } diff --git a/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll
b/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/named-vector-shuffle-reverse.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck %s + +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning + + +target triple = "x86_64-unknown-unknown" + +; +; VECTOR_REVERSE +; + +define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 { +; CHECK-LABEL: reverse_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: movdqa %xmm0, %xmm2 +; CHECK-NEXT: punpcklbw %xmm1, %xmm2 +; CHECK-NEXT: pshufd $78, %xmm2, %xmm2 +; CHECK-NEXT: pshuflw $27, %xmm2, %xmm2 +; CHECK-NEXT: pshufhw $27, %xmm2, %xmm2 +; CHECK-NEXT: punpckhbw %xmm1, %xmm0 +; CHECK-NEXT: pshufd $78, %xmm0, %xmm0 +; CHECK-NEXT: pshuflw $27, %xmm0, %xmm0 +; CHECK-NEXT: pshufhw $27, %xmm0, %xmm0 +; CHECK-NEXT: packuswb %xmm2, %xmm0 +; CHECK-NEXT: retq + + %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a) + ret <16 x i8> %res +} + +define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 { +; CHECK-LABEL: reverse_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: pshufd $78, %xmm0, %xmm0 +; CHECK-NEXT: pshuflw $27, %xmm0, %xmm0 +; CHECK-NEXT: pshufhw $27, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a) + ret <8 x i16> %res +} + +define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 { +; CHECK-LABEL: reverse_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: pshufd $27, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a) + ret <4 x i32> %res +} + +define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 { +; CHECK-LABEL: reverse_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: pshufd $78, %xmm0, %xmm0 +; CHECK-NEXT: retq +
%res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a) + ret <2 x i64> %res +} + +define <4 x float> @reverse_v4f32(<4 x float> %a) #0 { +; CHECK-LABEL: reverse_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: shufps $27, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a) + ret <4 x float> %res +} + +define <2 x double> @reverse_v2f64(<2 x double> %a) #0 { +; CHECK-LABEL: reverse_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: shufps $78, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a) + ret <2 x double> %res +} + +; Verify promote type legalisation works as expected. +define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 { +; CHECK-LABEL: reverse_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: movdqa %xmm0, %xmm1 +; CHECK-NEXT: psrlw $8, %xmm1 +; CHECK-NEXT: psllw $8, %xmm0 +; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a) + ret <2 x i8> %res +} + +; Verify splitvec type legalisation works as expected. +define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 { +; CHECK-LABEL: reverse_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: pshufd $27, %xmm1, %xmm2 +; CHECK-NEXT: pshufd $27, %xmm0, %xmm1 +; CHECK-NEXT: movdqa %xmm2, %xmm0 +; CHECK-NEXT: retq + %res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a) + ret <8 x i32> %res +} + +; Verify splitvec type legalisation works as expected. 
+define <16 x float> @reverse_v16f32(<16 x float> %a) #0 { +; CHECK-LABEL: reverse_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: movaps %xmm1, %xmm4 +; CHECK-NEXT: movaps %xmm0, %xmm5 +; CHECK-NEXT: shufps $27, %xmm3, %xmm3 +; CHECK-NEXT: shufps $27, %xmm2, %xmm2 +; CHECK-NEXT: shufps $27, %xmm1, %xmm4 +; CHECK-NEXT: shufps $27, %xmm0, %xmm5 +; CHECK-NEXT: movaps %xmm3, %xmm0 +; CHECK-NEXT: movaps %xmm2, %xmm1 +; CHECK-NEXT: movaps %xmm4, %xmm2 +; CHECK-NEXT: movaps %xmm5, %xmm3 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a) + ret <16 x float> %res +} + + +declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>) +declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>) +declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>) +declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>) +declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>) +declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>) +declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>) +declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>) +declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>) +declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>) + +attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll b/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll @@ -0,0 +1,17 @@ +; RUN: opt -instsimplify -S < %s 2>%t | FileCheck %s + +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning + +; Test back to back reverse shuffles are eliminated.
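+define <vscale x 4 x i32> @shuffle_b2b_reverse(<vscale x 4 x i32> %a) { +; CHECK-LABEL: @shuffle_b2b_reverse( +; CHECK: ret <vscale x 4 x i32> %a + %rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a) + %rev.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %rev) + ret <vscale x 4 x i32> %rev.rev +} + +declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)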