diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10600,13 +10600,11 @@ /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64 || + BlockSize == 128) && + "Only possible block sizes for REV are: 16, 32, 64, 128"); unsigned EltSz = VT.getScalarSizeInBits(); - if (EltSz == 64) - return false; - unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; // If the first shuffle index is UNDEF, be optimistic. @@ -23176,6 +23174,18 @@ } } + if (Subtarget->hasSVE2p1() && isREVMask(ShuffleMask, VT, 128) && + VT.getScalarSizeInBits() == 64) { + if (!VT.isFloatingPoint()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU); + + EVT NewVT = getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), 64)); + Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1); + Op = LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU); + Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op); + return convertFromScalableVector(DAG, VT, Op); + } + unsigned WhichResult; if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0) return convertFromScalableVector( diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll @@ -223,6 +223,37 @@ ret void } +; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse +; the double-words within 
quad-words. +define void @test_revdv4i64_sve2p1(ptr %a) #2 { +; CHECK-LABEL: test_revdv4i64_sve2p1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: revd z0.q, p0/m, z0.q +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + %tmp1 = load <4 x i64>, ptr %a + %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> + store <4 x i64> %tmp2, ptr %a + ret void +} + +define void @test_revdv4f64_sve2p1(ptr %a) #2 { +; CHECK-LABEL: test_revdv4f64_sve2p1: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: revd z0.q, p1/m, z0.q +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + %tmp1 = load <4 x double>, ptr %a + %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> + store <4 x double> %tmp2, ptr %a + ret void +} + ; REV instruction will reverse the order of all elements in the vector. ; When the vector length and the target register size are inconsistent, ; the correctness of generated REV instruction for shuffle pattern cannot be guaranteed. @@ -472,3 +503,4 @@ attributes #0 = { "target-features"="+sve" } attributes #1 = { "target-features"="+sve" vscale_range(2,2) } +attributes #2 = { "target-features"="+sve2p1" } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll @@ -203,6 +203,38 @@ ret void } +; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse +; the double-words within quad-words. 
+define void @test_revdv4i64_sve2p1(ptr %a) #1 { +; CHECK-LABEL: test_revdv4i64_sve2p1: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: revd z0.q, p0/m, z0.q +; CHECK-NEXT: revd z1.q, p0/m, z1.q +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %tmp1 = load <4 x i64>, ptr %a + %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> + store <4 x i64> %tmp2, ptr %a + ret void +} + +define void @test_revdv4f64_sve2p1(ptr %a) #1 { +; CHECK-LABEL: test_revdv4f64_sve2p1: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: revd z0.q, p0/m, z0.q +; CHECK-NEXT: revd z1.q, p0/m, z1.q +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %tmp1 = load <4 x double>, ptr %a + %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> + store <4 x double> %tmp2, ptr %a + ret void +} + define void @test_revv8i32(ptr %a) #0 { ; CHECK-LABEL: test_revv8i32: ; CHECK: // %bb.0: @@ -238,3 +270,4 @@ } attributes #0 = { "target-features"="+sve" } +attributes #1 = { "target-features"="+sve2p1" }