Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9035,6 +9035,15 @@ if (isREVMask(ShuffleMask, VT, 16)) return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2); + if (V1.getValueType() == VT && + ((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) || + (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) && + ShuffleVectorInst::isReverseMask(ShuffleMask)) { + SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1); + return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev, + DAG.getConstant(8, dl, MVT::i32)); + } + bool ReverseEXT = false; unsigned Imm; if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) { Index: llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll =================================================================== --- llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll +++ llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll @@ -9,28 +9,10 @@ ; define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 { -; CHECK-LABEL: .LCPI0_0: -; CHECK: .byte 15 // 0xf -; CHECK-NEXT: .byte 14 // 0xe -; CHECK-NEXT: .byte 13 // 0xd -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 11 // 0xb -; CHECK-NEXT: .byte 10 // 0xa -; CHECK-NEXT: .byte 9 // 0x9 -; CHECK-NEXT: .byte 8 // 0x8 -; CHECK-NEXT: .byte 7 // 0x7 -; CHECK-NEXT: .byte 6 // 0x6 -; CHECK-NEXT: .byte 5 // 0x5 -; CHECK-NEXT: .byte 4 // 0x4 -; CHECK-NEXT: .byte 3 // 0x3 -; CHECK-NEXT: .byte 2 // 0x2 -; CHECK-NEXT: .byte 1 // 0x1 -; CHECK-NEXT: .byte 0 // 0x0 ; CHECK-LABEL: reverse_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: rev64 v0.16b, v0.16b +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ret %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a) @@ -38,28 +20,10 @@ } define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 { -; CHECK-LABEL: .LCPI1_0: -; CHECK: .byte 14 // 0xe -; CHECK-NEXT: .byte 15 // 0xf -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 13 // 0xd -; CHECK-NEXT: .byte 10 // 0xa -; CHECK-NEXT: .byte 11 // 0xb -; CHECK-NEXT: .byte 8 // 0x8 -; CHECK-NEXT: .byte 9 // 0x9 -; CHECK-NEXT: .byte 6 // 0x6 -; CHECK-NEXT: .byte 7 // 0x7 -; CHECK-NEXT: .byte 4 // 0x4 -; CHECK-NEXT: .byte 5 // 0x5 -; CHECK-NEXT: .byte 2 // 0x2 -; CHECK-NEXT: .byte 3 // 0x3 -; CHECK-NEXT: .byte 0 // 0x0 -; CHECK-NEXT: .byte 1 // 0x1 ; CHECK-LABEL: reverse_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: rev64 v0.8h, v0.8h +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ret %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a) @@ -88,28 +52,10 @@ } define <8 x half> @reverse_v8f16(<8 x half> %a) #0 { -; CHECK-LABEL: .LCPI4_0: -; CHECK: .byte 14 // 0xe -; CHECK-NEXT: .byte 15 // 0xf -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 13 // 0xd -; CHECK-NEXT: .byte 10 // 0xa -; CHECK-NEXT: .byte 11 // 0xb -; CHECK-NEXT: .byte 8 // 0x8 -; CHECK-NEXT: .byte 9 // 0x9 -; CHECK-NEXT: .byte 6 // 0x6 -; CHECK-NEXT: .byte 7 // 0x7 -; CHECK-NEXT: .byte 4 // 0x4 -; CHECK-NEXT: .byte 5 // 0x5 -; CHECK-NEXT: .byte 2 // 0x2 -; CHECK-NEXT: .byte 3 // 0x3 -; CHECK-NEXT: .byte 0 // 0x0 -; CHECK-NEXT: .byte 1 // 0x1 ; CHECK-LABEL: reverse_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: rev64 v0.8h, v0.8h +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ret %res = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> %a) Index: llvm/test/CodeGen/AArch64/neon-reverseshuffle.patch =================================================================== --- llvm/test/CodeGen/AArch64/neon-reverseshuffle.patch +++ llvm/test/CodeGen/AArch64/neon-reverseshuffle.patch @@ -35,9 +35,8 @@ define <8 x i16> @v8i16(<8 x i16> %a) { ; CHECK-LABEL: v8i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: rev64 v0.8h, v0.8h +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ret entry: %V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> @@ -69,9 +68,8 @@ define <16 x i8> @v16i8(<16 x i8> %a) { ; CHECK-LABEL: v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: rev64 v0.16b, v0.16b +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ret entry: %V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> @@ -136,9 +134,8 @@ define <8 x half> @v8f16(<8 x half> %a) { ; CHECK-LABEL: v8f16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-NEXT: rev64 v0.8h, v0.8h +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ret entry: %V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32>