Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11023,6 +11023,26 @@ return true; } +/// is256bRevDupMask - Special case for <2 x double> shuffles of the form: +/// "vector_shuffle v, undef, <1, 1, 0, 0>" +static bool is256bRevDupMask(ArrayRef M, EVT VT) { + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; + if (VT.getSizeInBits() != 256 || VT.getScalarSizeInBits() != 64) + return false; + for (unsigned i = 0; i != NumElts; i ++) { + if (i < NumElts / 2) { + if (M[i] != 1) + return false; + } else { + if (M[i] != 0) + return false; + } + } + return true; +} + static bool isINSMask(ArrayRef M, int NumInputElements, bool &DstIsLeft, int &Anomaly) { if (M.size() != static_cast(NumInputElements)) @@ -24723,6 +24743,13 @@ return convertFromScalableVector( DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1)); } + + if (is256bRevDupMask(ShuffleMask, VT)) { + Op = DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op1); + Op = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, Op, Op, + DAG.getConstant(2, DL, MVT::i64)); + return convertFromScalableVector(DAG, VT, Op); + } } return SDValue(); Index: llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll @@ -50,7 +50,7 @@ define void @shuffle_ext_byone_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x1, x8] @@ -93,7 +93,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v128i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl128 -; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: mov w8, #127 // =0x7f ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] ; CHECK-NEXT: whilels p1.b, xzr, x8 @@ -127,7 +127,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v256i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl256 -; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov w8, #255 // =0xff ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] ; CHECK-NEXT: whilels p1.b, xzr, x8 @@ -215,7 +215,7 @@ define void @shuffle_ext_byone_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1] @@ -254,7 +254,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v64i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl64 -; CHECK-NEXT: mov w8, #63 +; CHECK-NEXT: mov w8, #63 // =0x3f ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: whilels p1.h, xzr, x8 @@ -280,7 +280,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v128i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl128 -; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: mov w8, #127 // =0x7f ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: whilels p1.h, xzr, x8 @@ -351,7 +351,7 @@ define void @shuffle_ext_byone_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2] @@ -388,7 +388,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v32i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl32 -; CHECK-NEXT: mov w8, #31 +; CHECK-NEXT: mov w8, #31 // =0x1f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] ; CHECK-NEXT: whilels p1.s, xzr, x8 @@ -410,7 +410,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v64i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl64 -; CHECK-NEXT: mov w8, #63 +; CHECK-NEXT: mov w8, #63 // =0x3f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] ; CHECK-NEXT: whilels p1.s, xzr, x8 @@ -463,7 +463,7 @@ define void @shuffle_ext_byone_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3] @@ -499,7 +499,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v16i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl16 -; CHECK-NEXT: mov w8, #15 +; CHECK-NEXT: mov w8, #15 // =0xf ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: whilels p1.d, xzr, x8 @@ -519,7 +519,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v32i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl32 -; CHECK-NEXT: mov w8, #31 +; CHECK-NEXT: mov w8, #31 // =0x1f ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: whilels p1.d, xzr, x8 @@ -578,7 +578,7 @@ define void @shuffle_ext_byone_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1] @@ -614,7 +614,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v64f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl64 -; CHECK-NEXT: mov w8, #63 +; CHECK-NEXT: mov w8, #63 // =0x3f ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: whilels p1.h, xzr, x8 @@ -640,7 +640,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v128f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl128 -; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: mov w8, #127 // =0x7f ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: whilels p1.h, xzr, x8 @@ -710,7 +710,7 @@ define void @shuffle_ext_byone_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2] @@ -744,7 +744,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v32f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl32 -; CHECK-NEXT: mov w8, #31 +; CHECK-NEXT: mov w8, #31 // =0x1f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] ; CHECK-NEXT: whilels p1.s, xzr, x8 @@ -766,7 +766,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v64f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl64 -; CHECK-NEXT: mov w8, #63 +; CHECK-NEXT: mov w8, #63 // =0x3f ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] ; CHECK-NEXT: whilels p1.s, xzr, x8 @@ -818,7 +818,7 @@ define void @shuffle_ext_byone_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3] @@ -851,7 +851,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v16f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl16 -; CHECK-NEXT: mov w8, #15 +; CHECK-NEXT: mov w8, #15 // =0xf ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: whilels p1.d, xzr, x8 @@ -871,7 +871,7 @@ ; CHECK-LABEL: shuffle_ext_byone_v32f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl32 -; CHECK-NEXT: mov w8, #31 +; CHECK-NEXT: mov w8, #31 // =0x1f ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: whilels p1.d, xzr, x8 @@ -938,4 +938,19 @@ ret void } +define void @shuffle_256b_rev_and_dup(ptr %a) vscale_range(2,2) #0 { +; CHECK-LABEL: shuffle_256b_rev_and_dup: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: zip1 z0.d, z0.d, z0.d +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16 +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + %op = load <2 x double>, ptr %a + %ret = shufflevector <2 x double> %op, <2 x double> poison, <4 x i32> + store <4 x double> %ret, ptr %a + ret void +} + attributes #0 = { "target-features"="+sve" }