diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -178,6 +178,35 @@
     return true;
   }
 
+  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
+    if (N.getOpcode() != AArch64ISD::VLSHR)
+      return false;
+    SDValue Op = N->getOperand(0);
+    EVT VT = Op.getValueType();
+    unsigned ShtAmt = N->getConstantOperandVal(1);
+    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
+      return false;
+
+    APInt Imm;
+    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
+      Imm = APInt(VT.getScalarSizeInBits(),
+                  Op.getOperand(1).getConstantOperandVal(0)
+                      << Op.getOperand(1).getConstantOperandVal(1));
+    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
+             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
+      Imm = APInt(VT.getScalarSizeInBits(),
+                  Op.getOperand(1).getConstantOperandVal(0));
+    else
+      return false;
+
+    if (Imm != 1 << (ShtAmt - 1))
+      return false;
+
+    Res1 = Op.getOperand(0);
+    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
+    return true;
+  }
+
   bool SelectDupZeroOrUndef(SDValue N) {
     switch(N->getOpcode()) {
     case ISD::UNDEF:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18031,6 +18031,17 @@
   case Intrinsic::aarch64_neon_sshl:
   case Intrinsic::aarch64_neon_ushl:
     return tryCombineShiftImm(IID, N, DAG);
+  case Intrinsic::aarch64_neon_rshrn: {
+    EVT VT = N->getOperand(1).getValueType();
+    SDLoc DL(N);
+    SDValue Imm =
+        DAG.getConstant(1LLU << (N->getConstantOperandVal(2) - 1), DL, VT);
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(1), Imm);
+    SDValue Sht =
+        DAG.getNode(ISD::SRL, DL, VT, Add,
+                    DAG.getConstant(N->getConstantOperandVal(2), DL, VT));
+    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Sht);
+  }
   case Intrinsic::aarch64_crc32b:
   case Intrinsic::aarch64_crc32cb:
     return tryCombineCRC32(0xff, N, DAG);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -759,6 +759,7 @@
 def AArch64faddp     : PatFrags<(ops node:$Rn, node:$Rm),
                                 [(AArch64addp_n node:$Rn, node:$Rm),
                                  (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
+def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
 def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
 def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG,
                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -6810,7 +6811,7 @@
 defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                  int_aarch64_neon_vcvtfxs2fp>;
 defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
-                                         int_aarch64_neon_rshrn>;
+                 BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>;
 defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
 defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
@@ -6860,29 +6861,31 @@
                 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
 
 // RADDHN patterns for when RSHRN shifts by half the size of the vector element
-def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
+def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
-def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
+def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
-def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
+let AddedComplexity = 5 in
+def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
 
 // RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
-                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
+                 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))),
           (RADDHNv8i16_v16i8
                  (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
                  V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
-                 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
+                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
           (RADDHNv4i32_v8i16
                  (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
                  V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
+let AddedComplexity = 5 in
 def : Pat<(v4i32 (concat_vectors
                  (v2i32 V64:$Vd),
-                 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
+                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
          (RADDHNv2i64_v4i32
                  (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub),
                  V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
diff --git a/llvm/test/CodeGen/AArch64/neon-rshrn.ll b/llvm/test/CodeGen/AArch64/neon-rshrn.ll
--- a/llvm/test/CodeGen/AArch64/neon-rshrn.ll
+++ b/llvm/test/CodeGen/AArch64/neon-rshrn.ll
@@ -4,11 +4,8 @@
 define <16 x i8> @rshrn_v16i16_1(<16 x i16> %a) {
 ; CHECK-LABEL: rshrn_v16i16_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.8h, #1
-; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #1
-; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #1
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #1
+; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #1
 ; CHECK-NEXT:    ret
 entry:
   %b = add <16 x i16> %a,
@@ -20,11 +17,8 @@
 define <16 x i8> @rshrn_v16i16_2(<16 x i16> %a) {
 ; CHECK-LABEL: rshrn_v16i16_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.8h, #2
-; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #2
-; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #2
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #2
+; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #2
 ; CHECK-NEXT:    ret
 entry:
   %b = add <16 x i16> %a,
@@ -36,11 +30,8 @@
 define <16 x i8> @rshrn_v16i16_3(<16 x i16> %a) {
 ; CHECK-LABEL: rshrn_v16i16_3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.8h, #4
-; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #3
-; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #3
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #3
+; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
 entry:
   %b = add <16 x i16> %a,
@@ -52,11 +43,8 @@
 define <16 x i8> @rshrn_v16i16_4(<16 x i16> %a) {
 ; CHECK-LABEL: rshrn_v16i16_4:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.8h, #8
-; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #4
-; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #4
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #4
+; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #4
 ; CHECK-NEXT:    ret
 entry:
   %b = add <16 x i16> %a,
@@ -68,11 +56,8 @@
 define <16 x i8> @rshrn_v16i16_5(<16 x i16> %a) {
 ; CHECK-LABEL: rshrn_v16i16_5:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.8h, #16
-; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #5
-; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #5
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #5
+; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #5
 ; CHECK-NEXT:    ret
 entry:
   %b = add <16 x i16> %a,
@@ -84,11 +69,8 @@
 define <16 x i8> @rshrn_v16i16_6(<16 x i16> %a) {
 ; CHECK-LABEL: rshrn_v16i16_6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.8h, #32
-; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #6
-; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #6
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #6
+; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #6
 ; CHECK-NEXT:    ret
 entry:
   %b = add <16 x i16> %a,
@@ -100,11 +82,8 @@
 define <16 x i8> @rshrn_v16i16_7(<16 x i16> %a) {
 ; CHECK-LABEL: rshrn_v16i16_7:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.8h, #64
-; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    add v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    shrn v0.8b, v0.8h, #7
-; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #7
+; CHECK-NEXT:    rshrn v0.8b, v0.8h, #7
+; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #7
 ; CHECK-NEXT:    ret
 entry:
   %b = add <16 x i16> %a,
@@ -116,9 +95,9 @@
 define <16 x i8> @rshrn_v16i16_8(<16 x i16> %a) {
 ; CHECK-LABEL: rshrn_v16i16_8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.8h, #128
-; CHECK-NEXT:    addhn v0.8b, v0.8h, v2.8h
-; CHECK-NEXT:    addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    raddhn v0.8b, v0.8h, v2.8h
+; CHECK-NEXT:    raddhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
   %b = add <16 x i16> %a,
@@ -147,11 +126,8 @@
 define <8 x i16> @rshrn_v8i32_1(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #1
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #1
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #1
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #1
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #1
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -163,11 +139,8 @@
 define <8 x i16> @rshrn_v8i32_2(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #2
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #2
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #2
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #2
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #2
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -179,11 +152,8 @@
 define <8 x i16> @rshrn_v8i32_3(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #4
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #3
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #3
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #3
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #3
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -195,11 +165,8 @@
 define <8 x i16> @rshrn_v8i32_4(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_4:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #4
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #4
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #4
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #4
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -211,11 +178,8 @@
 define <8 x i16> @rshrn_v8i32_5(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_5:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #16
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #5
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #5
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #5
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #5
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -227,11 +191,8 @@
 define <8 x i16> @rshrn_v8i32_6(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #32
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #6
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #6
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #6
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #6
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -243,11 +204,8 @@
 define <8 x i16> @rshrn_v8i32_7(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_7:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #64
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #7
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #7
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #7
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #7
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -259,11 +217,8 @@
 define <8 x i16> @rshrn_v8i32_8(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #128
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #8
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #8
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #8
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #8
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -275,11 +230,8 @@
 define <8 x i16> @rshrn_v8i32_9(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_9:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #1, lsl #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #9
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #9
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #9
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -291,11 +243,8 @@
 define <8 x i16> @rshrn_v8i32_10(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_10:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #2, lsl #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #10
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #10
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #10
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #10
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -307,11 +256,8 @@
 define <8 x i16> @rshrn_v8i32_11(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_11:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #4, lsl #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #11
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #11
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #11
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #11
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -323,11 +269,8 @@
 define <8 x i16> @rshrn_v8i32_12(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_12:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #8, lsl #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #12
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #12
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #12
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #12
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -339,11 +282,8 @@
 define <8 x i16> @rshrn_v8i32_13(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_13:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #16, lsl #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #13
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #13
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #13
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #13
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -355,11 +295,8 @@
 define <8 x i16> @rshrn_v8i32_14(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_14:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #32, lsl #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #14
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #14
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #14
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #14
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -371,11 +308,8 @@
 define <8 x i16> @rshrn_v8i32_15(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_15:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #64, lsl #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    shrn v0.4h, v0.4s, #15
-; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #15
+; CHECK-NEXT:    rshrn v0.4h, v0.4s, #15
+; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #15
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -387,9 +321,9 @@
 define <8 x i16> @rshrn_v8i32_16(<8 x i32> %a) {
 ; CHECK-LABEL: rshrn_v8i32_16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.4s, #128, lsl #8
-; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
-; CHECK-NEXT:    addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    raddhn v0.4h, v0.4s, v2.4s
+; CHECK-NEXT:    raddhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
   %b = add <8 x i32> %a,
@@ -418,12 +352,8 @@
 define <4 x i32> @rshrn_v4i64_1(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #1
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #1
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #1
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #1
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #1
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -435,12 +365,8 @@
 define <4 x i32> @rshrn_v4i64_2(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #2
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #2
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #2
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #2
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #2
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -452,12 +378,8 @@
 define <4 x i32> @rshrn_v4i64_3(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_3:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #4
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #3
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #3
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #3
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #3
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -469,12 +391,8 @@
 define <4 x i32> @rshrn_v4i64_4(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_4:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #8
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #4
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #4
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #4
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #4
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -486,12 +404,8 @@
 define <4 x i32> @rshrn_v4i64_5(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_5:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #16
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #5
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #5
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #5
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #5
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -503,12 +417,8 @@
 define <4 x i32> @rshrn_v4i64_6(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #6
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #6
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #6
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #6
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -520,12 +430,8 @@
 define <4 x i32> @rshrn_v4i64_7(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_7:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #7
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #7
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #7
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #7
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -537,12 +443,8 @@
 define <4 x i32> @rshrn_v4i64_8(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #128
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #8
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #8
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #8
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #8
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -554,12 +456,8 @@
 define <4 x i32> @rshrn_v4i64_9(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_9:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #256
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #9
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #9
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #9
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #9
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -571,12 +469,8 @@
 define <4 x i32> @rshrn_v4i64_10(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_10:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #512
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #10
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #10
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #10
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #10
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -588,12 +482,8 @@
 define <4 x i32> @rshrn_v4i64_11(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_11:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #1024
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #11
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #11
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #11
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #11
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -605,12 +495,8 @@
 define <4 x i32> @rshrn_v4i64_12(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_12:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #2048
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #12
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #12
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #12
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #12
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -622,12 +508,8 @@
 define <4 x i32> @rshrn_v4i64_13(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_13:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #4096
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #13
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #13
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #13
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #13
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -639,12 +521,8 @@
 define <4 x i32> @rshrn_v4i64_14(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_14:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #8192
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #14
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #14
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #14
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #14
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -656,12 +534,8 @@
 define <4 x i32> @rshrn_v4i64_15(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_15:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #16384
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #15
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #15
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #15
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #15
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -673,12 +547,8 @@
 define <4 x i32> @rshrn_v4i64_16(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #32768
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #16
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #16
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #16
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #16
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -690,12 +560,8 @@
 define <4 x i32> @rshrn_v4i64_17(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_17:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #65536
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #17
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #17
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #17
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #17
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -707,12 +573,8 @@
 define <4 x i32> @rshrn_v4i64_18(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_18:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #131072
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #18
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #18
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #18
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #18
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -724,12 +586,8 @@
 define <4 x i32> @rshrn_v4i64_19(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_19:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #262144
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #19
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #19
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #19
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -741,12 +599,8 @@
 define <4 x i32> @rshrn_v4i64_20(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_20:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #524288
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #20
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #20
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #20
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #20
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -758,12 +612,8 @@
 define <4 x i32> @rshrn_v4i64_21(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_21:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #1048576
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #21
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #21
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #21
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #21
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -775,12 +625,8 @@
 define <4 x i32> @rshrn_v4i64_22(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_22:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #2097152
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #22
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #22
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #22
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #22
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -792,12 +638,8 @@
 define <4 x i32> @rshrn_v4i64_23(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_23:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #4194304
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #23
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #23
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #23
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #23
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -809,12 +651,8 @@
 define <4 x i32> @rshrn_v4i64_24(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_24:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #8388608
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #24
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #24
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #24
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #24
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -826,12 +664,8 @@
 define <4 x i32> @rshrn_v4i64_25(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_25:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #16777216
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #25
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #25
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #25
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #25
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -843,12 +677,8 @@
 define <4 x i32> @rshrn_v4i64_26(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_26:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #33554432
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #26
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #26
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #26
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #26
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -860,12 +690,8 @@
 define <4 x i32> @rshrn_v4i64_27(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_27:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #67108864
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #27
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #27
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #27
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #27
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -877,12 +703,8 @@
 define <4 x i32> @rshrn_v4i64_28(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_28:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #134217728
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #28
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #28
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #28
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #28
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -894,12 +716,8 @@
 define <4 x i32> @rshrn_v4i64_29(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_29:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #268435456
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #29
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #29
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #29
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #29
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -911,12 +729,8 @@
 define <4 x i32> @rshrn_v4i64_30(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_30:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #536870912
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #30
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #30
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #30
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #30
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -928,12 +742,8 @@
 define <4 x i32> @rshrn_v4i64_31(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_31:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #1073741824
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #31
-; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #31
+; CHECK-NEXT:    rshrn v0.2s, v0.2d, #31
+; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #31
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,
@@ -945,10 +755,9 @@
 define <4 x i32> @rshrn_v4i64_32(<4 x i64> %a) {
 ; CHECK-LABEL: rshrn_v4i64_32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #-2147483648
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    addhn v0.2s, v0.2d, v2.2d
-; CHECK-NEXT:    addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    raddhn v0.2s, v0.2d, v2.2d
+; CHECK-NEXT:    raddhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
   %b = add <4 x i64> %a,