diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20683,6 +20683,76 @@
                              Store->getMemOperand());
 }
 
+// Try to fold a truncating store of
+//   (srl (add X, 1 << (C - 1)), C)
+// into the SVE2 RSHRNB (rounding shift right narrow, bottom) instruction.
+// Only the i16->i8 and i32->i16 narrowings are handled; the truncating store
+// only keeps the bottom (even) lanes alive, so RSHRNB's zeroed top lanes are
+// never observed.
+static SDValue tryCombineStoredNarrowShift(StoreSDNode *N, SelectionDAG &DAG,
+                                           const AArch64Subtarget *Subtarget) {
+  if (!Subtarget->hasSVE2())
+    return SDValue();
+
+  // Bail out on non-vector (scalar) truncating stores before querying the
+  // element types, which assert on non-vector EVTs.
+  EVT MemVT = N->getMemoryVT();
+  EVT VT = N->getValue().getValueType();
+  if (!VT.isScalableVector() || !MemVT.isScalableVector())
+    return SDValue();
+  EVT MemVTEltTy = MemVT.getVectorElementType();
+  EVT VTEltTy = VT.getVectorElementType();
+
+  // Select the RSHRNB variant from the narrowing the store performs.
+  unsigned Opc;
+  if (MemVTEltTy == MVT::i8 && VTEltTy == MVT::i16)
+    Opc = AArch64::RSHRNB_ZZI_B;
+  else if (MemVTEltTy == MVT::i16 && VTEltTy == MVT::i32)
+    Opc = AArch64::RSHRNB_ZZI_H;
+  else
+    return SDValue();
+
+  SDValue Srl = N->getOperand(1);
+  if (Srl->getOpcode() != ISD::SRL)
+    return SDValue();
+  // getSplatValue returns an empty SDValue for non-splat operands; check it
+  // before dyn_cast, which asserts on null.
+  SDValue SrlSplat = DAG.getSplatValue(Srl->getOperand(1));
+  if (!SrlSplat)
+    return SDValue();
+  auto *SrlOp1 = dyn_cast<ConstantSDNode>(SrlSplat);
+  if (!SrlOp1)
+    return SDValue();
+  uint64_t ShiftValue = SrlOp1->getZExtValue();
+  // RSHRNB's immediate is limited to 1..<narrow element bits>; rejecting
+  // ShiftValue == 0 also keeps the rounding-constant shift below in range.
+  if (ShiftValue < 1 || ShiftValue > MemVTEltTy.getFixedSizeInBits())
+    return SDValue();
+
+  SDValue Add = Srl->getOperand(0);
+  if (Add->getOpcode() != ISD::ADD)
+    return SDValue();
+  SDValue AddSplat = DAG.getSplatValue(Add->getOperand(1));
+  if (!AddSplat)
+    return SDValue();
+  auto *AddOp1 = dyn_cast<ConstantSDNode>(AddSplat);
+  if (!AddOp1)
+    return SDValue();
+
+  // The addend must be the rounding constant 1 << (C - 1).
+  if (AddOp1->getZExtValue() != 1ULL << (ShiftValue - 1))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue RSHRNB = SDValue(
+      DAG.getMachineNode(Opc, DL, VT,
+                         {Add->getOperand(0),
+                          DAG.getTargetConstant(ShiftValue, DL, MVT::i32)}),
+      0);
+  return DAG.getTruncStore(N->getChain(), DL, RSHRNB, N->getBasePtr(),
+                           N->getMemoryVT(), N->getMemOperand());
+}
+
 static SDValue performSTORECombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG,
@@ -20724,6 +20794,10 @@
   if (SDValue Store = combineBoolVectorAndTruncateStore(DAG, ST))
     return Store;
 
+  if (ST->isTruncatingStore())
+    if (SDValue Store = tryCombineStoredNarrowShift(ST, DAG, Subtarget))
+      return Store;
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll +++ /dev/null @@ -1,635 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s - -; -; SHRNB -; - -define @shrnb_h( %a) { -; CHECK-LABEL: shrnb_h: -; CHECK: // %bb.0: -; CHECK-NEXT: shrnb z0.b, z0.h, #8 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.shrnb.nxv8i16( %a, - i32 8) - ret %out -} - -define @shrnb_s( %a) { -; CHECK-LABEL: shrnb_s: -; CHECK: // %bb.0: -; CHECK-NEXT: shrnb z0.h, z0.s, #16 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.shrnb.nxv4i32( %a, - i32 16) - ret %out -} - -define @shrnb_d( %a) { -; CHECK-LABEL: shrnb_d: -; CHECK: // %bb.0: -; CHECK-NEXT: shrnb z0.s, z0.d, #32 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.shrnb.nxv2i64( %a, - i32 32) - ret %out -} - -; -; RSHRNB -; - -define @rshrnb_h( %a) { -; CHECK-LABEL: rshrnb_h: -; CHECK: // %bb.0: -; CHECK-NEXT: rshrnb z0.b, z0.h, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.rshrnb.nxv8i16( %a, - i32 2) - ret %out -} - -define @rshrnb_s( %a) { -; CHECK-LABEL: rshrnb_s: -; CHECK: // %bb.0: -; CHECK-NEXT: rshrnb z0.h, z0.s, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.rshrnb.nxv4i32( %a, - i32 2) - ret %out -} - -define @rshrnb_d( %a) { -; CHECK-LABEL: rshrnb_d: -; CHECK: // %bb.0: -; CHECK-NEXT: rshrnb z0.s, z0.d, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.rshrnb.nxv2i64( %a, - i32 2) - ret %out -} - -; -; UQSHRNB -; - -define @uqshrnb_h( %a) { -; CHECK-LABEL: uqshrnb_h: -; CHECK: // %bb.0: -; CHECK-NEXT: uqshrnb z0.b, z0.h, #1 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqshrnb.nxv8i16( %a, - i32 1) - ret %out -} - -define @uqshrnb_s( 
%a) { -; CHECK-LABEL: uqshrnb_s: -; CHECK: // %bb.0: -; CHECK-NEXT: uqshrnb z0.h, z0.s, #1 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqshrnb.nxv4i32( %a, - i32 1) - ret %out -} - -define @uqshrnb_d( %a) { -; CHECK-LABEL: uqshrnb_d: -; CHECK: // %bb.0: -; CHECK-NEXT: uqshrnb z0.s, z0.d, #1 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqshrnb.nxv2i64( %a, - i32 1) - ret %out -} - -; -; SQSHRNB -; - -define @sqshrnb_h( %a) { -; CHECK-LABEL: sqshrnb_h: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrnb z0.b, z0.h, #1 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrnb.nxv8i16( %a, - i32 1) - ret %out -} - -define @sqshrnb_s( %a) { -; CHECK-LABEL: sqshrnb_s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrnb z0.h, z0.s, #1 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrnb.nxv4i32( %a, - i32 1) - ret %out -} - -define @sqshrnb_d( %a) { -; CHECK-LABEL: sqshrnb_d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrnb z0.s, z0.d, #1 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrnb.nxv2i64( %a, - i32 1) - ret %out -} - -; -; SQSHRUNB -; - -define @sqshrunb_h( %a) { -; CHECK-LABEL: sqshrunb_h: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrunb z0.b, z0.h, #7 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrunb.nxv8i16( %a, - i32 7) - ret %out -} - -define @sqshrunb_s( %a) { -; CHECK-LABEL: sqshrunb_s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrunb z0.h, z0.s, #15 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrunb.nxv4i32( %a, - i32 15) - ret %out -} - -define @sqshrunb_d( %a) { -; CHECK-LABEL: sqshrunb_d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrunb z0.s, z0.d, #31 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrunb.nxv2i64( %a, - i32 31) - ret %out -} - -; -; UQRSHRNB -; - -define @uqrshrnb_h( %a) { -; CHECK-LABEL: uqrshrnb_h: -; CHECK: // %bb.0: -; CHECK-NEXT: uqrshrnb z0.b, z0.h, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqrshrnb.nxv8i16( %a, - i32 2) - ret %out -} - -define @uqrshrnb_s( %a) { -; CHECK-LABEL: uqrshrnb_s: -; CHECK: // 
%bb.0: -; CHECK-NEXT: uqrshrnb z0.h, z0.s, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqrshrnb.nxv4i32( %a, - i32 2) - ret %out -} - -define @uqrshrnb_d( %a) { -; CHECK-LABEL: uqrshrnb_d: -; CHECK: // %bb.0: -; CHECK-NEXT: uqrshrnb z0.s, z0.d, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqrshrnb.nxv2i64( %a, - i32 2) - ret %out -} - -; -; SQRSHRNB -; - -define @sqrshrnb_h( %a) { -; CHECK-LABEL: sqrshrnb_h: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrnb z0.b, z0.h, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrnb.nxv8i16( %a, - i32 2) - ret %out -} - -define @sqrshrnb_s( %a) { -; CHECK-LABEL: sqrshrnb_s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrnb z0.h, z0.s, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrnb.nxv4i32( %a, - i32 2) - ret %out -} - -define @sqrshrnb_d( %a) { -; CHECK-LABEL: sqrshrnb_d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrnb z0.s, z0.d, #2 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrnb.nxv2i64( %a, - i32 2) - ret %out -} - -; -; SQRSHRUNB -; - -define @sqrshrunb_h( %a) { -; CHECK-LABEL: sqrshrunb_h: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrunb z0.b, z0.h, #6 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrunb.nxv8i16( %a, - i32 6) - ret %out -} - -define @sqrshrunb_s( %a) { -; CHECK-LABEL: sqrshrunb_s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrunb z0.h, z0.s, #14 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrunb.nxv4i32( %a, - i32 14) - ret %out -} - -define @sqrshrunb_d( %a) { -; CHECK-LABEL: sqrshrunb_d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrunb z0.s, z0.d, #30 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrunb.nxv2i64( %a, - i32 30) - ret %out -} - -; -; SHRNT -; - -define @shrnt_h( %a, %b) { -; CHECK-LABEL: shrnt_h: -; CHECK: // %bb.0: -; CHECK-NEXT: shrnt z0.b, z1.h, #3 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.shrnt.nxv8i16( %a, - %b, - i32 3) - ret %out -} - -define @shrnt_s( %a, %b) { -; CHECK-LABEL: shrnt_s: -; CHECK: // %bb.0: -; CHECK-NEXT: 
shrnt z0.h, z1.s, #3 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.shrnt.nxv4i32( %a, - %b, - i32 3) - ret %out -} - -define @shrnt_d( %a, %b) { -; CHECK-LABEL: shrnt_d: -; CHECK: // %bb.0: -; CHECK-NEXT: shrnt z0.s, z1.d, #3 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.shrnt.nxv2i64( %a, - %b, - i32 3) - ret %out -} - -; -; RSHRNT -; - -define @rshrnt_h( %a, %b) { -; CHECK-LABEL: rshrnt_h: -; CHECK: // %bb.0: -; CHECK-NEXT: rshrnt z0.b, z1.h, #1 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.rshrnt.nxv8i16( %a, - %b, - i32 1) - ret %out -} - -define @rshrnt_s( %a, %b) { -; CHECK-LABEL: rshrnt_s: -; CHECK: // %bb.0: -; CHECK-NEXT: rshrnt z0.h, z1.s, #5 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.rshrnt.nxv4i32( %a, - %b, - i32 5) - ret %out -} - -define @rshrnt_d( %a, %b) { -; CHECK-LABEL: rshrnt_d: -; CHECK: // %bb.0: -; CHECK-NEXT: rshrnt z0.s, z1.d, #5 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.rshrnt.nxv2i64( %a, - %b, - i32 5) - ret %out -} - -; -; UQSHRNT -; - -define @uqshrnt_h( %a, %b) { -; CHECK-LABEL: uqshrnt_h: -; CHECK: // %bb.0: -; CHECK-NEXT: uqshrnt z0.b, z1.h, #5 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqshrnt.nxv8i16( %a, - %b, - i32 5) - ret %out -} - -define @uqshrnt_s( %a, %b) { -; CHECK-LABEL: uqshrnt_s: -; CHECK: // %bb.0: -; CHECK-NEXT: uqshrnt z0.h, z1.s, #13 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqshrnt.nxv4i32( %a, - %b, - i32 13) - ret %out -} - -define @uqshrnt_d( %a, %b) { -; CHECK-LABEL: uqshrnt_d: -; CHECK: // %bb.0: -; CHECK-NEXT: uqshrnt z0.s, z1.d, #29 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqshrnt.nxv2i64( %a, - %b, - i32 29) - ret %out -} - -; -; SQSHRNT -; - -define @sqshrnt_h( %a, %b) { -; CHECK-LABEL: sqshrnt_h: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrnt z0.b, z1.h, #5 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrnt.nxv8i16( %a, - %b, - i32 5) - ret %out -} - -define @sqshrnt_s( %a, %b) { -; CHECK-LABEL: sqshrnt_s: -; CHECK: // %bb.0: -; CHECK-NEXT: 
sqshrnt z0.h, z1.s, #13 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrnt.nxv4i32( %a, - %b, - i32 13) - ret %out -} - -define @sqshrnt_d( %a, %b) { -; CHECK-LABEL: sqshrnt_d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrnt z0.s, z1.d, #29 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrnt.nxv2i64( %a, - %b, - i32 29) - ret %out -} - -; -; SQSHRUNT -; - -define @sqshrunt_h( %a, %b) { -; CHECK-LABEL: sqshrunt_h: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrunt z0.b, z1.h, #4 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrunt.nxv8i16( %a, - %b, - i32 4) - ret %out -} - -define @sqshrunt_s( %a, %b) { -; CHECK-LABEL: sqshrunt_s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrunt z0.h, z1.s, #4 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrunt.nxv4i32( %a, - %b, - i32 4) - ret %out -} - -define @sqshrunt_d( %a, %b) { -; CHECK-LABEL: sqshrunt_d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqshrunt z0.s, z1.d, #4 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqshrunt.nxv2i64( %a, - %b, - i32 4) - ret %out -} - -; -; UQRSHRNT -; - -define @uqrshrnt_h( %a, %b) { -; CHECK-LABEL: uqrshrnt_h: -; CHECK: // %bb.0: -; CHECK-NEXT: uqrshrnt z0.b, z1.h, #8 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqrshrnt.nxv8i16( %a, - %b, - i32 8) - ret %out -} - -define @uqrshrnt_s( %a, %b) { -; CHECK-LABEL: uqrshrnt_s: -; CHECK: // %bb.0: -; CHECK-NEXT: uqrshrnt z0.h, z1.s, #12 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqrshrnt.nxv4i32( %a, - %b, - i32 12) - ret %out -} - -define @uqrshrnt_d( %a, %b) { -; CHECK-LABEL: uqrshrnt_d: -; CHECK: // %bb.0: -; CHECK-NEXT: uqrshrnt z0.s, z1.d, #28 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.uqrshrnt.nxv2i64( %a, - %b, - i32 28) - ret %out -} - -; -; SQRSHRNT -; - -define @sqrshrnt_h( %a, %b) { -; CHECK-LABEL: sqrshrnt_h: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrnt z0.b, z1.h, #8 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrnt.nxv8i16( %a, - %b, - i32 8) - ret %out -} - -define @sqrshrnt_s( %a, %b) { -; 
CHECK-LABEL: sqrshrnt_s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrnt z0.h, z1.s, #12 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrnt.nxv4i32( %a, - %b, - i32 12) - ret %out -} - -define @sqrshrnt_d( %a, %b) { -; CHECK-LABEL: sqrshrnt_d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrnt z0.s, z1.d, #28 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrnt.nxv2i64( %a, - %b, - i32 28) - ret %out -} - -; -; SQRSHRUNT -; - -define @sqrshrunt_h( %a, %b) { -; CHECK-LABEL: sqrshrunt_h: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrunt z0.b, z1.h, #1 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrunt.nxv8i16( %a, - %b, - i32 1) - ret %out -} - -define @sqrshrunt_s( %a, %b) { -; CHECK-LABEL: sqrshrunt_s: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrunt z0.h, z1.s, #5 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrunt.nxv4i32( %a, - %b, - i32 5) - ret %out -} - -define @sqrshrunt_d( %a, %b) { -; CHECK-LABEL: sqrshrunt_d: -; CHECK: // %bb.0: -; CHECK-NEXT: sqrshrunt z0.s, z1.d, #5 -; CHECK-NEXT: ret - %out = call @llvm.aarch64.sve.sqrshrunt.nxv2i64( %a, - %b, - i32 5) - ret %out -} - -declare @llvm.aarch64.sve.shrnb.nxv8i16(, i32) -declare @llvm.aarch64.sve.shrnb.nxv4i32(, i32) -declare @llvm.aarch64.sve.shrnb.nxv2i64(, i32) - -declare @llvm.aarch64.sve.rshrnb.nxv8i16(, i32) -declare @llvm.aarch64.sve.rshrnb.nxv4i32(, i32) -declare @llvm.aarch64.sve.rshrnb.nxv2i64(, i32) - -declare @llvm.aarch64.sve.uqshrnb.nxv8i16(, i32) -declare @llvm.aarch64.sve.uqshrnb.nxv4i32(, i32) -declare @llvm.aarch64.sve.uqshrnb.nxv2i64(, i32) - -declare @llvm.aarch64.sve.sqshrnb.nxv8i16(, i32) -declare @llvm.aarch64.sve.sqshrnb.nxv4i32(, i32) -declare @llvm.aarch64.sve.sqshrnb.nxv2i64(, i32) - -declare @llvm.aarch64.sve.uqrshrnb.nxv8i16(, i32) -declare @llvm.aarch64.sve.uqrshrnb.nxv4i32(, i32) -declare @llvm.aarch64.sve.uqrshrnb.nxv2i64(, i32) - -declare @llvm.aarch64.sve.sqrshrnb.nxv8i16(, i32) -declare @llvm.aarch64.sve.sqrshrnb.nxv4i32(, i32) -declare 
@llvm.aarch64.sve.sqrshrnb.nxv2i64(, i32) - -declare @llvm.aarch64.sve.sqshrunb.nxv8i16(, i32) -declare @llvm.aarch64.sve.sqshrunb.nxv4i32(, i32) -declare @llvm.aarch64.sve.sqshrunb.nxv2i64(, i32) - -declare @llvm.aarch64.sve.sqrshrunb.nxv8i16(, i32) -declare @llvm.aarch64.sve.sqrshrunb.nxv4i32(, i32) -declare @llvm.aarch64.sve.sqrshrunb.nxv2i64(, i32) - -declare @llvm.aarch64.sve.shrnt.nxv8i16(, , i32) -declare @llvm.aarch64.sve.shrnt.nxv4i32(, , i32) -declare @llvm.aarch64.sve.shrnt.nxv2i64(, , i32) - -declare @llvm.aarch64.sve.rshrnt.nxv8i16(, , i32) -declare @llvm.aarch64.sve.rshrnt.nxv4i32(, , i32) -declare @llvm.aarch64.sve.rshrnt.nxv2i64(, , i32) - -declare @llvm.aarch64.sve.uqshrnt.nxv8i16(, , i32) -declare @llvm.aarch64.sve.uqshrnt.nxv4i32(, , i32) -declare @llvm.aarch64.sve.uqshrnt.nxv2i64(, , i32) - -declare @llvm.aarch64.sve.sqshrnt.nxv8i16(, , i32) -declare @llvm.aarch64.sve.sqshrnt.nxv4i32(, , i32) -declare @llvm.aarch64.sve.sqshrnt.nxv2i64(, , i32) - -declare @llvm.aarch64.sve.sqshrunt.nxv8i16(, , i32) -declare @llvm.aarch64.sve.sqshrunt.nxv4i32(, , i32) -declare @llvm.aarch64.sve.sqshrunt.nxv2i64(, , i32) - -declare @llvm.aarch64.sve.uqrshrnt.nxv8i16(, , i32) -declare @llvm.aarch64.sve.uqrshrnt.nxv4i32(, , i32) -declare @llvm.aarch64.sve.uqrshrnt.nxv2i64(, , i32) - -declare @llvm.aarch64.sve.sqrshrnt.nxv8i16(, , i32) -declare @llvm.aarch64.sve.sqrshrnt.nxv4i32(, , i32) -declare @llvm.aarch64.sve.sqrshrnt.nxv2i64(, , i32) - -declare @llvm.aarch64.sve.sqrshrunt.nxv8i16(, , i32) -declare @llvm.aarch64.sve.sqrshrunt.nxv4i32(, , i32) -declare @llvm.aarch64.sve.sqrshrunt.nxv2i64(, , i32) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu 
-mattr=+sve2 < %s | FileCheck %s
+
+define void @add_lshr_rshrnb_b_6(ptr %ptr, ptr %dst, i64 %index12){
+; CHECK-LABEL: add_lshr_rshrnb_b_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    rshrnb z0.b, z0.h, #6
+; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
+; CHECK-NEXT:    ret
+  %wide.load13 = load <vscale x 8 x i16>, ptr %ptr, align 2
+  %1 = add <vscale x 8 x i16> %wide.load13, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 32, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
+  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
+  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
+  %4 = getelementptr inbounds i8, ptr %dst, i64 %index12
+  store <vscale x 8 x i8> %3, ptr %4, align 1
+  ret void
+}
+
+define void @neg_add_lshr_rshrnb_b_6(ptr %ptr, ptr %dst, i64 %index12){
+; CHECK-LABEL: neg_add_lshr_rshrnb_b_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    add z0.h, z0.h, #1 // =0x1
+; CHECK-NEXT:    lsr z0.h, z0.h, #6
+; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
+; CHECK-NEXT:    ret
+  %wide.load13 = load <vscale x 8 x i16>, ptr %ptr, align 2
+  %1 = add <vscale x 8 x i16> %wide.load13, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
+  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
+  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
+  %4 = getelementptr inbounds i8, ptr %dst, i64 %index12
+  store <vscale x 8 x i8> %3, ptr %4, align 1
+  ret void
+}
+
+define void @add_lshr_rshrnb_h_7(ptr %ptr, ptr %dst, i64 %index12){
+; CHECK-LABEL: add_lshr_rshrnb_h_7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    rshrnb z0.b, z0.h, #7
+; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
+; CHECK-NEXT:    ret
+  %wide.load13 = load <vscale x 8 x i16>, ptr %ptr, align 2
+  %1 = add <vscale x 8 x i16> %wide.load13, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 64, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
+  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 7, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
+  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
+  %4 = getelementptr inbounds i8, ptr %dst, i64 %index12
+  store <vscale x 8 x i8> %3, ptr %4, align 1
+  ret void
+}
+
+define void @add_lshr_rshrn_h_6(ptr %ptr, ptr %dst, i64 %index12){
+; CHECK-LABEL: add_lshr_rshrn_h_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    rshrnb z0.h, z0.s, #6
+; CHECK-NEXT:    st1h { z0.s }, p0, [x1, x2, lsl #1]
+; CHECK-NEXT:    ret
+  %wide.load13 = load <vscale x 4 x i32>, ptr %ptr, align 2
+  %1 = add <vscale x 4 x i32> %wide.load13, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 32, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
+  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 6, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
+  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
+  %4 = getelementptr inbounds i16, ptr %dst, i64 %index12
+  store <vscale x 4 x i16> %3, ptr %4, align 1
+  ret void
+}
+
+define void @add_lshr_rshrnb_h_2(ptr %ptr, ptr %dst, i64 %index12){
+; CHECK-LABEL: add_lshr_rshrnb_h_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    rshrnb z0.h, z0.s, #2
+; CHECK-NEXT:    st1h { z0.s }, p0, [x1, x2, lsl #1]
+; CHECK-NEXT:    ret
+  %wide.load13 = load <vscale x 4 x i32>, ptr %ptr, align 2
+  %1 = add <vscale x 4 x i32> %wide.load13, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
+  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
+  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
+  %4 = getelementptr inbounds i16, ptr %dst, i64 %index12
+  store <vscale x 4 x i16> %3, ptr %4, align 1
+  ret void
+}
+
+define void @neg_add_lshr_rshrnb_h_0(ptr %ptr, ptr %dst, i64 %index12){
+; CHECK-LABEL: neg_add_lshr_rshrnb_h_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %wide.load13 = load <vscale x 4 x i32>, ptr %ptr, align 2
+  %1 = add <vscale x 4 x i32> %wide.load13, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
+  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 -1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
+  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
+  %4 = getelementptr inbounds i16, ptr %dst, i64 %index12
+  store <vscale x 4 x i16> %3, ptr %4, align 1
+  ret void
+}