diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1014,6 +1014,12 @@
                  llvm_i32_ty],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_PTEST_Intrinsic
+    : Intrinsic<[llvm_i1_ty],
+                [llvm_anyvector_ty,
+                 LLVMMatchType<0>],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_TBL_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMMatchType<0>,
@@ -1553,6 +1559,14 @@
 def int_aarch64_sve_punpklo   : AdvSIMD_SVE_PUNPKHI_Intrinsic;
 
 //
+// Testing predicates
+//
+
+def int_aarch64_sve_ptest_any   : AdvSIMD_SVE_PTEST_Intrinsic;
+def int_aarch64_sve_ptest_first : AdvSIMD_SVE_PTEST_Intrinsic;
+def int_aarch64_sve_ptest_last  : AdvSIMD_SVE_PTEST_Intrinsic;
+
+//
 // Gather loads:
 //
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -212,6 +212,7 @@
   TBL,
 
   INSR,
+  PTEST,
   PTRUE,
 
   // Unsigned gather loads.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1357,6 +1357,7 @@
   case AArch64ISD::UUNPKHI:           return "AArch64ISD::UUNPKHI";
   case AArch64ISD::UUNPKLO:           return "AArch64ISD::UUNPKLO";
   case AArch64ISD::INSR:              return "AArch64ISD::INSR";
+  case AArch64ISD::PTEST:             return "AArch64ISD::PTEST";
   case AArch64ISD::PTRUE:             return "AArch64ISD::PTRUE";
   case AArch64ISD::GLD1:              return "AArch64ISD::GLD1";
   case AArch64ISD::GLD1_SCALED:       return "AArch64ISD::GLD1_SCALED";
@@ -10889,6 +10890,30 @@
   return SDValue();
 }
 
+static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
+                        AArch64CC::CondCode Cond) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  SDLoc DL(Op);
+  EVT OpVT = Op.getValueType();
+  assert(OpVT.isScalableVector() && TLI.isTypeLegal(OpVT) &&
+         "Expected legal scalable vector type!");
+
+  // Ensure target specific opcodes are using legal type.
+  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue TVal = DAG.getConstant(1, DL, OutVT);
+  SDValue FVal = DAG.getConstant(0, DL, OutVT);
+
+  // Set condition code (CC) flags.
+  SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
+
+  // Convert CC to integer based on requested condition.
+  // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
+  SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
+  SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
+  return DAG.getZExtOrTrunc(Res, DL, VT);
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -10989,6 +11014,15 @@
   case Intrinsic::aarch64_sve_cmpls_wide:
     return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs, true, DCI,
                                     DAG);
+  case Intrinsic::aarch64_sve_ptest_any:
+    return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
+                    AArch64CC::ANY_ACTIVE);
+  case Intrinsic::aarch64_sve_ptest_first:
+    return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
+                    AArch64CC::FIRST_ACTIVE);
+  case Intrinsic::aarch64_sve_ptest_last:
+    return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
+                    AArch64CC::LAST_ACTIVE);
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -73,6 +73,9 @@
 def SDT_AArch64Rev   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
 def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
 
+def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
+
 let Predicates = [HasSVE] in {
 
   def RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr">;
@@ -1086,6 +1089,15 @@
   def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
                   (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
 
+  def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)),
+            (PTEST_PP PPR:$pg, PPR:$src)>;
+  def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)),
+            (PTEST_PP PPR:$pg, PPR:$src)>;
+  def : Pat<(AArch64ptest (nxv4i1 PPR:$pg), (nxv4i1 PPR:$src)),
+            (PTEST_PP PPR:$pg, PPR:$src)>;
+  def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)),
+            (PTEST_PP PPR:$pg, PPR:$src)>;
+
   def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
   def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
   def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8),  (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -250,7 +250,13 @@
   AL = 0xe,  // Always (unconditional)     Always (unconditional)
   NV = 0xf,  // Always (unconditional)     Always (unconditional)
   // Note the NV exists purely to disassemble 0b1111. Execution is "always".
-  Invalid
+  Invalid,
+
+  // Common aliases used for SVE.
+  ANY_ACTIVE   = NE, // (!Z)
+  FIRST_ACTIVE = MI, // ( N)
+  LAST_ACTIVE  = LO, // (!C)
+  NONE_ACTIVE  = EQ  // ( Z)
 };
 
 inline static const char *getCondCodeName(CondCode Code) {
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; PTEST
+;
+
+define i1 @ptest_any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: ptest_any:
+; CHECK: ptest p0, p1.b
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+  ret i1 %out
+}
+
+define i1 @ptest_first(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: ptest_first:
+; CHECK: ptest p0, p1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+  %out = call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+  ret i1 %out
+}
+
+define i1 @ptest_last(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: ptest_last:
+; CHECK: ptest p0, p1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+  %out = call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+  ret i1 %out
+}
+
+declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+declare i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+declare i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
diff --git a/llvm/test/CodeGen/AArch64/sve-setcc.ll b/llvm/test/CodeGen/AArch64/sve-setcc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-setcc.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; Ensure we use the inverted CC result of SVE compare instructions when branching.
+define void @sve_cmplt_setcc_inverted(<vscale x 8 x i16>* %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: @sve_cmplt_setcc_inverted
+; CHECK: cmplt p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: b.ne
+entry:
+  %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
+  %1 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %0)
+  br i1 %1, label %if.end, label %if.then
+
+if.then:
+  tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %in, <vscale x 8 x i16>* %out, i32 2, <vscale x 8 x i1> %pg)
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+declare i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+
+declare void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32, <vscale x 8 x i1>)
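
Note for reviewers: PTEST sets NZCV from the tested predicate. N is the value of the
first active element, Z is set when no active element is set, and C is the inverse of
the last active element, which is what the ANY_ACTIVE (NE, !Z), FIRST_ACTIVE (MI, N)
and LAST_ACTIVE (LO, !C) aliases in AArch64BaseInfo.h encode. Below is a scalar C++
model of that flag mapping and of the CSEL built by getPTest(); it is purely
illustrative and not part of the patch (the NZCV struct and the ptest/csel helper
names here are invented for the sketch).

  #include <cassert>
  #include <cstddef>
  #include <vector>

  struct NZCV { bool N, Z, C; };

  // Model of PTEST Pg, Ps: N = first active element of Ps, Z = no active
  // element of Ps is set, C = !(last active element of Ps).
  NZCV ptest(const std::vector<bool> &Pg, const std::vector<bool> &Ps) {
    NZCV Flags{false, true, true};
    bool SeenActive = false;
    for (std::size_t I = 0, E = Pg.size(); I != E; ++I) {
      if (!Pg[I])
        continue; // inactive lanes do not affect the flags
      if (!SeenActive) {
        Flags.N = Ps[I]; // first active lane
        SeenActive = true;
      }
      if (Ps[I])
        Flags.Z = false; // some active lane is set
      Flags.C = !Ps[I];  // last write wins, i.e. the last active lane
    }
    return Flags;
  }

  // AArch64ISD::CSEL picks TVal when the condition holds. getPTest() swaps
  // the constants *and* inverts the condition, leaving the value unchanged:
  //   csel(0, 1, !cc) == csel(1, 0, cc) == cc ? 1 : 0
  int csel(int TVal, int FVal, bool CC) { return CC ? TVal : FVal; }

  int main() {
    // Lanes 0, 1 and 3 are active; only lane 1 of the tested predicate is set.
    NZCV F = ptest({true, true, false, true}, {false, true, false, false});
    assert(!F.Z);  // ANY_ACTIVE   == NE == !Z -> true
    assert(!F.N);  // FIRST_ACTIVE == MI ==  N -> false
    assert(F.C);   // LAST_ACTIVE  == LO == !C -> false
    bool AnyActive = !F.Z;
    assert(csel(0, 1, !AnyActive) == (AnyActive ? 1 : 0));
    return 0;
  }

The point of the inversion is stated in the NOTE inside getPTest(): when the i1
result feeds a compare-and-branch rather than being materialised, the inverted-CC
CSEL can be folded away so the branch consumes the PTEST flags directly, which is
exactly what sve-setcc.ll checks (ptest followed immediately by b.ne, no cset/cmp).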
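For context on how these intrinsics are reached from source: Clang's ACLE svptest_*
builtins are the intended producers. A minimal sketch, assuming arm_sve.h and an
SVE-enabled compile such as clang -O2 --target=aarch64-linux-gnu -march=armv8-a+sve;
the function any_negative is a made-up example, not part of the patch:

  #include <arm_sve.h>

  // Returns nonzero when any lane of v is negative. With this patch the
  // svptest_any call should lower via llvm.aarch64.sve.ptest.any to
  // "ptest" plus "cset w0, ne" (or a direct b.ne when used in a branch).
  int any_negative(svint16_t v) {
    svbool_t pg = svptrue_b16();            // all lanes active
    svbool_t lt = svcmplt_n_s16(pg, v, 0);  // lanewise v < 0
    return svptest_any(pg, lt);
  }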